diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 56f800fcc51b..38e0131182fd 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -1,911 +1,912 @@ /*- * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_exit.c 8.7 (Berkeley) 2/12/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ktrace.h" #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for acct_process() function prototype */ #include #include #include #include #ifdef KTRACE #include #endif #include #include #include #include #include #include #include /* Required to be non-static for SysVR4 emulator */ MALLOC_DEFINE(M_ZOMBIE, "zombie", "zombie proc status"); /* Hook for NFS teardown procedure. */ void (*nlminfo_release_p)(struct proc *p); /* * exit -- * Death of process. * * MPSAFE */ void sys_exit(struct thread *td, struct sys_exit_args *uap) { exit1(td, W_EXITCODE(uap->rval, 0)); /* NOTREACHED */ } /* * Exit: deallocate address space and other resources, change proc state * to zombie, and unlink proc from allproc and parent's lists. Save exit * status and rusage for wait(). Check for child processes and orphan them. */ void exit1(struct thread *td, int rv) { struct bintime new_switchtime; struct proc *p, *nq, *q; struct tty *tp; struct vnode *ttyvp; struct vmspace *vm; struct vnode *vtmp; #ifdef KTRACE struct vnode *tracevp; struct ucred *tracecred; #endif struct plimit *plim; int locked, refcnt; /* * Drop Giant if caller has it. Eventually we should warn about * being called with Giant held. */ while (mtx_owned(&Giant)) mtx_unlock(&Giant); p = td->td_proc; if (p == initproc) { printf("init died (signal %d, exit %d)\n", WTERMSIG(rv), WEXITSTATUS(rv)); panic("Going nowhere without my init!"); } /* * MUST abort all other threads before proceeding past here. */ PROC_LOCK(p); if (p->p_flag & P_HADTHREADS) { retry: /* * First check if some other thread got here before us.. * if so, act apropriatly, (exit or suspend); */ thread_suspend_check(0); /* * Kill off the other threads. This requires * some co-operation from other parts of the kernel * so it may not be instantaneous. With this state set * any thread entering the kernel from userspace will * thread_exit() in trap(). Any thread attempting to * sleep will return immediately with EINTR or EWOULDBLOCK * which will hopefully force them to back out to userland * freeing resources as they go. Any thread attempting * to return to userland will thread_exit() from userret(). * thread_exit() will unsuspend us when the last of the * other threads exits. * If there is already a thread singler after resumption, * calling thread_single will fail; in that case, we just * re-check all suspension request, the thread should * either be suspended there or exit. */ if (thread_single(SINGLE_EXIT)) goto retry; /* * All other activity in this process is now stopped. * Threading support has been turned off. */ } p->p_flag |= P_WEXIT; PROC_LOCK(p->p_pptr); sigqueue_take(p->p_ksi); PROC_UNLOCK(p->p_pptr); PROC_UNLOCK(p); /* Are we a task leader? */ if (p == p->p_leader) { mtx_lock(&ppeers_lock); q = p->p_peers; while (q != NULL) { PROC_LOCK(q); psignal(q, SIGKILL); PROC_UNLOCK(q); q = q->p_peers; } while (p->p_peers != NULL) msleep(p, &ppeers_lock, PWAIT, "exit1", 0); mtx_unlock(&ppeers_lock); } PROC_LOCK(p); _STOPEVENT(p, S_EXIT, rv); wakeup(&p->p_stype); /* Wakeup anyone in procfs' PIOCWAIT */ PROC_UNLOCK(p); /* * Check if any loadable modules need anything done at process exit. * E.g. SYSV IPC stuff * XXX what if one of these generates an error? */ EVENTHANDLER_INVOKE(process_exit, p); MALLOC(p->p_ru, struct rusage *, sizeof(struct rusage), M_ZOMBIE, M_WAITOK); /* * If parent is waiting for us to exit or exec, * P_PPWAIT is set; we will wakeup the parent below. */ PROC_LOCK(p); stopprofclock(p); p->p_flag &= ~(P_TRACED | P_PPWAIT); /* * Stop the real interval timer. If the handler is currently * executing, prevent it from rearming itself and let it finish. */ if (timevalisset(&p->p_realtimer.it_value) && callout_stop(&p->p_itcallout) == 0) { timevalclear(&p->p_realtimer.it_interval); msleep(&p->p_itcallout, &p->p_mtx, PWAIT, "ritwait", 0); KASSERT(!timevalisset(&p->p_realtimer.it_value), ("realtime timer is still armed")); } sigqueue_flush(&p->p_sigqueue); sigqueue_flush(&td->td_sigqueue); PROC_UNLOCK(p); /* * Reset any sigio structures pointing to us as a result of * F_SETOWN with our pid. */ mtx_lock(&Giant); /* XXX: not sure if needed */ funsetownlst(&p->p_sigiolst); mtx_unlock(&Giant); /* * If this process has an nlminfo data area (for lockd), release it */ if (nlminfo_release_p != NULL && p->p_nlminfo != NULL) (*nlminfo_release_p)(p); /* * Close open files and release open-file table. * This may block! */ fdfree(td); /* * If this thread tickled GEOM, we need to wait for the giggling to * stop before we return to userland */ if (td->td_pflags & TDP_GEOM) g_waitidle(); /* * Remove ourself from our leader's peer list and wake our leader. */ mtx_lock(&ppeers_lock); if (p->p_leader->p_peers) { q = p->p_leader; while (q->p_peers != p) q = q->p_peers; q->p_peers = p->p_peers; wakeup(p->p_leader); } mtx_unlock(&ppeers_lock); /* The next two chunks should probably be moved to vmspace_exit. */ vm = p->p_vmspace; /* * Release user portion of address space. * This releases references to vnodes, * which could cause I/O if the file has been unlinked. * Need to do this early enough that we can still sleep. * Can't free the entire vmspace as the kernel stack * may be mapped within that space also. * * Processes sharing the same vmspace may exit in one order, and * get cleaned up by vmspace_exit() in a different order. The * last exiting process to reach this point releases as much of * the environment as it can, and the last process cleaned up * by vmspace_exit() (which decrements exitingcnt) cleans up the * remainder. */ atomic_add_int(&vm->vm_exitingcnt, 1); do refcnt = vm->vm_refcnt; while (!atomic_cmpset_int(&vm->vm_refcnt, refcnt, refcnt - 1)); if (refcnt == 1) { shmexit(vm); pmap_remove_pages(vmspace_pmap(vm), vm_map_min(&vm->vm_map), vm_map_max(&vm->vm_map)); (void) vm_map_remove(&vm->vm_map, vm_map_min(&vm->vm_map), vm_map_max(&vm->vm_map)); } sx_xlock(&proctree_lock); if (SESS_LEADER(p)) { struct session *sp; sp = p->p_session; if (sp->s_ttyvp) { locked = VFS_LOCK_GIANT(sp->s_ttyvp->v_mount); /* * Controlling process. * Signal foreground pgrp, * drain controlling terminal * and revoke access to controlling terminal. */ if (sp->s_ttyp && (sp->s_ttyp->t_session == sp)) { tp = sp->s_ttyp; if (sp->s_ttyp->t_pgrp) { PGRP_LOCK(sp->s_ttyp->t_pgrp); pgsignal(sp->s_ttyp->t_pgrp, SIGHUP, 1); PGRP_UNLOCK(sp->s_ttyp->t_pgrp); } /* XXX tp should be locked. */ sx_xunlock(&proctree_lock); (void) ttywait(tp); sx_xlock(&proctree_lock); /* * The tty could have been revoked * if we blocked. */ if (sp->s_ttyvp) { ttyvp = sp->s_ttyvp; SESS_LOCK(p->p_session); sp->s_ttyvp = NULL; SESS_UNLOCK(p->p_session); sx_xunlock(&proctree_lock); VOP_LOCK(ttyvp, LK_EXCLUSIVE, td); VOP_REVOKE(ttyvp, REVOKEALL); vput(ttyvp); sx_xlock(&proctree_lock); } } if (sp->s_ttyvp) { ttyvp = sp->s_ttyvp; SESS_LOCK(p->p_session); sp->s_ttyvp = NULL; SESS_UNLOCK(p->p_session); vrele(ttyvp); } /* * s_ttyp is not zero'd; we use this to indicate * that the session once had a controlling terminal. * (for logging and informational purposes) */ VFS_UNLOCK_GIANT(locked); } SESS_LOCK(p->p_session); sp->s_leader = NULL; SESS_UNLOCK(p->p_session); } fixjobc(p, p->p_pgrp, 0); sx_xunlock(&proctree_lock); (void)acct_process(td); #ifdef KTRACE /* * Drain any pending records on the thread and release the trace * file. It might be better if drain-and-clear were atomic. */ ktrprocexit(td); PROC_LOCK(p); mtx_lock(&ktrace_mtx); p->p_traceflag = 0; /* don't trace the vrele() */ tracevp = p->p_tracevp; p->p_tracevp = NULL; tracecred = p->p_tracecred; p->p_tracecred = NULL; mtx_unlock(&ktrace_mtx); PROC_UNLOCK(p); if (tracevp != NULL) { locked = VFS_LOCK_GIANT(tracevp->v_mount); vrele(tracevp); VFS_UNLOCK_GIANT(locked); } if (tracecred != NULL) crfree(tracecred); #endif /* * Release reference to text vnode */ if ((vtmp = p->p_textvp) != NULL) { p->p_textvp = NULL; locked = VFS_LOCK_GIANT(vtmp->v_mount); vrele(vtmp); VFS_UNLOCK_GIANT(locked); } /* * Release our limits structure. */ PROC_LOCK(p); plim = p->p_limit; p->p_limit = NULL; PROC_UNLOCK(p); lim_free(plim); /* * Remove proc from allproc queue and pidhash chain. * Place onto zombproc. Unlink from parent's child list. */ sx_xlock(&allproc_lock); LIST_REMOVE(p, p_list); LIST_INSERT_HEAD(&zombproc, p, p_list); LIST_REMOVE(p, p_hash); sx_xunlock(&allproc_lock); sx_xlock(&proctree_lock); q = LIST_FIRST(&p->p_children); if (q != NULL) /* only need this if any child is S_ZOMB */ wakeup(initproc); for (; q != NULL; q = nq) { nq = LIST_NEXT(q, p_sibling); PROC_LOCK(q); proc_reparent(q, initproc); q->p_sigparent = SIGCHLD; /* * Traced processes are killed * since their existence means someone is screwing up. */ if (q->p_flag & P_TRACED) { q->p_flag &= ~(P_TRACED | P_STOPPED_TRACE); psignal(q, SIGKILL); } PROC_UNLOCK(q); } /* * Save exit status and finalize rusage info except for times, - * adding in child rusage info. + * adding in child rusage info later when our time is locked. */ PROC_LOCK(p); p->p_xstat = rv; p->p_xthread = td; p->p_stats->p_ru.ru_nvcsw++; *p->p_ru = p->p_stats->p_ru; - ruadd(p->p_ru, &p->p_rux, &p->p_stats->p_cru, &p->p_crux); /* * Notify interested parties of our demise. */ KNOTE_LOCKED(&p->p_klist, NOTE_EXIT); /* * Just delete all entries in the p_klist. At this point we won't * report any more events, and there are nasty race conditions that * can beat us if we don't. */ knlist_clear(&p->p_klist, 1); /* * Notify parent that we're gone. If parent has the PS_NOCLDWAIT * flag set, or if the handler is set to SIG_IGN, notify process * 1 instead (and hope it will handle this situation). */ PROC_LOCK(p->p_pptr); mtx_lock(&p->p_pptr->p_sigacts->ps_mtx); if (p->p_pptr->p_sigacts->ps_flag & (PS_NOCLDWAIT | PS_CLDSIGIGN)) { struct proc *pp; mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx); pp = p->p_pptr; PROC_UNLOCK(pp); proc_reparent(p, initproc); p->p_sigparent = SIGCHLD; PROC_LOCK(p->p_pptr); /* * If this was the last child of our parent, notify * parent, so in case he was wait(2)ing, he will * continue. */ if (LIST_EMPTY(&pp->p_children)) wakeup(pp); } else mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx); if (p->p_pptr == initproc) psignal(p->p_pptr, SIGCHLD); else if (p->p_sigparent != 0) { if (p->p_sigparent == SIGCHLD) childproc_exited(p); else /* LINUX thread */ psignal(p->p_pptr, p->p_sigparent); } PROC_UNLOCK(p->p_pptr); /* * If this is a kthread, then wakeup anyone waiting for it to exit. */ if (p->p_flag & P_KTHREAD) wakeup(p); PROC_UNLOCK(p); /* * Finally, call machine-dependent code to release the remaining * resources including address space. * The address space is released by "vmspace_exitfree(p)" in * vm_waitproc(). */ cpu_exit(td); WITNESS_WARN(WARN_PANIC, &proctree_lock.sx_object, "process (pid %d) exiting", p->p_pid); PROC_LOCK(p); PROC_LOCK(p->p_pptr); sx_xunlock(&proctree_lock); /* * We have to wait until after acquiring all locks before * changing p_state. We need to avoid all possible context * switches (including ones from blocking on a mutex) while * marked as a zombie. We also have to set the zombie state * before we release the parent process' proc lock to avoid * a lost wakeup. So, we first call wakeup, then we grab the * sched lock, update the state, and release the parent process' * proc lock. */ wakeup(p->p_pptr); mtx_lock_spin(&sched_lock); p->p_state = PRS_ZOMBIE; PROC_UNLOCK(p->p_pptr); + ruadd(p->p_ru, &p->p_rux, &p->p_stats->p_cru, &p->p_crux); + /* Do the same timestamp bookkeeping that mi_switch() would do. */ binuptime(&new_switchtime); bintime_add(&p->p_rux.rux_runtime, &new_switchtime); bintime_sub(&p->p_rux.rux_runtime, PCPU_PTR(switchtime)); PCPU_SET(switchtime, new_switchtime); PCPU_SET(switchticks, ticks); cnt.v_swtch++; sched_exit(p->p_pptr, td); /* * Hopefully no one will try to deliver a signal to the process this * late in the game. */ knlist_destroy(&p->p_klist); /* * Make sure the scheduler takes this thread out of its tables etc. * This will also release this thread's reference to the ucred. * Other thread parts to release include pcb bits and such. */ thread_exit(); } #ifndef _SYS_SYSPROTO_H_ struct abort2_args { char *why; int nargs; void **args; }; #endif /* * MPSAFE. */ int abort2(struct thread *td, struct abort2_args *uap) { struct proc *p = td->td_proc; struct sbuf *sb; void *uargs[16]; int error, i, sig; error = 0; /* satisfy compiler */ /* * Do it right now so we can log either proper call of abort2(), or * note, that invalid argument was passed. 512 is big enough to * handle 16 arguments' descriptions with additional comments. */ sb = sbuf_new(NULL, NULL, 512, SBUF_FIXEDLEN); sbuf_clear(sb); sbuf_printf(sb, "%s(pid %d uid %d) aborted: ", p->p_comm, p->p_pid, td->td_ucred->cr_uid); /* * Since we can't return from abort2(), send SIGKILL in cases, where * abort2() was called improperly */ sig = SIGKILL; /* Prevent from DoSes from user-space. */ if (uap->nargs < 0 || uap->nargs > 16) goto out; if (uap->args == NULL) goto out; error = copyin(uap->args, uargs, uap->nargs * sizeof(void *)); if (error != 0) goto out; /* * Limit size of 'reason' string to 128. Will fit even when * maximal number of arguments was chosen to be logged. */ if (uap->why != NULL) { error = sbuf_copyin(sb, uap->why, 128); if (error < 0) goto out; } else { sbuf_printf(sb, "(null)"); } if (uap->nargs) { sbuf_printf(sb, "("); for (i = 0;i < uap->nargs; i++) sbuf_printf(sb, "%s%p", i == 0 ? "" : ", ", uargs[i]); sbuf_printf(sb, ")"); } /* * Final stage: arguments were proper, string has been * successfully copied from userspace, and copying pointers * from user-space succeed. */ sig = SIGABRT; out: if (sig == SIGKILL) { sbuf_trim(sb); sbuf_printf(sb, " (Reason text inaccessible)"); } sbuf_cat(sb, "\n"); sbuf_finish(sb); log(LOG_INFO, "%s", sbuf_data(sb)); sbuf_delete(sb); exit1(td, W_EXITCODE(0, sig)); return (0); } #ifdef COMPAT_43 /* * The dirty work is handled by kern_wait(). * * MPSAFE. */ int owait(struct thread *td, struct owait_args *uap __unused) { int error, status; error = kern_wait(td, WAIT_ANY, &status, 0, NULL); if (error == 0) td->td_retval[1] = status; return (error); } #endif /* COMPAT_43 */ /* * The dirty work is handled by kern_wait(). * * MPSAFE. */ int wait4(struct thread *td, struct wait_args *uap) { struct rusage ru, *rup; int error, status; if (uap->rusage != NULL) rup = &ru; else rup = NULL; error = kern_wait(td, uap->pid, &status, uap->options, rup); if (uap->status != NULL && error == 0) error = copyout(&status, uap->status, sizeof(status)); if (uap->rusage != NULL && error == 0) error = copyout(&ru, uap->rusage, sizeof(struct rusage)); return (error); } int kern_wait(struct thread *td, pid_t pid, int *status, int options, struct rusage *rusage) { struct proc *p, *q, *t; int error, nfound; q = td->td_proc; if (pid == 0) { PROC_LOCK(q); pid = -q->p_pgid; PROC_UNLOCK(q); } if (options &~ (WUNTRACED|WNOHANG|WCONTINUED|WLINUXCLONE)) return (EINVAL); loop: if (q->p_flag & P_STATCHILD) { PROC_LOCK(q); q->p_flag &= ~P_STATCHILD; PROC_UNLOCK(q); } nfound = 0; sx_xlock(&proctree_lock); LIST_FOREACH(p, &q->p_children, p_sibling) { PROC_LOCK(p); if (pid != WAIT_ANY && p->p_pid != pid && p->p_pgid != -pid) { PROC_UNLOCK(p); continue; } if (p_canwait(td, p)) { PROC_UNLOCK(p); continue; } /* * This special case handles a kthread spawned by linux_clone * (see linux_misc.c). The linux_wait4 and linux_waitpid * functions need to be able to distinguish between waiting * on a process and waiting on a thread. It is a thread if * p_sigparent is not SIGCHLD, and the WLINUXCLONE option * signifies we want to wait for threads and not processes. */ if ((p->p_sigparent != SIGCHLD) ^ ((options & WLINUXCLONE) != 0)) { PROC_UNLOCK(p); continue; } nfound++; if (p->p_state == PRS_ZOMBIE) { /* * It is possible that the last thread of this * process is still running on another CPU * in thread_exit() after having dropped the process * lock via PROC_UNLOCK() but before it has completed * cpu_throw(). In that case, the other thread must * still hold sched_lock, so simply by acquiring * sched_lock once we will wait long enough for the * thread to exit in that case. */ mtx_lock_spin(&sched_lock); mtx_unlock_spin(&sched_lock); td->td_retval[0] = p->p_pid; if (status) *status = p->p_xstat; /* convert to int */ if (rusage) { *rusage = *p->p_ru; calcru(p, &rusage->ru_utime, &rusage->ru_stime); } PROC_LOCK(q); sigqueue_take(p->p_ksi); PROC_UNLOCK(q); /* * If we got the child via a ptrace 'attach', * we need to give it back to the old parent. */ PROC_UNLOCK(p); if (p->p_oppid && (t = pfind(p->p_oppid)) != NULL) { PROC_LOCK(p); p->p_oppid = 0; proc_reparent(p, t); PROC_UNLOCK(p); tdsignal(t, NULL, SIGCHLD, p->p_ksi); wakeup(t); PROC_UNLOCK(t); sx_xunlock(&proctree_lock); return (0); } /* * Remove other references to this process to ensure * we have an exclusive reference. */ sx_xlock(&allproc_lock); LIST_REMOVE(p, p_list); /* off zombproc */ sx_xunlock(&allproc_lock); LIST_REMOVE(p, p_sibling); leavepgrp(p); sx_xunlock(&proctree_lock); /* * As a side effect of this lock, we know that * all other writes to this proc are visible now, so * no more locking is needed for p. */ PROC_LOCK(p); p->p_xstat = 0; /* XXX: why? */ PROC_UNLOCK(p); PROC_LOCK(q); ruadd(&q->p_stats->p_cru, &q->p_crux, p->p_ru, &p->p_rux); PROC_UNLOCK(q); FREE(p->p_ru, M_ZOMBIE); p->p_ru = NULL; /* * Decrement the count of procs running with this uid. */ (void)chgproccnt(p->p_ucred->cr_ruidinfo, -1, 0); /* * Free credentials, arguments, and sigacts. */ crfree(p->p_ucred); p->p_ucred = NULL; pargs_drop(p->p_args); p->p_args = NULL; sigacts_free(p->p_sigacts); p->p_sigacts = NULL; /* * Do any thread-system specific cleanups. */ thread_wait(p); /* * Give vm and machine-dependent layer a chance * to free anything that cpu_exit couldn't * release while still running in process context. */ vm_waitproc(p); #ifdef MAC mac_destroy_proc(p); #endif KASSERT(FIRST_THREAD_IN_PROC(p), ("kern_wait: no residual thread!")); uma_zfree(proc_zone, p); sx_xlock(&allproc_lock); nprocs--; sx_xunlock(&allproc_lock); return (0); } mtx_lock_spin(&sched_lock); if ((p->p_flag & P_STOPPED_SIG) && (p->p_suspcount == p->p_numthreads) && (p->p_flag & P_WAITED) == 0 && (p->p_flag & P_TRACED || options & WUNTRACED)) { mtx_unlock_spin(&sched_lock); p->p_flag |= P_WAITED; sx_xunlock(&proctree_lock); td->td_retval[0] = p->p_pid; if (status) *status = W_STOPCODE(p->p_xstat); PROC_UNLOCK(p); PROC_LOCK(q); sigqueue_take(p->p_ksi); PROC_UNLOCK(q); return (0); } mtx_unlock_spin(&sched_lock); if (options & WCONTINUED && (p->p_flag & P_CONTINUED)) { sx_xunlock(&proctree_lock); td->td_retval[0] = p->p_pid; p->p_flag &= ~P_CONTINUED; PROC_UNLOCK(p); PROC_LOCK(q); sigqueue_take(p->p_ksi); PROC_UNLOCK(q); if (status) *status = SIGCONT; return (0); } PROC_UNLOCK(p); } if (nfound == 0) { sx_xunlock(&proctree_lock); return (ECHILD); } if (options & WNOHANG) { sx_xunlock(&proctree_lock); td->td_retval[0] = 0; return (0); } PROC_LOCK(q); sx_xunlock(&proctree_lock); if (q->p_flag & P_STATCHILD) { q->p_flag &= ~P_STATCHILD; error = 0; } else error = msleep(q, &q->p_mtx, PWAIT | PCATCH, "wait", 0); PROC_UNLOCK(q); if (error) return (error); goto loop; } /* * Make process 'parent' the new parent of process 'child'. * Must be called with an exclusive hold of proctree lock. */ void proc_reparent(struct proc *child, struct proc *parent) { sx_assert(&proctree_lock, SX_XLOCKED); PROC_LOCK_ASSERT(child, MA_OWNED); if (child->p_pptr == parent) return; LIST_REMOVE(child, p_sibling); LIST_INSERT_HEAD(&parent->p_children, child, p_sibling); child->p_pptr = parent; } diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c index 397a7c507bce..ccfa07e70017 100644 --- a/sys/kern/kern_resource.c +++ b/sys/kern/kern_resource.c @@ -1,1178 +1,1178 @@ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_resource.c 8.5 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures"); static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures"); #define UIHASH(uid) (&uihashtbl[(uid) & uihash]) static struct mtx uihashtbl_mtx; static LIST_HEAD(uihashhead, uidinfo) *uihashtbl; static u_long uihash; /* size of hash table - 1 */ static void calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up, struct timeval *sp); static int donice(struct thread *td, struct proc *chgp, int n); static struct uidinfo *uilookup(uid_t uid); /* * Resource controls and accounting. */ #ifndef _SYS_SYSPROTO_H_ struct getpriority_args { int which; int who; }; #endif /* * MPSAFE */ int getpriority(td, uap) struct thread *td; register struct getpriority_args *uap; { struct proc *p; struct pgrp *pg; int error, low; error = 0; low = PRIO_MAX + 1; switch (uap->which) { case PRIO_PROCESS: if (uap->who == 0) low = td->td_proc->p_nice; else { p = pfind(uap->who); if (p == NULL) break; if (p_cansee(td, p) == 0) low = p->p_nice; PROC_UNLOCK(p); } break; case PRIO_PGRP: sx_slock(&proctree_lock); if (uap->who == 0) { pg = td->td_proc->p_pgrp; PGRP_LOCK(pg); } else { pg = pgfind(uap->who); if (pg == NULL) { sx_sunlock(&proctree_lock); break; } } sx_sunlock(&proctree_lock); LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); if (!p_cansee(td, p)) { if (p->p_nice < low) low = p->p_nice; } PROC_UNLOCK(p); } PGRP_UNLOCK(pg); break; case PRIO_USER: if (uap->who == 0) uap->who = td->td_ucred->cr_uid; sx_slock(&allproc_lock); LIST_FOREACH(p, &allproc, p_list) { PROC_LOCK(p); if (!p_cansee(td, p) && p->p_ucred->cr_uid == uap->who) { if (p->p_nice < low) low = p->p_nice; } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); break; default: error = EINVAL; break; } if (low == PRIO_MAX + 1 && error == 0) error = ESRCH; td->td_retval[0] = low; return (error); } #ifndef _SYS_SYSPROTO_H_ struct setpriority_args { int which; int who; int prio; }; #endif /* * MPSAFE */ int setpriority(td, uap) struct thread *td; struct setpriority_args *uap; { struct proc *curp, *p; struct pgrp *pg; int found = 0, error = 0; curp = td->td_proc; switch (uap->which) { case PRIO_PROCESS: if (uap->who == 0) { PROC_LOCK(curp); error = donice(td, curp, uap->prio); PROC_UNLOCK(curp); } else { p = pfind(uap->who); if (p == 0) break; if (p_cansee(td, p) == 0) error = donice(td, p, uap->prio); PROC_UNLOCK(p); } found++; break; case PRIO_PGRP: sx_slock(&proctree_lock); if (uap->who == 0) { pg = curp->p_pgrp; PGRP_LOCK(pg); } else { pg = pgfind(uap->who); if (pg == NULL) { sx_sunlock(&proctree_lock); break; } } sx_sunlock(&proctree_lock); LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); if (!p_cansee(td, p)) { error = donice(td, p, uap->prio); found++; } PROC_UNLOCK(p); } PGRP_UNLOCK(pg); break; case PRIO_USER: if (uap->who == 0) uap->who = td->td_ucred->cr_uid; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_ucred->cr_uid == uap->who && !p_cansee(td, p)) { error = donice(td, p, uap->prio); found++; } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); break; default: error = EINVAL; break; } if (found == 0 && error == 0) error = ESRCH; return (error); } /* * Set "nice" for a (whole) process. */ static int donice(struct thread *td, struct proc *p, int n) { int error; PROC_LOCK_ASSERT(p, MA_OWNED); if ((error = p_cansched(td, p))) return (error); if (n > PRIO_MAX) n = PRIO_MAX; if (n < PRIO_MIN) n = PRIO_MIN; if (n < p->p_nice && suser(td) != 0) return (EACCES); mtx_lock_spin(&sched_lock); sched_nice(p, n); mtx_unlock_spin(&sched_lock); return (0); } /* * Set realtime priority. * * MPSAFE */ #ifndef _SYS_SYSPROTO_H_ struct rtprio_args { int function; pid_t pid; struct rtprio *rtp; }; #endif int rtprio(td, uap) struct thread *td; /* curthread */ register struct rtprio_args *uap; { struct proc *curp; struct proc *p; struct ksegrp *kg; struct rtprio rtp; int cierror, error; /* Perform copyin before acquiring locks if needed. */ if (uap->function == RTP_SET) cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio)); else cierror = 0; curp = td->td_proc; if (uap->pid == 0) { p = curp; PROC_LOCK(p); } else { p = pfind(uap->pid); if (p == NULL) return (ESRCH); } switch (uap->function) { case RTP_LOOKUP: if ((error = p_cansee(td, p))) break; mtx_lock_spin(&sched_lock); /* * Return OUR priority if no pid specified, * or if one is, report the highest priority * in the process. There isn't much more you can do as * there is only room to return a single priority. * XXXKSE: maybe need a new interface to report * priorities of multiple system scope threads. * Note: specifying our own pid is not the same * as leaving it zero. */ if (uap->pid == 0) { pri_to_rtp(td->td_ksegrp, &rtp); } else { struct rtprio rtp2; rtp.type = RTP_PRIO_IDLE; rtp.prio = RTP_PRIO_MAX; FOREACH_KSEGRP_IN_PROC(p, kg) { pri_to_rtp(kg, &rtp2); if (rtp2.type < rtp.type || (rtp2.type == rtp.type && rtp2.prio < rtp.prio)) { rtp.type = rtp2.type; rtp.prio = rtp2.prio; } } } mtx_unlock_spin(&sched_lock); PROC_UNLOCK(p); return (copyout(&rtp, uap->rtp, sizeof(struct rtprio))); case RTP_SET: if ((error = p_cansched(td, p)) || (error = cierror)) break; /* Disallow setting rtprio in most cases if not superuser. */ if (suser(td) != 0) { /* can't set someone else's */ if (uap->pid) { error = EPERM; break; } /* can't set realtime priority */ /* * Realtime priority has to be restricted for reasons which should be * obvious. However, for idle priority, there is a potential for * system deadlock if an idleprio process gains a lock on a resource * that other processes need (and the idleprio process can't run * due to a CPU-bound normal process). Fix me! XXX */ #if 0 if (RTP_PRIO_IS_REALTIME(rtp.type)) { #else if (rtp.type != RTP_PRIO_NORMAL) { #endif error = EPERM; break; } } /* * If we are setting our own priority, set just our * KSEGRP but if we are doing another process, * do all the groups on that process. If we * specify our own pid we do the latter. */ mtx_lock_spin(&sched_lock); if (uap->pid == 0) { error = rtp_to_pri(&rtp, td->td_ksegrp); } else { FOREACH_KSEGRP_IN_PROC(p, kg) { if ((error = rtp_to_pri(&rtp, kg)) != 0) { break; } } } mtx_unlock_spin(&sched_lock); break; default: error = EINVAL; break; } PROC_UNLOCK(p); return (error); } int rtp_to_pri(struct rtprio *rtp, struct ksegrp *kg) { mtx_assert(&sched_lock, MA_OWNED); if (rtp->prio > RTP_PRIO_MAX) return (EINVAL); switch (RTP_PRIO_BASE(rtp->type)) { case RTP_PRIO_REALTIME: kg->kg_user_pri = PRI_MIN_REALTIME + rtp->prio; break; case RTP_PRIO_NORMAL: kg->kg_user_pri = PRI_MIN_TIMESHARE + rtp->prio; break; case RTP_PRIO_IDLE: kg->kg_user_pri = PRI_MIN_IDLE + rtp->prio; break; default: return (EINVAL); } sched_class(kg, rtp->type); if (curthread->td_ksegrp == kg) { sched_prio(curthread, kg->kg_user_pri); /* XXX dubious */ } return (0); } void pri_to_rtp(struct ksegrp *kg, struct rtprio *rtp) { mtx_assert(&sched_lock, MA_OWNED); switch (PRI_BASE(kg->kg_pri_class)) { case PRI_REALTIME: rtp->prio = kg->kg_user_pri - PRI_MIN_REALTIME; break; case PRI_TIMESHARE: rtp->prio = kg->kg_user_pri - PRI_MIN_TIMESHARE; break; case PRI_IDLE: rtp->prio = kg->kg_user_pri - PRI_MIN_IDLE; break; default: break; } rtp->type = kg->kg_pri_class; } #if defined(COMPAT_43) #ifndef _SYS_SYSPROTO_H_ struct osetrlimit_args { u_int which; struct orlimit *rlp; }; #endif /* * MPSAFE */ int osetrlimit(td, uap) struct thread *td; register struct osetrlimit_args *uap; { struct orlimit olim; struct rlimit lim; int error; if ((error = copyin(uap->rlp, &olim, sizeof(struct orlimit)))) return (error); lim.rlim_cur = olim.rlim_cur; lim.rlim_max = olim.rlim_max; error = kern_setrlimit(td, uap->which, &lim); return (error); } #ifndef _SYS_SYSPROTO_H_ struct ogetrlimit_args { u_int which; struct orlimit *rlp; }; #endif /* * MPSAFE */ int ogetrlimit(td, uap) struct thread *td; register struct ogetrlimit_args *uap; { struct orlimit olim; struct rlimit rl; struct proc *p; int error; if (uap->which >= RLIM_NLIMITS) return (EINVAL); p = td->td_proc; PROC_LOCK(p); lim_rlimit(p, uap->which, &rl); PROC_UNLOCK(p); /* * XXX would be more correct to convert only RLIM_INFINITY to the * old RLIM_INFINITY and fail with EOVERFLOW for other larger * values. Most 64->32 and 32->16 conversions, including not * unimportant ones of uids are even more broken than what we * do here (they blindly truncate). We don't do this correctly * here since we have little experience with EOVERFLOW yet. * Elsewhere, getuid() can't fail... */ olim.rlim_cur = rl.rlim_cur > 0x7fffffff ? 0x7fffffff : rl.rlim_cur; olim.rlim_max = rl.rlim_max > 0x7fffffff ? 0x7fffffff : rl.rlim_max; error = copyout(&olim, uap->rlp, sizeof(olim)); return (error); } #endif /* COMPAT_43 */ #ifndef _SYS_SYSPROTO_H_ struct __setrlimit_args { u_int which; struct rlimit *rlp; }; #endif /* * MPSAFE */ int setrlimit(td, uap) struct thread *td; register struct __setrlimit_args *uap; { struct rlimit alim; int error; if ((error = copyin(uap->rlp, &alim, sizeof(struct rlimit)))) return (error); error = kern_setrlimit(td, uap->which, &alim); return (error); } int kern_setrlimit(td, which, limp) struct thread *td; u_int which; struct rlimit *limp; { struct plimit *newlim, *oldlim; struct proc *p; register struct rlimit *alimp; rlim_t oldssiz; int error; if (which >= RLIM_NLIMITS) return (EINVAL); /* * Preserve historical bugs by treating negative limits as unsigned. */ if (limp->rlim_cur < 0) limp->rlim_cur = RLIM_INFINITY; if (limp->rlim_max < 0) limp->rlim_max = RLIM_INFINITY; oldssiz = 0; p = td->td_proc; newlim = lim_alloc(); PROC_LOCK(p); oldlim = p->p_limit; alimp = &oldlim->pl_rlimit[which]; if (limp->rlim_cur > alimp->rlim_max || limp->rlim_max > alimp->rlim_max) if ((error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL))) { PROC_UNLOCK(p); lim_free(newlim); return (error); } if (limp->rlim_cur > limp->rlim_max) limp->rlim_cur = limp->rlim_max; lim_copy(newlim, oldlim); alimp = &newlim->pl_rlimit[which]; switch (which) { case RLIMIT_CPU: mtx_lock_spin(&sched_lock); p->p_cpulimit = limp->rlim_cur; mtx_unlock_spin(&sched_lock); break; case RLIMIT_DATA: if (limp->rlim_cur > maxdsiz) limp->rlim_cur = maxdsiz; if (limp->rlim_max > maxdsiz) limp->rlim_max = maxdsiz; break; case RLIMIT_STACK: if (limp->rlim_cur > maxssiz) limp->rlim_cur = maxssiz; if (limp->rlim_max > maxssiz) limp->rlim_max = maxssiz; oldssiz = alimp->rlim_cur; break; case RLIMIT_NOFILE: if (limp->rlim_cur > maxfilesperproc) limp->rlim_cur = maxfilesperproc; if (limp->rlim_max > maxfilesperproc) limp->rlim_max = maxfilesperproc; break; case RLIMIT_NPROC: if (limp->rlim_cur > maxprocperuid) limp->rlim_cur = maxprocperuid; if (limp->rlim_max > maxprocperuid) limp->rlim_max = maxprocperuid; if (limp->rlim_cur < 1) limp->rlim_cur = 1; if (limp->rlim_max < 1) limp->rlim_max = 1; break; } *alimp = *limp; p->p_limit = newlim; PROC_UNLOCK(p); lim_free(oldlim); if (which == RLIMIT_STACK) { /* * Stack is allocated to the max at exec time with only * "rlim_cur" bytes accessible. If stack limit is going * up make more accessible, if going down make inaccessible. */ if (limp->rlim_cur != oldssiz) { vm_offset_t addr; vm_size_t size; vm_prot_t prot; if (limp->rlim_cur > oldssiz) { prot = p->p_sysent->sv_stackprot; size = limp->rlim_cur - oldssiz; addr = p->p_sysent->sv_usrstack - limp->rlim_cur; } else { prot = VM_PROT_NONE; size = oldssiz - limp->rlim_cur; addr = p->p_sysent->sv_usrstack - oldssiz; } addr = trunc_page(addr); size = round_page(size); (void)vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot, FALSE); } } /* * The data size limit may need to be changed to a value * that makes sense for the 32 bit binary. */ if (p->p_sysent->sv_fixlimits != NULL) p->p_sysent->sv_fixlimits(p); return (0); } #ifndef _SYS_SYSPROTO_H_ struct __getrlimit_args { u_int which; struct rlimit *rlp; }; #endif /* * MPSAFE */ /* ARGSUSED */ int getrlimit(td, uap) struct thread *td; register struct __getrlimit_args *uap; { struct rlimit rlim; struct proc *p; int error; if (uap->which >= RLIM_NLIMITS) return (EINVAL); p = td->td_proc; PROC_LOCK(p); lim_rlimit(p, uap->which, &rlim); PROC_UNLOCK(p); error = copyout(&rlim, uap->rlp, sizeof(struct rlimit)); return (error); } /* * Transform the running time and tick information in proc p into user, * system, and interrupt time usage. */ void calcru(p, up, sp) struct proc *p; struct timeval *up; struct timeval *sp; { struct bintime bt; struct rusage_ext rux; struct thread *td; int bt_valid; PROC_LOCK_ASSERT(p, MA_OWNED); mtx_assert(&sched_lock, MA_NOTOWNED); bt_valid = 0; mtx_lock_spin(&sched_lock); rux = p->p_rux; FOREACH_THREAD_IN_PROC(p, td) { if (TD_IS_RUNNING(td)) { /* * Adjust for the current time slice. This is * actually fairly important since the error here is * on the order of a time quantum which is much * greater than the precision of binuptime(). */ KASSERT(td->td_oncpu != NOCPU, ("%s: running thread has no CPU", __func__)); if (!bt_valid) { binuptime(&bt); bt_valid = 1; } bintime_add(&rux.rux_runtime, &bt); bintime_sub(&rux.rux_runtime, &pcpu_find(td->td_oncpu)->pc_switchtime); } } - mtx_unlock_spin(&sched_lock); calcru1(p, &rux, up, sp); + mtx_unlock_spin(&sched_lock); p->p_rux.rux_uu = rux.rux_uu; p->p_rux.rux_su = rux.rux_su; p->p_rux.rux_iu = rux.rux_iu; } void calccru(p, up, sp) struct proc *p; struct timeval *up; struct timeval *sp; { PROC_LOCK_ASSERT(p, MA_OWNED); calcru1(p, &p->p_crux, up, sp); } static void calcru1(p, ruxp, up, sp) struct proc *p; struct rusage_ext *ruxp; struct timeval *up; struct timeval *sp; { struct timeval tv; /* {user, system, interrupt, total} {ticks, usec}; previous tu: */ u_int64_t ut, uu, st, su, it, iu, tt, tu, ptu; ut = ruxp->rux_uticks; st = ruxp->rux_sticks; it = ruxp->rux_iticks; tt = ut + st + it; if (tt == 0) { st = 1; tt = 1; } bintime2timeval(&ruxp->rux_runtime, &tv); tu = (u_int64_t)tv.tv_sec * 1000000 + tv.tv_usec; ptu = ruxp->rux_uu + ruxp->rux_su + ruxp->rux_iu; if (tu < ptu) { printf( "calcru: runtime went backwards from %ju usec to %ju usec for pid %d (%s)\n", (uintmax_t)ptu, (uintmax_t)tu, p->p_pid, p->p_comm); tu = ptu; } if ((int64_t)tu < 0) { printf("calcru: negative runtime of %jd usec for pid %d (%s)\n", (intmax_t)tu, p->p_pid, p->p_comm); tu = ptu; } /* Subdivide tu. */ uu = (tu * ut) / tt; su = (tu * st) / tt; iu = tu - uu - su; /* Enforce monotonicity. */ if (uu < ruxp->rux_uu || su < ruxp->rux_su || iu < ruxp->rux_iu) { if (uu < ruxp->rux_uu) uu = ruxp->rux_uu; else if (uu + ruxp->rux_su + ruxp->rux_iu > tu) uu = tu - ruxp->rux_su - ruxp->rux_iu; if (st == 0) su = ruxp->rux_su; else { su = ((tu - uu) * st) / (st + it); if (su < ruxp->rux_su) su = ruxp->rux_su; else if (uu + su + ruxp->rux_iu > tu) su = tu - uu - ruxp->rux_iu; } KASSERT(uu + su + ruxp->rux_iu <= tu, ("calcru: monotonisation botch 1")); iu = tu - uu - su; KASSERT(iu >= ruxp->rux_iu, ("calcru: monotonisation botch 2")); } ruxp->rux_uu = uu; ruxp->rux_su = su; ruxp->rux_iu = iu; up->tv_sec = uu / 1000000; up->tv_usec = uu % 1000000; sp->tv_sec = su / 1000000; sp->tv_usec = su % 1000000; } #ifndef _SYS_SYSPROTO_H_ struct getrusage_args { int who; struct rusage *rusage; }; #endif /* * MPSAFE */ int getrusage(td, uap) register struct thread *td; register struct getrusage_args *uap; { struct rusage ru; int error; error = kern_getrusage(td, uap->who, &ru); if (error == 0) error = copyout(&ru, uap->rusage, sizeof(struct rusage)); return (error); } int kern_getrusage(td, who, rup) struct thread *td; int who; struct rusage *rup; { struct proc *p; p = td->td_proc; PROC_LOCK(p); switch (who) { case RUSAGE_SELF: *rup = p->p_stats->p_ru; calcru(p, &rup->ru_utime, &rup->ru_stime); break; case RUSAGE_CHILDREN: *rup = p->p_stats->p_cru; calccru(p, &rup->ru_utime, &rup->ru_stime); break; default: PROC_UNLOCK(p); return (EINVAL); } PROC_UNLOCK(p); return (0); } void ruadd(ru, rux, ru2, rux2) struct rusage *ru; struct rusage_ext *rux; struct rusage *ru2; struct rusage_ext *rux2; { register long *ip, *ip2; register int i; bintime_add(&rux->rux_runtime, &rux2->rux_runtime); rux->rux_uticks += rux2->rux_uticks; rux->rux_sticks += rux2->rux_sticks; rux->rux_iticks += rux2->rux_iticks; rux->rux_uu += rux2->rux_uu; rux->rux_su += rux2->rux_su; rux->rux_iu += rux2->rux_iu; if (ru->ru_maxrss < ru2->ru_maxrss) ru->ru_maxrss = ru2->ru_maxrss; ip = &ru->ru_first; ip2 = &ru2->ru_first; for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--) *ip++ += *ip2++; } /* * Allocate a new resource limits structure and initialize its * reference count and mutex pointer. */ struct plimit * lim_alloc() { struct plimit *limp; limp = malloc(sizeof(struct plimit), M_PLIMIT, M_WAITOK); refcount_init(&limp->pl_refcnt, 1); return (limp); } struct plimit * lim_hold(limp) struct plimit *limp; { refcount_acquire(&limp->pl_refcnt); return (limp); } void lim_free(limp) struct plimit *limp; { KASSERT(limp->pl_refcnt > 0, ("plimit refcnt underflow")); if (refcount_release(&limp->pl_refcnt)) free((void *)limp, M_PLIMIT); } /* * Make a copy of the plimit structure. * We share these structures copy-on-write after fork. */ void lim_copy(dst, src) struct plimit *dst, *src; { KASSERT(dst->pl_refcnt == 1, ("lim_copy to shared limit")); bcopy(src->pl_rlimit, dst->pl_rlimit, sizeof(src->pl_rlimit)); } /* * Return the hard limit for a particular system resource. The * which parameter specifies the index into the rlimit array. */ rlim_t lim_max(struct proc *p, int which) { struct rlimit rl; lim_rlimit(p, which, &rl); return (rl.rlim_max); } /* * Return the current (soft) limit for a particular system resource. * The which parameter which specifies the index into the rlimit array */ rlim_t lim_cur(struct proc *p, int which) { struct rlimit rl; lim_rlimit(p, which, &rl); return (rl.rlim_cur); } /* * Return a copy of the entire rlimit structure for the system limit * specified by 'which' in the rlimit structure pointed to by 'rlp'. */ void lim_rlimit(struct proc *p, int which, struct rlimit *rlp) { PROC_LOCK_ASSERT(p, MA_OWNED); KASSERT(which >= 0 && which < RLIM_NLIMITS, ("request for invalid resource limit")); *rlp = p->p_limit->pl_rlimit[which]; } /* * Find the uidinfo structure for a uid. This structure is used to * track the total resource consumption (process count, socket buffer * size, etc.) for the uid and impose limits. */ void uihashinit() { uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash); mtx_init(&uihashtbl_mtx, "uidinfo hash", NULL, MTX_DEF); } /* * Look up a uidinfo struct for the parameter uid. * uihashtbl_mtx must be locked. */ static struct uidinfo * uilookup(uid) uid_t uid; { struct uihashhead *uipp; struct uidinfo *uip; mtx_assert(&uihashtbl_mtx, MA_OWNED); uipp = UIHASH(uid); LIST_FOREACH(uip, uipp, ui_hash) if (uip->ui_uid == uid) break; return (uip); } /* * Find or allocate a struct uidinfo for a particular uid. * Increase refcount on uidinfo struct returned. * uifree() should be called on a struct uidinfo when released. */ struct uidinfo * uifind(uid) uid_t uid; { struct uidinfo *old_uip, *uip; mtx_lock(&uihashtbl_mtx); uip = uilookup(uid); if (uip == NULL) { mtx_unlock(&uihashtbl_mtx); uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO); mtx_lock(&uihashtbl_mtx); /* * There's a chance someone created our uidinfo while we * were in malloc and not holding the lock, so we have to * make sure we don't insert a duplicate uidinfo. */ if ((old_uip = uilookup(uid)) != NULL) { /* Someone else beat us to it. */ free(uip, M_UIDINFO); uip = old_uip; } else { uip->ui_mtxp = mtx_pool_alloc(mtxpool_sleep); uip->ui_uid = uid; LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash); } } uihold(uip); mtx_unlock(&uihashtbl_mtx); return (uip); } /* * Place another refcount on a uidinfo struct. */ void uihold(uip) struct uidinfo *uip; { UIDINFO_LOCK(uip); uip->ui_ref++; UIDINFO_UNLOCK(uip); } /*- * Since uidinfo structs have a long lifetime, we use an * opportunistic refcounting scheme to avoid locking the lookup hash * for each release. * * If the refcount hits 0, we need to free the structure, * which means we need to lock the hash. * Optimal case: * After locking the struct and lowering the refcount, if we find * that we don't need to free, simply unlock and return. * Suboptimal case: * If refcount lowering results in need to free, bump the count * back up, loose the lock and aquire the locks in the proper * order to try again. */ void uifree(uip) struct uidinfo *uip; { /* Prepare for optimal case. */ UIDINFO_LOCK(uip); if (--uip->ui_ref != 0) { UIDINFO_UNLOCK(uip); return; } /* Prepare for suboptimal case. */ uip->ui_ref++; UIDINFO_UNLOCK(uip); mtx_lock(&uihashtbl_mtx); UIDINFO_LOCK(uip); /* * We must subtract one from the count again because we backed out * our initial subtraction before dropping the lock. * Since another thread may have added a reference after we dropped the * initial lock we have to test for zero again. */ if (--uip->ui_ref == 0) { LIST_REMOVE(uip, ui_hash); mtx_unlock(&uihashtbl_mtx); if (uip->ui_sbsize != 0) printf("freeing uidinfo: uid = %d, sbsize = %jd\n", uip->ui_uid, (intmax_t)uip->ui_sbsize); if (uip->ui_proccnt != 0) printf("freeing uidinfo: uid = %d, proccnt = %ld\n", uip->ui_uid, uip->ui_proccnt); UIDINFO_UNLOCK(uip); FREE(uip, M_UIDINFO); return; } mtx_unlock(&uihashtbl_mtx); UIDINFO_UNLOCK(uip); } /* * Change the count associated with number of processes * a given user is using. When 'max' is 0, don't enforce a limit */ int chgproccnt(uip, diff, max) struct uidinfo *uip; int diff; int max; { UIDINFO_LOCK(uip); /* Don't allow them to exceed max, but allow subtraction. */ if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) { UIDINFO_UNLOCK(uip); return (0); } uip->ui_proccnt += diff; if (uip->ui_proccnt < 0) printf("negative proccnt for uid = %d\n", uip->ui_uid); UIDINFO_UNLOCK(uip); return (1); } /* * Change the total socket buffer size a user has used. */ int chgsbsize(uip, hiwat, to, max) struct uidinfo *uip; u_int *hiwat; u_int to; rlim_t max; { rlim_t new; UIDINFO_LOCK(uip); new = uip->ui_sbsize + to - *hiwat; /* Don't allow them to exceed max, but allow subtraction. */ if (to > *hiwat && new > max) { UIDINFO_UNLOCK(uip); return (0); } uip->ui_sbsize = new; UIDINFO_UNLOCK(uip); *hiwat = to; if (new < 0) printf("negative sbsize for uid = %d\n", uip->ui_uid); return (1); }