Index: head/sys/kern/kern_fork.c =================================================================== --- head/sys/kern/kern_fork.c (revision 71695) +++ head/sys/kern/kern_fork.c (revision 71696) @@ -1,708 +1,708 @@ /* * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_fork.c 8.6 (Berkeley) 4/8/94 * $FreeBSD$ */ #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_ATFORK, "atfork", "atfork callback"); static int fast_vfork = 1; SYSCTL_INT(_kern, OID_AUTO, fast_vfork, CTLFLAG_RW, &fast_vfork, 0, "flag to indicate whether we have a fast vfork()"); /* * These are the stuctures used to create a callout list for things to do * when forking a process */ struct forklist { forklist_fn function; TAILQ_ENTRY(forklist) next; }; TAILQ_HEAD(forklist_head, forklist); static struct forklist_head fork_list = TAILQ_HEAD_INITIALIZER(fork_list); #ifndef _SYS_SYSPROTO_H_ struct fork_args { int dummy; }; #endif /* ARGSUSED */ int fork(p, uap) struct proc *p; struct fork_args *uap; { int error; struct proc *p2; error = fork1(p, RFFDG | RFPROC, &p2); if (error == 0) { p->p_retval[0] = p2->p_pid; p->p_retval[1] = 0; } return error; } /* ARGSUSED */ int vfork(p, uap) struct proc *p; struct vfork_args *uap; { int error; struct proc *p2; error = fork1(p, RFFDG | RFPROC | RFPPWAIT | RFMEM, &p2); if (error == 0) { p->p_retval[0] = p2->p_pid; p->p_retval[1] = 0; } return error; } int rfork(p, uap) struct proc *p; struct rfork_args *uap; { int error; struct proc *p2; /* mask kernel only flags out of the user flags 
*/ error = fork1(p, uap->flags & ~RFKERNELONLY, &p2); if (error == 0) { p->p_retval[0] = p2 ? p2->p_pid : 0; p->p_retval[1] = 0; } return error; } int nprocs = 1; /* process 0 */ static int nextpid = 0; /* * Random component to nextpid generation. We mix in a random factor to make * it a little harder to predict. We sanity check the modulus value to avoid * doing it in critical paths. Don't let it be too small or we pointlessly * waste randomness entropy, and don't let it be impossibly large. Using a * modulus that is too big causes a LOT more process table scans and slows * down fork processing as the pidchecked caching is defeated. */ static int randompid = 0; static int sysctl_kern_randompid(SYSCTL_HANDLER_ARGS) { int error, pid; pid = randompid; error = sysctl_handle_int(oidp, &pid, 0, req); if (error || !req->newptr) return (error); if (pid < 0 || pid > PID_MAX - 100) /* out of range */ pid = PID_MAX - 100; else if (pid < 2) /* NOP */ pid = 0; else if (pid < 100) /* Make it reasonable */ pid = 100; randompid = pid; return (error); } SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_kern_randompid, "I", "Random PID modulus"); int fork1(p1, flags, procp) struct proc *p1; /* parent proc */ int flags; struct proc **procp; /* child proc */ { struct proc *p2, *pptr; uid_t uid; struct proc *newproc; int trypid; int ok; static int pidchecked = 0; struct forklist *ep; /* Can't copy and clear */ if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG)) return (EINVAL); /* * Here we don't create a new process, but we divorce * certain parts of a process from itself. */ if ((flags & RFPROC) == 0) { vm_fork(p1, 0, flags); /* * Close all file descriptors. */ if (flags & RFCFDG) { struct filedesc *fdtmp; fdtmp = fdinit(p1); fdfree(p1); p1->p_fd = fdtmp; } /* * Unshare file descriptors (from parent.) 
*/ if (flags & RFFDG) { if (p1->p_fd->fd_refcnt > 1) { struct filedesc *newfd; newfd = fdcopy(p1); fdfree(p1); p1->p_fd = newfd; } } *procp = NULL; return (0); } /* * Although process entries are dynamically created, we still keep * a global limit on the maximum number we will create. Don't allow * a nonprivileged user to use the last process; don't let root * exceed the limit. The variable nprocs is the current number of * processes, maxproc is the limit. */ uid = p1->p_cred->p_ruid; if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) { tablefull("proc"); return (EAGAIN); } /* * Increment the nprocs resource before blocking can occur. There * are hard-limits as to the number of processes that can run. */ nprocs++; /* * Increment the count of procs running with this uid. Don't allow * a nonprivileged user to exceed their current limit. */ ok = chgproccnt(p1->p_cred->p_uidinfo, 1, (uid != 0) ? p1->p_rlimit[RLIMIT_NPROC].rlim_cur : 0); if (!ok) { /* * Back out the process count */ nprocs--; return (EAGAIN); } /* Allocate new proc. */ newproc = zalloc(proc_zone); /* * Setup linkage for kernel based threading */ if((flags & RFTHREAD) != 0) { newproc->p_peers = p1->p_peers; p1->p_peers = newproc; newproc->p_leader = p1->p_leader; } else { newproc->p_peers = NULL; newproc->p_leader = newproc; } newproc->p_vmspace = NULL; /* * Find an unused process ID. We remember a range of unused IDs * ready to use (from nextpid+1 through pidchecked-1). * * If RFHIGHPID is set (used during system boot), do not allocate * low-numbered pids. */ ALLPROC_LOCK(AP_EXCLUSIVE); trypid = nextpid + 1; if (flags & RFHIGHPID) { if (trypid < 10) { trypid = 10; } } else { if (randompid) trypid += arc4random() % randompid; } retry: /* * If the process ID prototype has wrapped around, * restart somewhat above 0, as the low-numbered procs * tend to include daemons that don't exit. 
*/ if (trypid >= PID_MAX) { trypid = trypid % PID_MAX; if (trypid < 100) trypid += 100; pidchecked = 0; } if (trypid >= pidchecked) { int doingzomb = 0; pidchecked = PID_MAX; /* * Scan the active and zombie procs to check whether this pid * is in use. Remember the lowest pid that's greater * than trypid, so we can avoid checking for a while. */ p2 = LIST_FIRST(&allproc); again: for (; p2 != NULL; p2 = LIST_NEXT(p2, p_list)) { while (p2->p_pid == trypid || p2->p_pgrp->pg_id == trypid || p2->p_session->s_sid == trypid) { trypid++; if (trypid >= pidchecked) goto retry; } if (p2->p_pid > trypid && pidchecked > p2->p_pid) pidchecked = p2->p_pid; if (p2->p_pgrp->pg_id > trypid && pidchecked > p2->p_pgrp->pg_id) pidchecked = p2->p_pgrp->pg_id; if (p2->p_session->s_sid > trypid && pidchecked > p2->p_session->s_sid) pidchecked = p2->p_session->s_sid; } if (!doingzomb) { doingzomb = 1; p2 = LIST_FIRST(&zombproc); goto again; } } /* * RFHIGHPID does not mess with the nextpid counter during boot. */ if (flags & RFHIGHPID) pidchecked = 0; else nextpid = trypid; p2 = newproc; p2->p_intr_nesting_level = 0; p2->p_stat = SIDL; /* protect against others */ p2->p_pid = trypid; LIST_INSERT_HEAD(&allproc, p2, p_list); LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash); ALLPROC_LOCK(AP_RELEASE); /* * Make a proc table entry for the new process. * Start by zeroing the section of proc that is zero-initialized, * then copy the section that is copied directly from the parent. */ bzero(&p2->p_startzero, (unsigned) ((caddr_t)&p2->p_endzero - (caddr_t)&p2->p_startzero)); bcopy(&p1->p_startcopy, &p2->p_startcopy, (unsigned) ((caddr_t)&p2->p_endcopy - (caddr_t)&p2->p_startcopy)); mtx_init(&p2->p_mtx, "process lock", MTX_DEF); p2->p_aioinfo = NULL; /* * Duplicate sub-structures as needed. * Increase reference counts on shared objects. * The p_stats and p_sigacts substructs are set in vm_fork. 
*/ p2->p_flag = 0; mtx_enter(&sched_lock, MTX_SPIN); p2->p_sflag = PS_INMEM; if (p1->p_sflag & PS_PROFIL) startprofclock(p2); mtx_exit(&sched_lock, MTX_SPIN); MALLOC(p2->p_cred, struct pcred *, sizeof(struct pcred), M_SUBPROC, M_WAITOK); bcopy(p1->p_cred, p2->p_cred, sizeof(*p2->p_cred)); p2->p_cred->p_refcnt = 1; crhold(p1->p_ucred); uihold(p1->p_cred->p_uidinfo); if (p2->p_prison) { p2->p_prison->pr_ref++; p2->p_flag |= P_JAILED; } if (p2->p_args) p2->p_args->ar_ref++; if (flags & RFSIGSHARE) { p2->p_procsig = p1->p_procsig; p2->p_procsig->ps_refcnt++; if (p1->p_sigacts == &p1->p_addr->u_sigacts) { struct sigacts *newsigacts; int s; /* Create the shared sigacts structure */ MALLOC(newsigacts, struct sigacts *, sizeof(struct sigacts), M_SUBPROC, M_WAITOK); s = splhigh(); /* * Set p_sigacts to the new shared structure. * Note that this is updating p1->p_sigacts at the * same time, since p_sigacts is just a pointer to * the shared p_procsig->ps_sigacts. */ p2->p_sigacts = newsigacts; bcopy(&p1->p_addr->u_sigacts, p2->p_sigacts, sizeof(*p2->p_sigacts)); *p2->p_sigacts = p1->p_addr->u_sigacts; splx(s); } } else { MALLOC(p2->p_procsig, struct procsig *, sizeof(struct procsig), M_SUBPROC, M_WAITOK); bcopy(p1->p_procsig, p2->p_procsig, sizeof(*p2->p_procsig)); p2->p_procsig->ps_refcnt = 1; p2->p_sigacts = NULL; /* finished in vm_fork() */ } if (flags & RFLINUXTHPN) p2->p_sigparent = SIGUSR1; else p2->p_sigparent = SIGCHLD; /* bump references to the text vnode (for procfs) */ p2->p_textvp = p1->p_textvp; if (p2->p_textvp) VREF(p2->p_textvp); if (flags & RFCFDG) p2->p_fd = fdinit(p1); else if (flags & RFFDG) p2->p_fd = fdcopy(p1); else p2->p_fd = fdshare(p1); /* * If p_limit is still copy-on-write, bump refcnt, * otherwise get a copy that won't be modified. * (If PL_SHAREMOD is clear, the structure is shared * copy-on-write.) 
*/ if (p1->p_limit->p_lflags & PL_SHAREMOD) p2->p_limit = limcopy(p1->p_limit); else { p2->p_limit = p1->p_limit; p2->p_limit->p_refcnt++; } /* * Preserve some more flags in subprocess. P_PROFIL has already * been preserved. */ p2->p_flag |= p1->p_flag & P_SUGID; if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT) p2->p_flag |= P_CONTROLT; if (flags & RFPPWAIT) p2->p_flag |= P_PPWAIT; LIST_INSERT_AFTER(p1, p2, p_pglist); /* * Attach the new process to its parent. * * If RFNOWAIT is set, the newly created process becomes a child * of init. This effectively disassociates the child from the * parent. */ if (flags & RFNOWAIT) pptr = initproc; else pptr = p1; PROCTREE_LOCK(PT_EXCLUSIVE); p2->p_pptr = pptr; LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling); PROCTREE_LOCK(PT_RELEASE); LIST_INIT(&p2->p_children); LIST_INIT(&p2->p_heldmtx); LIST_INIT(&p2->p_contested); callout_init(&p2->p_itcallout, 0); callout_init(&p2->p_slpcallout, 1); #ifdef KTRACE /* * Copy traceflag and tracefile if enabled. * If not inherited, these were zeroed above. */ if (p1->p_traceflag&KTRFAC_INHERIT) { p2->p_traceflag = p1->p_traceflag; if ((p2->p_tracep = p1->p_tracep) != NULL) VREF(p2->p_tracep); } #endif /* * set priority of child to be that of parent */ p2->p_estcpu = p1->p_estcpu; /* * This begins the section where we must prevent the parent * from being swapped. */ PHOLD(p1); /* * Finish creating the child process. It will return via a different * execution path later. 
(ie: directly into user mode) */ vm_fork(p1, p2, flags); if (flags == (RFFDG | RFPROC)) { cnt.v_forks++; cnt.v_forkpages += p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize; } else if (flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) { cnt.v_vforks++; cnt.v_vforkpages += p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize; } else if (p1 == &proc0) { cnt.v_kthreads++; cnt.v_kthreadpages += p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize; } else { cnt.v_rforks++; cnt.v_rforkpages += p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize; } /* * Both processes are set up, now check if any loadable modules want * to adjust anything. * What if they have an error? XXX */ TAILQ_FOREACH(ep, &fork_list, next) { (*ep->function)(p1, p2, flags); } /* * If RFSTOPPED not requested, make child runnable and add to * run queue. */ microtime(&(p2->p_stats->p_start)); p2->p_acflag = AFORK; if ((flags & RFSTOPPED) == 0) { splhigh(); mtx_enter(&sched_lock, MTX_SPIN); p2->p_stat = SRUN; setrunqueue(p2); mtx_exit(&sched_lock, MTX_SPIN); spl0(); } /* * Now can be swapped. */ PRELE(p1); /* * tell any interested parties about the new process */ KNOTE(&p1->p_klist, NOTE_FORK | p2->p_pid); /* * Preserve synchronization semantics of vfork. If waiting for * child to exec or exit, set P_PPWAIT on child, and sleep on our * proc (in case of exit). */ while (p2->p_flag & P_PPWAIT) tsleep(p1, PWAIT, "ppwait", 0); /* * Return child proc pointer to parent. */ *procp = p2; return (0); } /* * The next two functionms are general routines to handle adding/deleting * items on the fork callout list. * * at_fork(): * Take the arguments given and put them onto the fork callout list, * However first make sure that it's not already there. * Returns 0 on success or a standard error number. 
*/ int at_fork(function) forklist_fn function; { struct forklist *ep; #ifdef INVARIANTS /* let the programmer know if he's been stupid */ if (rm_at_fork(function)) printf("WARNING: fork callout entry (%p) already present\n", function); #endif ep = malloc(sizeof(*ep), M_ATFORK, M_NOWAIT); if (ep == NULL) return (ENOMEM); ep->function = function; TAILQ_INSERT_TAIL(&fork_list, ep, next); return (0); } /* * Scan the exit callout list for the given item and remove it.. * Returns the number of items removed (0 or 1) */ int rm_at_fork(function) forklist_fn function; { struct forklist *ep; TAILQ_FOREACH(ep, &fork_list, next) { if (ep->function == function) { TAILQ_REMOVE(&fork_list, ep, next); free(ep, M_ATFORK); return(1); } } return (0); } /* * Handle the return of a child process from fork1(). This function * is called from the MD fork_trampoline() entry point. */ void fork_exit(callout, arg, frame) - void *callout(void *, struct trapframe *); + void (*callout)(void *, struct trapframe *); void *arg; struct trapframe *frame; { struct proc *p; mtx_exit(&sched_lock, MTX_SPIN); /* * XXX: We really shouldn't have to do this. */ enable_intr(); #ifdef SMP if (PCPU_GET(switchtime.tv_sec) == 0) microuptime(PCPU_PTR(switchtime)); PCPU_SET(switchticks, ticks); #endif /* * cpu_set_fork_handler intercepts this function call to * have this call a non-return function to stay in kernel mode. * initproc has its own fork handler, but it does return. */ - (*callout)(arg, frame); + callout(arg, frame); /* * Check if a kernel thread misbehaved and returned from its main * function. */ p = CURPROC; if (p->p_flag & P_KTHREAD) { mtx_enter(&Giant, MTX_DEF); printf("Kernel thread \"%s\" (pid %d) exited prematurely.\n", p->p_comm, p->p_pid); kthread_exit(0); } mtx_assert(&Giant, MA_NOTOWNED); } /* * Simplified back end of syscall(), used when returning from fork() * directly into user mode. Giant is not held on entry, and must not * be held on return. 
This function is passed in to fork_exit() as the * first parameter and is called when returning to a new userland process. */ void fork_return(p, frame) struct proc *p; struct trapframe *frame; { userret(p, frame, 0); #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) { if (!mtx_owned(&Giant)) mtx_enter(&Giant, MTX_DEF); ktrsysret(p->p_tracep, SYS_fork, 0, 0); } #endif if (mtx_owned(&Giant)) mtx_exit(&Giant, MTX_DEF); mtx_assert(&Giant, MA_NOTOWNED); } Index: head/sys/sys/proc.h =================================================================== --- head/sys/sys/proc.h (revision 71695) +++ head/sys/sys/proc.h (revision 71696) @@ -1,546 +1,546 @@ /*- * Copyright (c) 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)proc.h 8.15 (Berkeley) 5/19/95 * $FreeBSD$ */ #ifndef _SYS_PROC_H_ #define _SYS_PROC_H_ #include /* For struct callout. */ #include /* For struct klist. */ #include #include #include /* For struct rtprio. */ #include #ifndef _KERNEL #include /* For structs itimerval, timeval. */ #endif #include #include /* Machine-dependent proc substruct. */ /* * One structure allocated per session. */ struct session { int s_count; /* Ref cnt; pgrps in session. */ struct proc *s_leader; /* Session leader. */ struct vnode *s_ttyvp; /* Vnode of controlling terminal. */ struct tty *s_ttyp; /* Controlling terminal. */ pid_t s_sid; /* Session ID. */ /* Setlogin() name: */ char s_login[roundup(MAXLOGNAME, sizeof(long))]; }; /* * One structure allocated per process group. */ struct pgrp { LIST_ENTRY(pgrp) pg_hash; /* Hash chain. */ LIST_HEAD(, proc) pg_members; /* Pointer to pgrp members. */ struct session *pg_session; /* Pointer to session. */ struct sigiolst pg_sigiolst; /* List of sigio sources. */ pid_t pg_id; /* Pgrp id. */ int pg_jobc; /* # procs qualifying pgrp for job control */ }; struct procsig { sigset_t ps_sigignore; /* Signals being ignored. */ sigset_t ps_sigcatch; /* Signals being caught by user. 
*/ int ps_flag; struct sigacts *ps_sigacts; /* Signal actions, state. */ int ps_refcnt; }; #define PS_NOCLDWAIT 0x0001 /* No zombies if child dies */ #define PS_NOCLDSTOP 0x0002 /* No SIGCHLD when children stop. */ /* * pasleep structure, used by asleep() syscall to hold requested priority * and timeout values for await(). */ struct pasleep { int as_priority; /* Async priority. */ int as_timo; /* Async timeout. */ }; /* * pargs, used to hold a copy of the command line, if it had a sane length. */ struct pargs { u_int ar_ref; /* Reference count. */ u_int ar_length; /* Length. */ u_char ar_args[0]; /* Arguments. */ }; /*- * Description of a process. * * This structure contains the information needed to manage a thread of * control, known in UN*X as a process; it has references to substructures * containing descriptions of things that the process uses, but may share * with related processes. The process structure and the substructures * are always addressable except for those marked "(CPU)" below, * which might be addressable only on a processor on which the process * is running. * * Below is a key of locks used to protect each member of struct proc. The * lock is indicated by a reference to a specific character in parens in the * associated comment. * * - not yet protected * a - only touched by curproc or parent during fork/wait * b - created at fork, never chagnes * c - locked by proc mtx * d - locked by allproc_lock lock * e - locked by proc tree lock * f - session mtx * g - process group mtx * h - callout_lock mtx * i - by curproc or the master session mtx * j - locked by sched_lock mtx * k - either by curproc or a lock which prevents the lock from * going away, such as (d,e) * l - the attaching proc or attaching proc parent * m - Giant * n - not locked, lazy */ struct proc { TAILQ_ENTRY(proc) p_procq; /* (j) Run/mutex queue. */ TAILQ_ENTRY(proc) p_slpq; /* (j) Sleep queue. */ LIST_ENTRY(proc) p_list; /* (d) List of all processes. 
*/ /* substructures: */ struct pcred *p_cred; /* (c) Process owner's identity. */ struct filedesc *p_fd; /* (b) Ptr to open files structure. */ struct pstats *p_stats; /* (b) Accounting/statistics (CPU). */ struct plimit *p_limit; /* (m) Process limits. */ struct vm_object *p_upages_obj;/* (c) Upages object. */ struct procsig *p_procsig; /* (c) Signal actions, state (CPU). */ #define p_sigacts p_procsig->ps_sigacts #define p_sigignore p_procsig->ps_sigignore #define p_sigcatch p_procsig->ps_sigcatch #define p_ucred p_cred->pc_ucred #define p_rlimit p_limit->pl_rlimit int p_flag; /* (c) P_* flags. */ int p_sflag; /* (j) PS_* flags. */ int p_intr_nesting_level; /* (n) Interrupt recursion. */ char p_stat; /* (j) S* process status. */ char p_pad1[3]; pid_t p_pid; /* (b) Process identifier. */ LIST_ENTRY(proc) p_hash; /* (d) Hash chain. */ LIST_ENTRY(proc) p_pglist; /* (c) List of processes in pgrp. */ struct proc *p_pptr; /* (e) Pointer to parent process. */ LIST_ENTRY(proc) p_sibling; /* (e) List of sibling processes. */ LIST_HEAD(, proc) p_children; /* (e) Pointer to list of children. */ /* The following fields are all zeroed upon creation in fork. */ #define p_startzero p_oppid pid_t p_oppid; /* (c) Save parent pid during ptrace. XXX */ int p_dupfd; /* (c) Sideways ret value from fdopen. XXX */ struct vmspace *p_vmspace; /* (b) Address space. */ /* scheduling */ u_int p_estcpu; /* (j) Time averaged value of p_cpticks. */ int p_cpticks; /* (j) Ticks of cpu time. */ fixpt_t p_pctcpu; /* (j) %cpu during p_swtime. */ struct callout p_slpcallout; /* (h) Callout for sleep. */ void *p_wchan; /* (j) Sleep address. */ const char *p_wmesg; /* (j) Reason for sleep. */ u_int p_swtime; /* (j) Time swapped in or out. */ u_int p_slptime; /* (j) Time since last blocked. */ struct callout p_itcallout; /* (h) Interval timer callout. */ struct itimerval p_realtimer; /* (h?/k?) Alarm timer. */ u_int64_t p_runtime; /* (j) Real time in microsec. 
*/ u_int64_t p_uu; /* (j) Previous user time in microsec. */ u_int64_t p_su; /* (j) Previous system time in microsec. */ u_int64_t p_iu; /* (j) Previous interrupt time in microsec. */ u_int64_t p_uticks; /* (j) Statclock hits in user mode. */ u_int64_t p_sticks; /* (j) Statclock hits in system mode. */ u_int64_t p_iticks; /* (j) Statclock hits processing intr. */ int p_traceflag; /* (j?) Kernel trace points. */ struct vnode *p_tracep; /* (j?) Trace to vnode. */ sigset_t p_siglist; /* (c) Signals arrived but not delivered. */ struct vnode *p_textvp; /* (b) Vnode of executable. */ char p_lock; /* (c) Process lock (prevent swap) count. */ struct mtx p_mtx; /* (k) Lock for this struct. */ u_char p_oncpu; /* (j) Which cpu we are on. */ u_char p_lastcpu; /* (j) Last cpu we were on. */ char p_rqindex; /* (j) Run queue index. */ short p_locks; /* (*) DEBUG: lockmgr count of held locks */ short p_simple_locks; /* (*) DEBUG: count of held simple locks */ u_int p_stops; /* (c) Procfs event bitmask. */ u_int p_stype; /* (c) Procfs stop event type. */ char p_step; /* (c) Procfs stop *once* flag. */ u_char p_pfsflags; /* (c) Procfs flags. */ char p_pad3[2]; /* Alignment. */ register_t p_retval[2]; /* (k) Syscall aux returns. */ struct sigiolst p_sigiolst; /* (c) List of sigio sources. */ int p_sigparent; /* (c) Signal to parent on exit. */ sigset_t p_oldsigmask; /* (c) Saved mask from before sigpause. */ int p_sig; /* (n) For core dump/debugger XXX. */ u_long p_code; /* (n) For core dump/debugger XXX. */ struct klist p_klist; /* (c) Knotes attached to this process. */ LIST_HEAD(, mtx) p_heldmtx; /* (j) For debugging code. */ struct mtx *p_blocked; /* (j) Mutex process is blocked on. */ const char *p_mtxname; /* (j) Name of mutex blocked on. */ LIST_HEAD(, mtx) p_contested; /* (j) Contested locks. */ /* End area that is zeroed on creation. */ #define p_endzero p_startcopy /* The following fields are all copied upon creation in fork. 
*/ #define p_startcopy p_sigmask sigset_t p_sigmask; /* (c) Current signal mask. */ stack_t p_sigstk; /* (c) Stack pointer and on-stack flag. */ int p_magic; /* (b) Magic number. */ u_char p_priority; /* (j) Process priority. */ u_char p_usrpri; /* (j) User priority based on p_cpu and p_nice. */ u_char p_nativepri; /* (j) Priority before propagation. */ char p_nice; /* (j?/k?) Process "nice" value. */ char p_comm[MAXCOMLEN + 1]; /* (b) Process name. */ struct pgrp *p_pgrp; /* (e?/c?) Pointer to process group. */ struct sysentvec *p_sysent; /* (b) System call dispatch information. */ struct rtprio p_rtprio; /* (j) Realtime priority. */ struct prison *p_prison; /* (b?) jail(4). */ struct pargs *p_args; /* (b?) Process arguments. */ /* End area that is copied on creation. */ #define p_endcopy p_addr struct user *p_addr; /* (k) Kernel virtual addr of u-area (CPU). */ struct mdproc p_md; /* (k) Any machine-dependent fields. */ u_short p_xstat; /* (c) Exit status for wait; also stop sig. */ u_short p_acflag; /* (c) Accounting flags. */ struct rusage *p_ru; /* (a) Exit information. XXX */ void *p_aioinfo; /* (c) ASYNC I/O info. */ struct proc *p_peers; /* (c) */ struct proc *p_leader; /* (c) */ struct pasleep p_asleep; /* (k) Used by asleep()/await(). */ void *p_emuldata; /* (c) Emulator state data. */ struct ithd *p_ithd; /* (b) For interrupt threads only. */ }; #define p_session p_pgrp->pg_session #define p_pgid p_pgrp->pg_id /* Status values (p_stat). */ #define SIDL 1 /* Process being created by fork. */ #define SRUN 2 /* Currently runnable. */ #define SSLEEP 3 /* Sleeping on an address. */ #define SSTOP 4 /* Process debugging or suspension. */ #define SZOMB 5 /* Awaiting collection by parent. */ #define SWAIT 6 /* Waiting for interrupt. */ #define SMTX 7 /* Blocked on a mutex. */ /* These flags are kept in p_flag. */ #define P_ADVLOCK 0x00001 /* Process may hold a POSIX advisory lock. */ #define P_CONTROLT 0x00002 /* Has a controlling terminal. 
*/ #define P_KTHREAD 0x00004 /* Kernel thread. */ #define P_NOLOAD 0x00008 /* Ignore during load avg calculations. */ #define P_PPWAIT 0x00010 /* Parent is waiting for child to exec/exit. */ #define P_SELECT 0x00040 /* Selecting; wakeup/waiting danger. */ #define P_SUGID 0x00100 /* Had set id privileges since last exec. */ #define P_SYSTEM 0x00200 /* System proc: no sigs, stats or swapping. */ #define P_TRACED 0x00800 /* Debugged process being traced. */ #define P_WAITED 0x01000 /* Debugging process has waited for child. */ #define P_WEXIT 0x02000 /* Working on exiting. */ #define P_EXEC 0x04000 /* Process called exec. */ /* Should be moved to machine-dependent areas. */ #define P_BUFEXHAUST 0x100000 /* Dirty buffers flush is in progress. */ #define P_COWINPROGRESS 0x400000 /* Snapshot copy-on-write in progress. */ #define P_DEADLKTREAT 0x800000 /* Lock aquisition - deadlock treatment. */ #define P_JAILED 0x1000000 /* Process is in jail. */ #define P_OLDMASK 0x2000000 /* Need to restore mask after suspend. */ #define P_ALTSTACK 0x4000000 /* Have alternate signal stack. */ /* These flags are kept in p_sflag and are protected with sched_lock. */ #define PS_INMEM 0x00001 /* Loaded into memory. */ #define PS_OWEUPC 0x00002 /* Owe process an addupc() call at next ast. */ #define PS_PROFIL 0x00004 /* Has started profiling. */ #define PS_SINTR 0x00008 /* Sleep is interruptible. */ #define PS_TIMEOUT 0x00010 /* Timing out during sleep. */ #define PS_ALRMPEND 0x00020 /* Pending SIGVTALRM needs to be posted. */ #define PS_PROFPEND 0x00040 /* Pending SIGPROF needs to be posted. */ #define PS_CVWAITQ 0x00080 /* Proces is on a cv_waitq (not slpq). */ #define PS_SWAPINREQ 0x00100 /* Swapin request due to wakeup. */ #define PS_SWAPPING 0x00200 /* Process is being swapped. */ #define P_MAGIC 0xbeefface #define P_CAN_SEE 1 #define P_CAN_KILL 2 #define P_CAN_SCHED 3 #define P_CAN_DEBUG 4 /* * MOVE TO ucred.h? * * Shareable process credentials (always resident). 
This includes a reference * to the current user credentials as well as real and saved ids that may be * used to change ids. */ struct pcred { struct ucred *pc_ucred; /* Current credentials. */ uid_t p_ruid; /* Real user id. */ uid_t p_svuid; /* Saved effective user id. */ gid_t p_rgid; /* Real group id. */ gid_t p_svgid; /* Saved effective group id. */ int p_refcnt; /* Number of references. */ struct uidinfo *p_uidinfo; /* Per uid resource consumption. */ }; /* * Describe an interrupt thread. There is one of these per irq. BSD/OS makes * this a superset of struct proc, i.e. it_proc is the struct itself and not a * pointer. We point in both directions, because it feels good that way. */ struct ithd { struct proc *it_proc; /* Interrupt process. */ LIST_ENTRY(ithd) it_list; /* All interrupt threads. */ int it_need; /* Needs service. */ int irq; /* Vector. */ struct intrhand *it_ih; /* Interrupt handlers. */ struct ithd *it_interrupted; /* Who we interrupted. */ void *it_md; /* Hook for MD interrupt code. */ }; #ifdef _KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_PARGS); MALLOC_DECLARE(M_SESSION); MALLOC_DECLARE(M_SUBPROC); MALLOC_DECLARE(M_ZOMBIE); #endif static __inline int sigonstack(size_t sp) { register struct proc *p = curproc; return ((p->p_flag & P_ALTSTACK) ? #if defined(COMPAT_43) || defined(COMPAT_SUNOS) ((p->p_sigstk.ss_size == 0) ? (p->p_sigstk.ss_flags & SS_ONSTACK) : ((sp - (size_t)p->p_sigstk.ss_sp) < p->p_sigstk.ss_size)) #else ((sp - (size_t)p->p_sigstk.ss_sp) < p->p_sigstk.ss_size) #endif : 0); } /* Handy macro to determine if p1 can mangle p2. */ #define PRISON_CHECK(p1, p2) \ ((p1)->p_prison == NULL || (p1)->p_prison == (p2)->p_prison) /* * We use process IDs <= PID_MAX; PID_MAX + 1 must also fit in a pid_t, * as it is used to represent "no process group". 
*/
#define	PID_MAX		99999
#define	NO_PID		100000

#define	SESS_LEADER(p)	((p)->p_session->s_leader == (p))

/* Session reference counting; SESSRELE frees the session on last release. */
#define	SESSHOLD(s)	((s)->s_count++)
#define	SESSRELE(s) { \
	if (--(s)->s_count == 0) \
		FREE(s, M_SESSION); \
}

/* STOPEVENT() is MP safe. */
#define	STOPEVENT(p, e, v) do { \
	PROC_LOCK(p); \
	if ((p)->p_stops & (e)) { \
		stopevent((p), (e), (v)); \
	} \
	PROC_UNLOCK(p); \
} while (0)

/* Lock and unlock a process. */
#define PROC_LOCK(p)	mtx_enter(&(p)->p_mtx, MTX_DEF)
#define PROC_UNLOCK(p)	mtx_exit(&(p)->p_mtx, MTX_DEF)

/* Lock and unlock the proc lists. */
#define	ALLPROC_LOCK(how) \
	lockmgr(&allproc_lock, (how), NULL, CURPROC)
#define	AP_SHARED	LK_SHARED
#define	AP_EXCLUSIVE	LK_EXCLUSIVE
#define	AP_RELEASE	LK_RELEASE

/* Lock and unlock the proc child and sibling lists. */
#define	PROCTREE_LOCK(how) \
	lockmgr(&proctree_lock, (how), NULL, CURPROC)
#define	PROCTREE_ASSERT(what) \
	LOCKMGR_ASSERT(&proctree_lock, (what), CURPROC)
#define	PT_SHARED	LK_SHARED
#define	PT_EXCLUSIVE	LK_EXCLUSIVE
#define	PT_RELEASE	LK_RELEASE

/* Hold process U-area in memory, normally for ptrace/procfs work. */
#define	PHOLD(p) do { \
	PROC_LOCK(p); \
	if ((p)->p_lock++ == 0) \
		faultin(p); \
	PROC_UNLOCK(p); \
} while (0)

/* Drop a PHOLD() reference. */
#define	PRELE(p) do { \
	PROC_LOCK(p); \
	(--(p)->p_lock); \
	PROC_UNLOCK(p); \
} while (0)

/* Hash chains for looking up a process or process group by id. */
#define	PIDHASH(pid)	(&pidhashtbl[(pid) & pidhash])
extern LIST_HEAD(pidhashhead, proc) *pidhashtbl;
extern u_long pidhash;

#define	PGRPHASH(pgid)	(&pgrphashtbl[(pgid) & pgrphash])
extern LIST_HEAD(pgrphashhead, pgrp) *pgrphashtbl;
extern u_long pgrphash;

extern struct lock allproc_lock;
extern struct lock proctree_lock;
extern struct proc proc0;		/* Process slot for swapper. */
extern int hogticks;			/* Limit on kernel cpu hogs. */
extern int nprocs, maxproc;		/* Current and max number of procs. */
extern int maxprocperuid;		/* Max procs per uid. */
extern u_long ps_arg_cache_limit;
extern int ps_argsopen;
extern int ps_showallprocs;
extern int sched_quantum;		/* Scheduling quantum in ticks.
*/

LIST_HEAD(proclist, proc);
extern struct proclist allproc;		/* List of all processes. */
extern struct proclist zombproc;	/* List of zombie processes. */
extern struct proc *initproc, *pageproc; /* Process slots for init, pager. */
extern struct proc *updateproc;		/* Process slot for syncer (sic). */

#define	NQS	32			/* 32 run queues. */
TAILQ_HEAD(rq, proc);
extern struct rq itqueues[];
extern struct rq rtqueues[];
extern struct rq queues[];
extern struct rq idqueues[];
extern struct vm_zone *proc_zone;

/*
 * XXX macros for scheduler.  Shouldn't be here, but currently needed for
 * bounding the dubious p_estcpu inheritance in wait1().
 * INVERSE_ESTCPU_WEIGHT is only suitable for statclock() frequencies in
 * the range 100-256 Hz (approximately).
 */
#define	ESTCPULIM(e) \
    min((e), INVERSE_ESTCPU_WEIGHT * (NICE_WEIGHT * (PRIO_MAX - PRIO_MIN) - \
	PPQ) + INVERSE_ESTCPU_WEIGHT - 1)
#define	INVERSE_ESTCPU_WEIGHT	8	/* 1 / (priorities per estcpu level). */
#define	NICE_WEIGHT	1		/* Priorities per nice level. */
#define	PPQ	(128 / NQS)		/* Priorities per queue. */

/* Forward declarations so the prototypes below need not pull in headers. */
struct mtx;
struct trapframe;

struct proc *pfind __P((pid_t));	/* Find process by id. */
struct pgrp *pgfind __P((pid_t));	/* Find process group by id. */
struct proc *zpfind __P((pid_t));	/* Find zombie process by id.
*/ struct proc *chooseproc __P((void)); int enterpgrp __P((struct proc *p, pid_t pgid, int mksess)); void faultin __P((struct proc *p)); void fixjobc __P((struct proc *p, struct pgrp *pgrp, int entering)); int fork1 __P((struct proc *, int, struct proc **)); -void fork_exit __P((void *(void *, struct trapframe *), void *, +void fork_exit __P((void (*)(void *, struct trapframe *), void *, struct trapframe *)); void fork_return __P((struct proc *, struct trapframe *)); int inferior __P((struct proc *p)); int leavepgrp __P((struct proc *p)); void mi_switch __P((void)); int p_can __P((const struct proc *p1, const struct proc *p2, int operation, int *privused)); int p_trespass __P((struct proc *p1, struct proc *p2)); void procinit __P((void)); void proc_reparent __P((struct proc *child, struct proc *newparent)); u_int32_t procrunnable __P((void)); void remrunqueue __P((struct proc *)); void resetpriority __P((struct proc *)); int roundrobin_interval __P((void)); void schedclock __P((struct proc *)); void setrunnable __P((struct proc *)); void setrunqueue __P((struct proc *)); void setsugid __P((struct proc *p)); void sleepinit __P((void)); void stopevent __P((struct proc *, u_int, u_int)); void cpu_idle __P((void)); void cpu_switch __P((void)); void cpu_throw __P((void)) __dead2; void unsleep __P((struct proc *)); void updatepri __P((struct proc *)); void userret __P((struct proc *, struct trapframe *, u_quad_t)); void maybe_resched __P((struct proc *)); void cpu_exit __P((struct proc *)) __dead2; void exit1 __P((struct proc *, int)) __dead2; void cpu_fork __P((struct proc *, struct proc *, int)); void cpu_set_fork_handler __P((struct proc *, void (*)(void *), void *)); int trace_req __P((struct proc *)); void cpu_wait __P((struct proc *)); int cpu_coredump __P((struct proc *, struct vnode *, struct ucred *)); #endif /* _KERNEL */ #endif /* !_SYS_PROC_H_ */