diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index abc3206c5a26..ea34631995b1 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -1,3430 +1,3428 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_proc.c 8.7 (Berkeley) 2/14/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_ktrace.h" #include "opt_kstack_pages.h" #include "opt_stack.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #include #include #include #include #include #include #include #include #include #ifdef COMPAT_FREEBSD32 #include #include #endif SDT_PROVIDER_DEFINE(proc); MALLOC_DEFINE(M_PGRP, "pgrp", "process group header"); MALLOC_DEFINE(M_SESSION, "session", "session header"); static MALLOC_DEFINE(M_PROC, "proc", "Proc structures"); MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures"); static void fixjobc_enterpgrp(struct proc *p, struct pgrp *pgrp); static void doenterpgrp(struct proc *, struct pgrp *); static void orphanpg(struct pgrp *pg); static void fill_kinfo_aggregate(struct proc *p, struct kinfo_proc *kp); static void fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp); static void fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp, int preferthread); static void pgadjustjobc(struct pgrp *pgrp, bool entering); static void pgdelete(struct pgrp *); static int proc_ctor(void *mem, int size, void *arg, int flags); static void proc_dtor(void *mem, int size, void *arg); static int proc_init(void *mem, int size, int flags); static void proc_fini(void *mem, int size); static void pargs_free(struct pargs *pa); /* * Other process lists */ struct pidhashhead *pidhashtbl; struct sx *pidhashtbl_lock; u_long pidhash; u_long pidhashlock; struct pgrphashhead *pgrphashtbl; u_long pgrphash; struct proclist allproc; struct sx __exclusive_cache_line allproc_lock; struct sx __exclusive_cache_line proctree_lock; struct mtx __exclusive_cache_line ppeers_lock; struct mtx __exclusive_cache_line procid_lock; uma_zone_t proc_zone; /* * The offset of various fields in struct proc and struct thread. * These are used by kernel debuggers to enumerate kernel threads and * processes. */ const int proc_off_p_pid = offsetof(struct proc, p_pid); const int proc_off_p_comm = offsetof(struct proc, p_comm); const int proc_off_p_list = offsetof(struct proc, p_list); const int proc_off_p_hash = offsetof(struct proc, p_hash); const int proc_off_p_threads = offsetof(struct proc, p_threads); const int thread_off_td_tid = offsetof(struct thread, td_tid); const int thread_off_td_name = offsetof(struct thread, td_name); const int thread_off_td_oncpu = offsetof(struct thread, td_oncpu); const int thread_off_td_pcb = offsetof(struct thread, td_pcb); const int thread_off_td_plist = offsetof(struct thread, td_plist); EVENTHANDLER_LIST_DEFINE(process_ctor); EVENTHANDLER_LIST_DEFINE(process_dtor); EVENTHANDLER_LIST_DEFINE(process_init); EVENTHANDLER_LIST_DEFINE(process_fini); EVENTHANDLER_LIST_DEFINE(process_exit); EVENTHANDLER_LIST_DEFINE(process_fork); EVENTHANDLER_LIST_DEFINE(process_exec); int kstack_pages = KSTACK_PAGES; SYSCTL_INT(_kern, OID_AUTO, kstack_pages, CTLFLAG_RD, &kstack_pages, 0, "Kernel stack size in pages"); static int vmmap_skip_res_cnt = 0; SYSCTL_INT(_kern, OID_AUTO, proc_vmmap_skip_resident_count, CTLFLAG_RW, &vmmap_skip_res_cnt, 0, "Skip calculation of the pages resident count in kern.proc.vmmap"); CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE); #ifdef COMPAT_FREEBSD32 CTASSERT(sizeof(struct kinfo_proc32) == KINFO_PROC32_SIZE); #endif /* * Initialize global process hashing structures. */ void procinit(void) { u_long i; sx_init(&allproc_lock, "allproc"); sx_init(&proctree_lock, "proctree"); mtx_init(&ppeers_lock, "p_peers", NULL, MTX_DEF); mtx_init(&procid_lock, "procid", NULL, MTX_DEF); LIST_INIT(&allproc); pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash); pidhashlock = (pidhash + 1) / 64; if (pidhashlock > 0) pidhashlock--; pidhashtbl_lock = malloc(sizeof(*pidhashtbl_lock) * (pidhashlock + 1), M_PROC, M_WAITOK | M_ZERO); for (i = 0; i < pidhashlock + 1; i++) sx_init_flags(&pidhashtbl_lock[i], "pidhash", SX_DUPOK); pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash); proc_zone = uma_zcreate("PROC", sched_sizeof_proc(), proc_ctor, proc_dtor, proc_init, proc_fini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); uihashinit(); } /* * Prepare a proc for use. */ static int proc_ctor(void *mem, int size, void *arg, int flags) { struct proc *p; struct thread *td; p = (struct proc *)mem; #ifdef KDTRACE_HOOKS kdtrace_proc_ctor(p); #endif EVENTHANDLER_DIRECT_INVOKE(process_ctor, p); td = FIRST_THREAD_IN_PROC(p); if (td != NULL) { /* Make sure all thread constructors are executed */ EVENTHANDLER_DIRECT_INVOKE(thread_ctor, td); } return (0); } /* * Reclaim a proc after use. */ static void proc_dtor(void *mem, int size, void *arg) { struct proc *p; struct thread *td; /* INVARIANTS checks go here */ p = (struct proc *)mem; td = FIRST_THREAD_IN_PROC(p); if (td != NULL) { #ifdef INVARIANTS KASSERT((p->p_numthreads == 1), ("bad number of threads in exiting process")); KASSERT(STAILQ_EMPTY(&p->p_ktr), ("proc_dtor: non-empty p_ktr")); #endif /* Free all OSD associated to this thread. */ osd_thread_exit(td); td_softdep_cleanup(td); MPASS(td->td_su == NULL); /* Make sure all thread destructors are executed */ EVENTHANDLER_DIRECT_INVOKE(thread_dtor, td); } EVENTHANDLER_DIRECT_INVOKE(process_dtor, p); #ifdef KDTRACE_HOOKS kdtrace_proc_dtor(p); #endif if (p->p_ksi != NULL) KASSERT(! KSI_ONQ(p->p_ksi), ("SIGCHLD queue")); } /* * Initialize type-stable parts of a proc (when newly created). */ static int proc_init(void *mem, int size, int flags) { struct proc *p; p = (struct proc *)mem; mtx_init(&p->p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK | MTX_NEW); mtx_init(&p->p_slock, "process slock", NULL, MTX_SPIN | MTX_NEW); mtx_init(&p->p_statmtx, "pstatl", NULL, MTX_SPIN | MTX_NEW); mtx_init(&p->p_itimmtx, "pitiml", NULL, MTX_SPIN | MTX_NEW); mtx_init(&p->p_profmtx, "pprofl", NULL, MTX_SPIN | MTX_NEW); cv_init(&p->p_pwait, "ppwait"); TAILQ_INIT(&p->p_threads); /* all threads in proc */ EVENTHANDLER_DIRECT_INVOKE(process_init, p); p->p_stats = pstats_alloc(); p->p_pgrp = NULL; return (0); } /* * UMA should ensure that this function is never called. * Freeing a proc structure would violate type stability. */ static void proc_fini(void *mem, int size) { #ifdef notnow struct proc *p; p = (struct proc *)mem; EVENTHANDLER_DIRECT_INVOKE(process_fini, p); pstats_free(p->p_stats); thread_free(FIRST_THREAD_IN_PROC(p)); mtx_destroy(&p->p_mtx); if (p->p_ksi != NULL) ksiginfo_free(p->p_ksi); #else panic("proc reclaimed"); #endif } /* * PID space management. * * These bitmaps are used by fork_findpid. */ bitstr_t bit_decl(proc_id_pidmap, PID_MAX); bitstr_t bit_decl(proc_id_grpidmap, PID_MAX); bitstr_t bit_decl(proc_id_sessidmap, PID_MAX); bitstr_t bit_decl(proc_id_reapmap, PID_MAX); static bitstr_t *proc_id_array[] = { proc_id_pidmap, proc_id_grpidmap, proc_id_sessidmap, proc_id_reapmap, }; void proc_id_set(int type, pid_t id) { KASSERT(type >= 0 && type < nitems(proc_id_array), ("invalid type %d\n", type)); mtx_lock(&procid_lock); KASSERT(bit_test(proc_id_array[type], id) == 0, ("bit %d already set in %d\n", id, type)); bit_set(proc_id_array[type], id); mtx_unlock(&procid_lock); } void proc_id_set_cond(int type, pid_t id) { KASSERT(type >= 0 && type < nitems(proc_id_array), ("invalid type %d\n", type)); if (bit_test(proc_id_array[type], id)) return; mtx_lock(&procid_lock); bit_set(proc_id_array[type], id); mtx_unlock(&procid_lock); } void proc_id_clear(int type, pid_t id) { KASSERT(type >= 0 && type < nitems(proc_id_array), ("invalid type %d\n", type)); mtx_lock(&procid_lock); KASSERT(bit_test(proc_id_array[type], id) != 0, ("bit %d not set in %d\n", id, type)); bit_clear(proc_id_array[type], id); mtx_unlock(&procid_lock); } /* * Is p an inferior of the current process? */ int inferior(struct proc *p) { sx_assert(&proctree_lock, SX_LOCKED); PROC_LOCK_ASSERT(p, MA_OWNED); for (; p != curproc; p = proc_realparent(p)) { if (p->p_pid == 0) return (0); } return (1); } /* * Shared lock all the pid hash lists. */ void pidhash_slockall(void) { u_long i; for (i = 0; i < pidhashlock + 1; i++) sx_slock(&pidhashtbl_lock[i]); } /* * Shared unlock all the pid hash lists. */ void pidhash_sunlockall(void) { u_long i; for (i = 0; i < pidhashlock + 1; i++) sx_sunlock(&pidhashtbl_lock[i]); } /* * Similar to pfind_any(), this function finds zombies. */ struct proc * pfind_any_locked(pid_t pid) { struct proc *p; sx_assert(PIDHASHLOCK(pid), SX_LOCKED); LIST_FOREACH(p, PIDHASH(pid), p_hash) { if (p->p_pid == pid) { PROC_LOCK(p); if (p->p_state == PRS_NEW) { PROC_UNLOCK(p); p = NULL; } break; } } return (p); } /* * Locate a process by number. * * By not returning processes in the PRS_NEW state, we allow callers to avoid * testing for that condition to avoid dereferencing p_ucred, et al. */ static __always_inline struct proc * _pfind(pid_t pid, bool zombie) { struct proc *p; p = curproc; if (p->p_pid == pid) { PROC_LOCK(p); return (p); } sx_slock(PIDHASHLOCK(pid)); LIST_FOREACH(p, PIDHASH(pid), p_hash) { if (p->p_pid == pid) { PROC_LOCK(p); if (p->p_state == PRS_NEW || (!zombie && p->p_state == PRS_ZOMBIE)) { PROC_UNLOCK(p); p = NULL; } break; } } sx_sunlock(PIDHASHLOCK(pid)); return (p); } struct proc * pfind(pid_t pid) { return (_pfind(pid, false)); } /* * Same as pfind but allow zombies. */ struct proc * pfind_any(pid_t pid) { return (_pfind(pid, true)); } /* * Locate a process group by number. * The caller must hold proctree_lock. */ struct pgrp * pgfind(pid_t pgid) { struct pgrp *pgrp; sx_assert(&proctree_lock, SX_LOCKED); LIST_FOREACH(pgrp, PGRPHASH(pgid), pg_hash) { if (pgrp->pg_id == pgid) { PGRP_LOCK(pgrp); return (pgrp); } } return (NULL); } /* * Locate process and do additional manipulations, depending on flags. */ int pget(pid_t pid, int flags, struct proc **pp) { struct proc *p; struct thread *td1; int error; p = curproc; if (p->p_pid == pid) { PROC_LOCK(p); } else { p = NULL; if (pid <= PID_MAX) { if ((flags & PGET_NOTWEXIT) == 0) p = pfind_any(pid); else p = pfind(pid); } else if ((flags & PGET_NOTID) == 0) { td1 = tdfind(pid, -1); if (td1 != NULL) p = td1->td_proc; } if (p == NULL) return (ESRCH); if ((flags & PGET_CANSEE) != 0) { error = p_cansee(curthread, p); if (error != 0) goto errout; } } if ((flags & PGET_CANDEBUG) != 0) { error = p_candebug(curthread, p); if (error != 0) goto errout; } if ((flags & PGET_ISCURRENT) != 0 && curproc != p) { error = EPERM; goto errout; } if ((flags & PGET_NOTWEXIT) != 0 && (p->p_flag & P_WEXIT) != 0) { error = ESRCH; goto errout; } if ((flags & PGET_NOTINEXEC) != 0 && (p->p_flag & P_INEXEC) != 0) { /* * XXXRW: Not clear ESRCH is the right error during proc * execve(). */ error = ESRCH; goto errout; } if ((flags & PGET_HOLD) != 0) { _PHOLD(p); PROC_UNLOCK(p); } *pp = p; return (0); errout: PROC_UNLOCK(p); return (error); } /* * Create a new process group. * pgid must be equal to the pid of p. * Begin a new session if required. */ int enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp, struct session *sess) { sx_assert(&proctree_lock, SX_XLOCKED); KASSERT(pgrp != NULL, ("enterpgrp: pgrp == NULL")); KASSERT(p->p_pid == pgid, ("enterpgrp: new pgrp and pid != pgid")); KASSERT(pgfind(pgid) == NULL, ("enterpgrp: pgrp with pgid exists")); KASSERT(!SESS_LEADER(p), ("enterpgrp: session leader attempted setpgrp")); mtx_init(&pgrp->pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK); if (sess != NULL) { /* * new session */ mtx_init(&sess->s_mtx, "session", NULL, MTX_DEF); PROC_LOCK(p); p->p_flag &= ~P_CONTROLT; PROC_UNLOCK(p); PGRP_LOCK(pgrp); sess->s_leader = p; sess->s_sid = p->p_pid; proc_id_set(PROC_ID_SESSION, p->p_pid); refcount_init(&sess->s_count, 1); sess->s_ttyvp = NULL; sess->s_ttydp = NULL; sess->s_ttyp = NULL; bcopy(p->p_session->s_login, sess->s_login, sizeof(sess->s_login)); pgrp->pg_session = sess; KASSERT(p == curproc, ("enterpgrp: mksession and p != curproc")); } else { pgrp->pg_session = p->p_session; sess_hold(pgrp->pg_session); PGRP_LOCK(pgrp); } pgrp->pg_id = pgid; proc_id_set(PROC_ID_GROUP, p->p_pid); LIST_INIT(&pgrp->pg_members); /* * As we have an exclusive lock of proctree_lock, * this should not deadlock. */ LIST_INSERT_HEAD(PGRPHASH(pgid), pgrp, pg_hash); pgrp->pg_jobc = 0; SLIST_INIT(&pgrp->pg_sigiolst); PGRP_UNLOCK(pgrp); doenterpgrp(p, pgrp); return (0); } /* * Move p to an existing process group */ int enterthispgrp(struct proc *p, struct pgrp *pgrp) { sx_assert(&proctree_lock, SX_XLOCKED); PROC_LOCK_ASSERT(p, MA_NOTOWNED); PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED); PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED); SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED); KASSERT(pgrp->pg_session == p->p_session, - ("%s: pgrp's session %p, p->p_session %p.\n", - __func__, - pgrp->pg_session, - p->p_session)); + ("%s: pgrp's session %p, p->p_session %p proc %p\n", + __func__, pgrp->pg_session, p->p_session, p)); KASSERT(pgrp != p->p_pgrp, - ("%s: p belongs to pgrp.", __func__)); + ("%s: p %p belongs to pgrp %p", __func__, p, pgrp)); doenterpgrp(p, pgrp); return (0); } /* * If true, any child of q which belongs to group pgrp, qualifies the * process group pgrp as not orphaned. */ static bool isjobproc(struct proc *q, struct pgrp *pgrp) { sx_assert(&proctree_lock, SX_LOCKED); return (q->p_pgrp != pgrp && q->p_pgrp->pg_session == pgrp->pg_session); } static struct proc * jobc_reaper(struct proc *p) { struct proc *pp; sx_assert(&proctree_lock, SX_LOCKED); for (pp = p;;) { pp = pp->p_reaper; if (pp->p_reaper == pp || (pp->p_treeflag & P_TREE_GRPEXITED) == 0) return (pp); } } static struct proc * jobc_parent(struct proc *p) { struct proc *pp; sx_assert(&proctree_lock, SX_LOCKED); pp = proc_realparent(p); if (pp->p_pptr == NULL || (pp->p_treeflag & P_TREE_GRPEXITED) == 0) return (pp); return (jobc_reaper(pp)); } #ifdef INVARIANTS static void check_pgrp_jobc(struct pgrp *pgrp) { struct proc *q; int cnt; sx_assert(&proctree_lock, SX_LOCKED); PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED); cnt = 0; PGRP_LOCK(pgrp); LIST_FOREACH(q, &pgrp->pg_members, p_pglist) { if ((q->p_treeflag & P_TREE_GRPEXITED) != 0 || q->p_pptr == NULL) continue; if (isjobproc(jobc_parent(q), pgrp)) cnt++; } KASSERT(pgrp->pg_jobc == cnt, ("pgrp %d %p pg_jobc %d cnt %d", pgrp->pg_id, pgrp, pgrp->pg_jobc, cnt)); PGRP_UNLOCK(pgrp); } #endif /* * Move p to a process group */ static void doenterpgrp(struct proc *p, struct pgrp *pgrp) { struct pgrp *savepgrp; sx_assert(&proctree_lock, SX_XLOCKED); PROC_LOCK_ASSERT(p, MA_NOTOWNED); PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED); PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED); SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED); savepgrp = p->p_pgrp; #ifdef INVARIANTS check_pgrp_jobc(pgrp); check_pgrp_jobc(savepgrp); #endif /* * Adjust eligibility of affected pgrps to participate in job control. */ fixjobc_enterpgrp(p, pgrp); PGRP_LOCK(pgrp); PGRP_LOCK(savepgrp); PROC_LOCK(p); LIST_REMOVE(p, p_pglist); p->p_pgrp = pgrp; PROC_UNLOCK(p); LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist); PGRP_UNLOCK(savepgrp); PGRP_UNLOCK(pgrp); if (LIST_EMPTY(&savepgrp->pg_members)) pgdelete(savepgrp); } /* * remove process from process group */ int leavepgrp(struct proc *p) { struct pgrp *savepgrp; sx_assert(&proctree_lock, SX_XLOCKED); savepgrp = p->p_pgrp; PGRP_LOCK(savepgrp); PROC_LOCK(p); LIST_REMOVE(p, p_pglist); p->p_pgrp = NULL; PROC_UNLOCK(p); PGRP_UNLOCK(savepgrp); if (LIST_EMPTY(&savepgrp->pg_members)) pgdelete(savepgrp); return (0); } /* * delete a process group */ static void pgdelete(struct pgrp *pgrp) { struct session *savesess; struct tty *tp; sx_assert(&proctree_lock, SX_XLOCKED); PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED); SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED); /* * Reset any sigio structures pointing to us as a result of * F_SETOWN with our pgid. The proctree lock ensures that * new sigio structures will not be added after this point. */ funsetownlst(&pgrp->pg_sigiolst); PGRP_LOCK(pgrp); tp = pgrp->pg_session->s_ttyp; LIST_REMOVE(pgrp, pg_hash); savesess = pgrp->pg_session; PGRP_UNLOCK(pgrp); /* Remove the reference to the pgrp before deallocating it. */ if (tp != NULL) { tty_lock(tp); tty_rel_pgrp(tp, pgrp); } proc_id_clear(PROC_ID_GROUP, pgrp->pg_id); mtx_destroy(&pgrp->pg_mtx); free(pgrp, M_PGRP); sess_release(savesess); } static void pgadjustjobc(struct pgrp *pgrp, bool entering) { PGRP_LOCK(pgrp); if (entering) { MPASS(pgrp->pg_jobc >= 0); pgrp->pg_jobc++; } else { MPASS(pgrp->pg_jobc > 0); --pgrp->pg_jobc; if (pgrp->pg_jobc == 0) orphanpg(pgrp); } PGRP_UNLOCK(pgrp); } static void fixjobc_enterpgrp_q(struct pgrp *pgrp, struct proc *p, struct proc *q, bool adj) { struct pgrp *childpgrp; bool future_jobc; sx_assert(&proctree_lock, SX_LOCKED); if ((q->p_treeflag & P_TREE_GRPEXITED) != 0) return; childpgrp = q->p_pgrp; future_jobc = childpgrp != pgrp && childpgrp->pg_session == pgrp->pg_session; if ((adj && !isjobproc(p, childpgrp) && future_jobc) || (!adj && isjobproc(p, childpgrp) && !future_jobc)) pgadjustjobc(childpgrp, adj); } /* * Adjust pgrp jobc counters when specified process changes process group. * We count the number of processes in each process group that "qualify" * the group for terminal job control (those with a parent in a different * process group of the same session). If that count reaches zero, the * process group becomes orphaned. Check both the specified process' * process group and that of its children. * We increment eligibility counts before decrementing, otherwise we * could reach 0 spuriously during the decrement. */ static void fixjobc_enterpgrp(struct proc *p, struct pgrp *pgrp) { struct proc *q; sx_assert(&proctree_lock, SX_LOCKED); PROC_LOCK_ASSERT(p, MA_NOTOWNED); PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED); SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED); if (p->p_pgrp == pgrp) return; if (isjobproc(jobc_parent(p), pgrp)) pgadjustjobc(pgrp, true); LIST_FOREACH(q, &p->p_children, p_sibling) { if ((q->p_treeflag & P_TREE_ORPHANED) != 0) continue; fixjobc_enterpgrp_q(pgrp, p, q, true); } LIST_FOREACH(q, &p->p_orphans, p_orphan) fixjobc_enterpgrp_q(pgrp, p, q, true); if (isjobproc(jobc_parent(p), p->p_pgrp)) pgadjustjobc(p->p_pgrp, false); LIST_FOREACH(q, &p->p_children, p_sibling) { if ((q->p_treeflag & P_TREE_ORPHANED) != 0) continue; fixjobc_enterpgrp_q(pgrp, p, q, false); } LIST_FOREACH(q, &p->p_orphans, p_orphan) fixjobc_enterpgrp_q(pgrp, p, q, false); } static void fixjobc_kill_q(struct proc *p, struct proc *q, bool adj) { struct pgrp *childpgrp; sx_assert(&proctree_lock, SX_LOCKED); if ((q->p_treeflag & P_TREE_GRPEXITED) != 0) return; childpgrp = q->p_pgrp; if ((adj && isjobproc(jobc_reaper(q), childpgrp) && !isjobproc(p, childpgrp)) || (!adj && !isjobproc(jobc_reaper(q), childpgrp) && isjobproc(p, childpgrp))) pgadjustjobc(childpgrp, adj); } static void fixjobc_kill(struct proc *p) { struct proc *q; struct pgrp *pgrp; sx_assert(&proctree_lock, SX_LOCKED); PROC_LOCK_ASSERT(p, MA_NOTOWNED); pgrp = p->p_pgrp; PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED); SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED); #ifdef INVARIANTS check_pgrp_jobc(pgrp); #endif /* * p no longer affects process group orphanage for children. * It is marked by the flag because p is only physically * removed from its process group on wait(2). */ MPASS((p->p_treeflag & P_TREE_GRPEXITED) == 0); p->p_treeflag |= P_TREE_GRPEXITED; /* * Check p's parent to see whether p qualifies its own process * group; if so, adjust count for p's process group. */ if (isjobproc(jobc_parent(p), pgrp)) pgadjustjobc(pgrp, false); /* * Check this process' children to see whether they qualify * their process groups after reparenting to reaper. If so, * adjust counts for children's process groups. */ LIST_FOREACH(q, &p->p_children, p_sibling) { if ((q->p_treeflag & P_TREE_ORPHANED) != 0) continue; fixjobc_kill_q(p, q, true); } LIST_FOREACH(q, &p->p_orphans, p_orphan) fixjobc_kill_q(p, q, true); LIST_FOREACH(q, &p->p_children, p_sibling) { if ((q->p_treeflag & P_TREE_ORPHANED) != 0) continue; fixjobc_kill_q(p, q, false); } LIST_FOREACH(q, &p->p_orphans, p_orphan) fixjobc_kill_q(p, q, false); #ifdef INVARIANTS check_pgrp_jobc(pgrp); #endif } void killjobc(void) { struct session *sp; struct tty *tp; struct proc *p; struct vnode *ttyvp; p = curproc; MPASS(p->p_flag & P_WEXIT); sx_assert(&proctree_lock, SX_LOCKED); if (SESS_LEADER(p)) { sp = p->p_session; /* * s_ttyp is not zero'd; we use this to indicate that * the session once had a controlling terminal. (for * logging and informational purposes) */ SESS_LOCK(sp); ttyvp = sp->s_ttyvp; tp = sp->s_ttyp; sp->s_ttyvp = NULL; sp->s_ttydp = NULL; sp->s_leader = NULL; SESS_UNLOCK(sp); /* * Signal foreground pgrp and revoke access to * controlling terminal if it has not been revoked * already. * * Because the TTY may have been revoked in the mean * time and could already have a new session associated * with it, make sure we don't send a SIGHUP to a * foreground process group that does not belong to this * session. */ if (tp != NULL) { tty_lock(tp); if (tp->t_session == sp) tty_signal_pgrp(tp, SIGHUP); tty_unlock(tp); } if (ttyvp != NULL) { sx_xunlock(&proctree_lock); if (vn_lock(ttyvp, LK_EXCLUSIVE) == 0) { VOP_REVOKE(ttyvp, REVOKEALL); VOP_UNLOCK(ttyvp); } devfs_ctty_unref(ttyvp); sx_xlock(&proctree_lock); } } fixjobc_kill(p); } /* * A process group has become orphaned; * if there are any stopped processes in the group, * hang-up all process in that group. */ static void orphanpg(struct pgrp *pg) { struct proc *p; PGRP_LOCK_ASSERT(pg, MA_OWNED); LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); if (P_SHOULDSTOP(p) == P_STOPPED_SIG) { PROC_UNLOCK(p); LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); kern_psignal(p, SIGHUP); kern_psignal(p, SIGCONT); PROC_UNLOCK(p); } return; } PROC_UNLOCK(p); } } void sess_hold(struct session *s) { refcount_acquire(&s->s_count); } void sess_release(struct session *s) { if (refcount_release(&s->s_count)) { if (s->s_ttyp != NULL) { tty_lock(s->s_ttyp); tty_rel_sess(s->s_ttyp, s); } proc_id_clear(PROC_ID_SESSION, s->s_sid); mtx_destroy(&s->s_mtx); free(s, M_SESSION); } } #ifdef DDB static void db_print_pgrp_one(struct pgrp *pgrp, struct proc *p) { db_printf( " pid %d at %p pr %d pgrp %p e %d jc %d\n", p->p_pid, p, p->p_pptr == NULL ? -1 : p->p_pptr->p_pid, p->p_pgrp, (p->p_treeflag & P_TREE_GRPEXITED) != 0, p->p_pptr == NULL ? 0 : isjobproc(p->p_pptr, pgrp)); } DB_SHOW_COMMAND(pgrpdump, pgrpdump) { struct pgrp *pgrp; struct proc *p; int i; for (i = 0; i <= pgrphash; i++) { if (!LIST_EMPTY(&pgrphashtbl[i])) { db_printf("indx %d\n", i); LIST_FOREACH(pgrp, &pgrphashtbl[i], pg_hash) { db_printf( " pgrp %p, pgid %d, sess %p, sesscnt %d, mem %p\n", pgrp, (int)pgrp->pg_id, pgrp->pg_session, pgrp->pg_session->s_count, LIST_FIRST(&pgrp->pg_members)); LIST_FOREACH(p, &pgrp->pg_members, p_pglist) db_print_pgrp_one(pgrp, p); } } } } #endif /* DDB */ /* * Calculate the kinfo_proc members which contain process-wide * informations. * Must be called with the target process locked. */ static void fill_kinfo_aggregate(struct proc *p, struct kinfo_proc *kp) { struct thread *td; PROC_LOCK_ASSERT(p, MA_OWNED); kp->ki_estcpu = 0; kp->ki_pctcpu = 0; FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); kp->ki_pctcpu += sched_pctcpu(td); kp->ki_estcpu += sched_estcpu(td); thread_unlock(td); } } /* * Clear kinfo_proc and fill in any information that is common * to all threads in the process. * Must be called with the target process locked. */ static void fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp) { struct thread *td0; struct tty *tp; struct session *sp; struct ucred *cred; struct sigacts *ps; struct timeval boottime; PROC_LOCK_ASSERT(p, MA_OWNED); bzero(kp, sizeof(*kp)); kp->ki_structsize = sizeof(*kp); kp->ki_paddr = p; kp->ki_addr =/* p->p_addr; */0; /* XXX */ kp->ki_args = p->p_args; kp->ki_textvp = p->p_textvp; #ifdef KTRACE kp->ki_tracep = p->p_tracevp; kp->ki_traceflag = p->p_traceflag; #endif kp->ki_fd = p->p_fd; kp->ki_pd = p->p_pd; kp->ki_vmspace = p->p_vmspace; kp->ki_flag = p->p_flag; kp->ki_flag2 = p->p_flag2; cred = p->p_ucred; if (cred) { kp->ki_uid = cred->cr_uid; kp->ki_ruid = cred->cr_ruid; kp->ki_svuid = cred->cr_svuid; kp->ki_cr_flags = 0; if (cred->cr_flags & CRED_FLAG_CAPMODE) kp->ki_cr_flags |= KI_CRF_CAPABILITY_MODE; /* XXX bde doesn't like KI_NGROUPS */ if (cred->cr_ngroups > KI_NGROUPS) { kp->ki_ngroups = KI_NGROUPS; kp->ki_cr_flags |= KI_CRF_GRP_OVERFLOW; } else kp->ki_ngroups = cred->cr_ngroups; bcopy(cred->cr_groups, kp->ki_groups, kp->ki_ngroups * sizeof(gid_t)); kp->ki_rgid = cred->cr_rgid; kp->ki_svgid = cred->cr_svgid; /* If jailed(cred), emulate the old P_JAILED flag. */ if (jailed(cred)) { kp->ki_flag |= P_JAILED; /* If inside the jail, use 0 as a jail ID. */ if (cred->cr_prison != curthread->td_ucred->cr_prison) kp->ki_jid = cred->cr_prison->pr_id; } strlcpy(kp->ki_loginclass, cred->cr_loginclass->lc_name, sizeof(kp->ki_loginclass)); } ps = p->p_sigacts; if (ps) { mtx_lock(&ps->ps_mtx); kp->ki_sigignore = ps->ps_sigignore; kp->ki_sigcatch = ps->ps_sigcatch; mtx_unlock(&ps->ps_mtx); } if (p->p_state != PRS_NEW && p->p_state != PRS_ZOMBIE && p->p_vmspace != NULL) { struct vmspace *vm = p->p_vmspace; kp->ki_size = vm->vm_map.size; kp->ki_rssize = vmspace_resident_count(vm); /*XXX*/ FOREACH_THREAD_IN_PROC(p, td0) { if (!TD_IS_SWAPPED(td0)) kp->ki_rssize += td0->td_kstack_pages; } kp->ki_swrss = vm->vm_swrss; kp->ki_tsize = vm->vm_tsize; kp->ki_dsize = vm->vm_dsize; kp->ki_ssize = vm->vm_ssize; } else if (p->p_state == PRS_ZOMBIE) kp->ki_stat = SZOMB; if (kp->ki_flag & P_INMEM) kp->ki_sflag = PS_INMEM; else kp->ki_sflag = 0; /* Calculate legacy swtime as seconds since 'swtick'. */ kp->ki_swtime = (ticks - p->p_swtick) / hz; kp->ki_pid = p->p_pid; kp->ki_nice = p->p_nice; kp->ki_fibnum = p->p_fibnum; kp->ki_start = p->p_stats->p_start; getboottime(&boottime); timevaladd(&kp->ki_start, &boottime); PROC_STATLOCK(p); rufetch(p, &kp->ki_rusage); kp->ki_runtime = cputick2usec(p->p_rux.rux_runtime); calcru(p, &kp->ki_rusage.ru_utime, &kp->ki_rusage.ru_stime); PROC_STATUNLOCK(p); calccru(p, &kp->ki_childutime, &kp->ki_childstime); /* Some callers want child times in a single value. */ kp->ki_childtime = kp->ki_childstime; timevaladd(&kp->ki_childtime, &kp->ki_childutime); FOREACH_THREAD_IN_PROC(p, td0) kp->ki_cow += td0->td_cow; tp = NULL; if (p->p_pgrp) { kp->ki_pgid = p->p_pgrp->pg_id; kp->ki_jobc = p->p_pgrp->pg_jobc; sp = p->p_pgrp->pg_session; if (sp != NULL) { kp->ki_sid = sp->s_sid; SESS_LOCK(sp); strlcpy(kp->ki_login, sp->s_login, sizeof(kp->ki_login)); if (sp->s_ttyvp) kp->ki_kiflag |= KI_CTTY; if (SESS_LEADER(p)) kp->ki_kiflag |= KI_SLEADER; /* XXX proctree_lock */ tp = sp->s_ttyp; SESS_UNLOCK(sp); } } if ((p->p_flag & P_CONTROLT) && tp != NULL) { kp->ki_tdev = tty_udev(tp); kp->ki_tdev_freebsd11 = kp->ki_tdev; /* truncate */ kp->ki_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID; if (tp->t_session) kp->ki_tsid = tp->t_session->s_sid; } else { kp->ki_tdev = NODEV; kp->ki_tdev_freebsd11 = kp->ki_tdev; /* truncate */ } if (p->p_comm[0] != '\0') strlcpy(kp->ki_comm, p->p_comm, sizeof(kp->ki_comm)); if (p->p_sysent && p->p_sysent->sv_name != NULL && p->p_sysent->sv_name[0] != '\0') strlcpy(kp->ki_emul, p->p_sysent->sv_name, sizeof(kp->ki_emul)); kp->ki_siglist = p->p_siglist; kp->ki_xstat = KW_EXITCODE(p->p_xexit, p->p_xsig); kp->ki_acflag = p->p_acflag; kp->ki_lock = p->p_lock; if (p->p_pptr) { kp->ki_ppid = p->p_oppid; if (p->p_flag & P_TRACED) kp->ki_tracer = p->p_pptr->p_pid; } } /* * Fill in information that is thread specific. Must be called with * target process locked. If 'preferthread' is set, overwrite certain * process-related fields that are maintained for both threads and * processes. */ static void fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp, int preferthread) { struct proc *p; p = td->td_proc; kp->ki_tdaddr = td; PROC_LOCK_ASSERT(p, MA_OWNED); if (preferthread) PROC_STATLOCK(p); thread_lock(td); if (td->td_wmesg != NULL) strlcpy(kp->ki_wmesg, td->td_wmesg, sizeof(kp->ki_wmesg)); else bzero(kp->ki_wmesg, sizeof(kp->ki_wmesg)); if (strlcpy(kp->ki_tdname, td->td_name, sizeof(kp->ki_tdname)) >= sizeof(kp->ki_tdname)) { strlcpy(kp->ki_moretdname, td->td_name + sizeof(kp->ki_tdname) - 1, sizeof(kp->ki_moretdname)); } else { bzero(kp->ki_moretdname, sizeof(kp->ki_moretdname)); } if (TD_ON_LOCK(td)) { kp->ki_kiflag |= KI_LOCKBLOCK; strlcpy(kp->ki_lockname, td->td_lockname, sizeof(kp->ki_lockname)); } else { kp->ki_kiflag &= ~KI_LOCKBLOCK; bzero(kp->ki_lockname, sizeof(kp->ki_lockname)); } if (p->p_state == PRS_NORMAL) { /* approximate. */ if (TD_ON_RUNQ(td) || TD_CAN_RUN(td) || TD_IS_RUNNING(td)) { kp->ki_stat = SRUN; } else if (P_SHOULDSTOP(p)) { kp->ki_stat = SSTOP; } else if (TD_IS_SLEEPING(td)) { kp->ki_stat = SSLEEP; } else if (TD_ON_LOCK(td)) { kp->ki_stat = SLOCK; } else { kp->ki_stat = SWAIT; } } else if (p->p_state == PRS_ZOMBIE) { kp->ki_stat = SZOMB; } else { kp->ki_stat = SIDL; } /* Things in the thread */ kp->ki_wchan = td->td_wchan; kp->ki_pri.pri_level = td->td_priority; kp->ki_pri.pri_native = td->td_base_pri; /* * Note: legacy fields; clamp at the old NOCPU value and/or * the maximum u_char CPU value. */ if (td->td_lastcpu == NOCPU) kp->ki_lastcpu_old = NOCPU_OLD; else if (td->td_lastcpu > MAXCPU_OLD) kp->ki_lastcpu_old = MAXCPU_OLD; else kp->ki_lastcpu_old = td->td_lastcpu; if (td->td_oncpu == NOCPU) kp->ki_oncpu_old = NOCPU_OLD; else if (td->td_oncpu > MAXCPU_OLD) kp->ki_oncpu_old = MAXCPU_OLD; else kp->ki_oncpu_old = td->td_oncpu; kp->ki_lastcpu = td->td_lastcpu; kp->ki_oncpu = td->td_oncpu; kp->ki_tdflags = td->td_flags; kp->ki_tid = td->td_tid; kp->ki_numthreads = p->p_numthreads; kp->ki_pcb = td->td_pcb; kp->ki_kstack = (void *)td->td_kstack; kp->ki_slptime = (ticks - td->td_slptick) / hz; kp->ki_pri.pri_class = td->td_pri_class; kp->ki_pri.pri_user = td->td_user_pri; if (preferthread) { rufetchtd(td, &kp->ki_rusage); kp->ki_runtime = cputick2usec(td->td_rux.rux_runtime); kp->ki_pctcpu = sched_pctcpu(td); kp->ki_estcpu = sched_estcpu(td); kp->ki_cow = td->td_cow; } /* We can't get this anymore but ps etc never used it anyway. */ kp->ki_rqindex = 0; if (preferthread) kp->ki_siglist = td->td_siglist; kp->ki_sigmask = td->td_sigmask; thread_unlock(td); if (preferthread) PROC_STATUNLOCK(p); } /* * Fill in a kinfo_proc structure for the specified process. * Must be called with the target process locked. */ void fill_kinfo_proc(struct proc *p, struct kinfo_proc *kp) { MPASS(FIRST_THREAD_IN_PROC(p) != NULL); fill_kinfo_proc_only(p, kp); fill_kinfo_thread(FIRST_THREAD_IN_PROC(p), kp, 0); fill_kinfo_aggregate(p, kp); } struct pstats * pstats_alloc(void) { return (malloc(sizeof(struct pstats), M_SUBPROC, M_ZERO|M_WAITOK)); } /* * Copy parts of p_stats; zero the rest of p_stats (statistics). */ void pstats_fork(struct pstats *src, struct pstats *dst) { bzero(&dst->pstat_startzero, __rangeof(struct pstats, pstat_startzero, pstat_endzero)); bcopy(&src->pstat_startcopy, &dst->pstat_startcopy, __rangeof(struct pstats, pstat_startcopy, pstat_endcopy)); } void pstats_free(struct pstats *ps) { free(ps, M_SUBPROC); } #ifdef COMPAT_FREEBSD32 /* * This function is typically used to copy out the kernel address, so * it can be replaced by assignment of zero. */ static inline uint32_t ptr32_trim(const void *ptr) { uintptr_t uptr; uptr = (uintptr_t)ptr; return ((uptr > UINT_MAX) ? 0 : uptr); } #define PTRTRIM_CP(src,dst,fld) \ do { (dst).fld = ptr32_trim((src).fld); } while (0) static void freebsd32_kinfo_proc_out(const struct kinfo_proc *ki, struct kinfo_proc32 *ki32) { int i; bzero(ki32, sizeof(struct kinfo_proc32)); ki32->ki_structsize = sizeof(struct kinfo_proc32); CP(*ki, *ki32, ki_layout); PTRTRIM_CP(*ki, *ki32, ki_args); PTRTRIM_CP(*ki, *ki32, ki_paddr); PTRTRIM_CP(*ki, *ki32, ki_addr); PTRTRIM_CP(*ki, *ki32, ki_tracep); PTRTRIM_CP(*ki, *ki32, ki_textvp); PTRTRIM_CP(*ki, *ki32, ki_fd); PTRTRIM_CP(*ki, *ki32, ki_vmspace); PTRTRIM_CP(*ki, *ki32, ki_wchan); CP(*ki, *ki32, ki_pid); CP(*ki, *ki32, ki_ppid); CP(*ki, *ki32, ki_pgid); CP(*ki, *ki32, ki_tpgid); CP(*ki, *ki32, ki_sid); CP(*ki, *ki32, ki_tsid); CP(*ki, *ki32, ki_jobc); CP(*ki, *ki32, ki_tdev); CP(*ki, *ki32, ki_tdev_freebsd11); CP(*ki, *ki32, ki_siglist); CP(*ki, *ki32, ki_sigmask); CP(*ki, *ki32, ki_sigignore); CP(*ki, *ki32, ki_sigcatch); CP(*ki, *ki32, ki_uid); CP(*ki, *ki32, ki_ruid); CP(*ki, *ki32, ki_svuid); CP(*ki, *ki32, ki_rgid); CP(*ki, *ki32, ki_svgid); CP(*ki, *ki32, ki_ngroups); for (i = 0; i < KI_NGROUPS; i++) CP(*ki, *ki32, ki_groups[i]); CP(*ki, *ki32, ki_size); CP(*ki, *ki32, ki_rssize); CP(*ki, *ki32, ki_swrss); CP(*ki, *ki32, ki_tsize); CP(*ki, *ki32, ki_dsize); CP(*ki, *ki32, ki_ssize); CP(*ki, *ki32, ki_xstat); CP(*ki, *ki32, ki_acflag); CP(*ki, *ki32, ki_pctcpu); CP(*ki, *ki32, ki_estcpu); CP(*ki, *ki32, ki_slptime); CP(*ki, *ki32, ki_swtime); CP(*ki, *ki32, ki_cow); CP(*ki, *ki32, ki_runtime); TV_CP(*ki, *ki32, ki_start); TV_CP(*ki, *ki32, ki_childtime); CP(*ki, *ki32, ki_flag); CP(*ki, *ki32, ki_kiflag); CP(*ki, *ki32, ki_traceflag); CP(*ki, *ki32, ki_stat); CP(*ki, *ki32, ki_nice); CP(*ki, *ki32, ki_lock); CP(*ki, *ki32, ki_rqindex); CP(*ki, *ki32, ki_oncpu); CP(*ki, *ki32, ki_lastcpu); /* XXX TODO: wrap cpu value as appropriate */ CP(*ki, *ki32, ki_oncpu_old); CP(*ki, *ki32, ki_lastcpu_old); bcopy(ki->ki_tdname, ki32->ki_tdname, TDNAMLEN + 1); bcopy(ki->ki_wmesg, ki32->ki_wmesg, WMESGLEN + 1); bcopy(ki->ki_login, ki32->ki_login, LOGNAMELEN + 1); bcopy(ki->ki_lockname, ki32->ki_lockname, LOCKNAMELEN + 1); bcopy(ki->ki_comm, ki32->ki_comm, COMMLEN + 1); bcopy(ki->ki_emul, ki32->ki_emul, KI_EMULNAMELEN + 1); bcopy(ki->ki_loginclass, ki32->ki_loginclass, LOGINCLASSLEN + 1); bcopy(ki->ki_moretdname, ki32->ki_moretdname, MAXCOMLEN - TDNAMLEN + 1); CP(*ki, *ki32, ki_tracer); CP(*ki, *ki32, ki_flag2); CP(*ki, *ki32, ki_fibnum); CP(*ki, *ki32, ki_cr_flags); CP(*ki, *ki32, ki_jid); CP(*ki, *ki32, ki_numthreads); CP(*ki, *ki32, ki_tid); CP(*ki, *ki32, ki_pri); freebsd32_rusage_out(&ki->ki_rusage, &ki32->ki_rusage); freebsd32_rusage_out(&ki->ki_rusage_ch, &ki32->ki_rusage_ch); PTRTRIM_CP(*ki, *ki32, ki_pcb); PTRTRIM_CP(*ki, *ki32, ki_kstack); PTRTRIM_CP(*ki, *ki32, ki_udata); PTRTRIM_CP(*ki, *ki32, ki_tdaddr); CP(*ki, *ki32, ki_sflag); CP(*ki, *ki32, ki_tdflags); } #endif static ssize_t kern_proc_out_size(struct proc *p, int flags) { ssize_t size = 0; PROC_LOCK_ASSERT(p, MA_OWNED); if ((flags & KERN_PROC_NOTHREADS) != 0) { #ifdef COMPAT_FREEBSD32 if ((flags & KERN_PROC_MASK32) != 0) { size += sizeof(struct kinfo_proc32); } else #endif size += sizeof(struct kinfo_proc); } else { #ifdef COMPAT_FREEBSD32 if ((flags & KERN_PROC_MASK32) != 0) size += sizeof(struct kinfo_proc32) * p->p_numthreads; else #endif size += sizeof(struct kinfo_proc) * p->p_numthreads; } PROC_UNLOCK(p); return (size); } int kern_proc_out(struct proc *p, struct sbuf *sb, int flags) { struct thread *td; struct kinfo_proc ki; #ifdef COMPAT_FREEBSD32 struct kinfo_proc32 ki32; #endif int error; PROC_LOCK_ASSERT(p, MA_OWNED); MPASS(FIRST_THREAD_IN_PROC(p) != NULL); error = 0; fill_kinfo_proc(p, &ki); if ((flags & KERN_PROC_NOTHREADS) != 0) { #ifdef COMPAT_FREEBSD32 if ((flags & KERN_PROC_MASK32) != 0) { freebsd32_kinfo_proc_out(&ki, &ki32); if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0) error = ENOMEM; } else #endif if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0) error = ENOMEM; } else { FOREACH_THREAD_IN_PROC(p, td) { fill_kinfo_thread(td, &ki, 1); #ifdef COMPAT_FREEBSD32 if ((flags & KERN_PROC_MASK32) != 0) { freebsd32_kinfo_proc_out(&ki, &ki32); if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0) error = ENOMEM; } else #endif if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0) error = ENOMEM; if (error != 0) break; } } PROC_UNLOCK(p); return (error); } static int sysctl_out_proc(struct proc *p, struct sysctl_req *req, int flags) { struct sbuf sb; struct kinfo_proc ki; int error, error2; if (req->oldptr == NULL) return (SYSCTL_OUT(req, 0, kern_proc_out_size(p, flags))); sbuf_new_for_sysctl(&sb, (char *)&ki, sizeof(ki), req); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); error = kern_proc_out(p, &sb, flags); error2 = sbuf_finish(&sb); sbuf_delete(&sb); if (error != 0) return (error); else if (error2 != 0) return (error2); return (0); } int proc_iterate(int (*cb)(struct proc *, void *), void *cbarg) { struct proc *p; int error, i, j; for (i = 0; i < pidhashlock + 1; i++) { sx_slock(&pidhashtbl_lock[i]); for (j = i; j <= pidhash; j += pidhashlock + 1) { LIST_FOREACH(p, &pidhashtbl[j], p_hash) { if (p->p_state == PRS_NEW) continue; error = cb(p, cbarg); PROC_LOCK_ASSERT(p, MA_NOTOWNED); if (error != 0) { sx_sunlock(&pidhashtbl_lock[i]); return (error); } } } sx_sunlock(&pidhashtbl_lock[i]); } return (0); } struct kern_proc_out_args { struct sysctl_req *req; int flags; int oid_number; int *name; }; static int sysctl_kern_proc_iterate(struct proc *p, void *origarg) { struct kern_proc_out_args *arg = origarg; int *name = arg->name; int oid_number = arg->oid_number; int flags = arg->flags; struct sysctl_req *req = arg->req; int error = 0; PROC_LOCK(p); KASSERT(p->p_ucred != NULL, ("process credential is NULL for non-NEW proc")); /* * Show a user only appropriate processes. */ if (p_cansee(curthread, p)) goto skip; /* * TODO - make more efficient (see notes below). * do by session. */ switch (oid_number) { case KERN_PROC_GID: if (p->p_ucred->cr_gid != (gid_t)name[0]) goto skip; break; case KERN_PROC_PGRP: /* could do this by traversing pgrp */ if (p->p_pgrp == NULL || p->p_pgrp->pg_id != (pid_t)name[0]) goto skip; break; case KERN_PROC_RGID: if (p->p_ucred->cr_rgid != (gid_t)name[0]) goto skip; break; case KERN_PROC_SESSION: if (p->p_session == NULL || p->p_session->s_sid != (pid_t)name[0]) goto skip; break; case KERN_PROC_TTY: if ((p->p_flag & P_CONTROLT) == 0 || p->p_session == NULL) goto skip; /* XXX proctree_lock */ SESS_LOCK(p->p_session); if (p->p_session->s_ttyp == NULL || tty_udev(p->p_session->s_ttyp) != (dev_t)name[0]) { SESS_UNLOCK(p->p_session); goto skip; } SESS_UNLOCK(p->p_session); break; case KERN_PROC_UID: if (p->p_ucred->cr_uid != (uid_t)name[0]) goto skip; break; case KERN_PROC_RUID: if (p->p_ucred->cr_ruid != (uid_t)name[0]) goto skip; break; case KERN_PROC_PROC: break; default: break; } error = sysctl_out_proc(p, req, flags); PROC_LOCK_ASSERT(p, MA_NOTOWNED); return (error); skip: PROC_UNLOCK(p); return (0); } static int sysctl_kern_proc(SYSCTL_HANDLER_ARGS) { struct kern_proc_out_args iterarg; int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; int flags, oid_number; int error = 0; oid_number = oidp->oid_number; if (oid_number != KERN_PROC_ALL && (oid_number & KERN_PROC_INC_THREAD) == 0) flags = KERN_PROC_NOTHREADS; else { flags = 0; oid_number &= ~KERN_PROC_INC_THREAD; } #ifdef COMPAT_FREEBSD32 if (req->flags & SCTL_MASK32) flags |= KERN_PROC_MASK32; #endif if (oid_number == KERN_PROC_PID) { if (namelen != 1) return (EINVAL); error = sysctl_wire_old_buffer(req, 0); if (error) return (error); error = pget((pid_t)name[0], PGET_CANSEE, &p); if (error == 0) error = sysctl_out_proc(p, req, flags); return (error); } switch (oid_number) { case KERN_PROC_ALL: if (namelen != 0) return (EINVAL); break; case KERN_PROC_PROC: if (namelen != 0 && namelen != 1) return (EINVAL); break; default: if (namelen != 1) return (EINVAL); break; } if (req->oldptr == NULL) { /* overestimate by 5 procs */ error = SYSCTL_OUT(req, 0, sizeof (struct kinfo_proc) * 5); if (error) return (error); } else { error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); } iterarg.flags = flags; iterarg.oid_number = oid_number; iterarg.req = req; iterarg.name = name; error = proc_iterate(sysctl_kern_proc_iterate, &iterarg); return (error); } struct pargs * pargs_alloc(int len) { struct pargs *pa; pa = malloc(sizeof(struct pargs) + len, M_PARGS, M_WAITOK); refcount_init(&pa->ar_ref, 1); pa->ar_length = len; return (pa); } static void pargs_free(struct pargs *pa) { free(pa, M_PARGS); } void pargs_hold(struct pargs *pa) { if (pa == NULL) return; refcount_acquire(&pa->ar_ref); } void pargs_drop(struct pargs *pa) { if (pa == NULL) return; if (refcount_release(&pa->ar_ref)) pargs_free(pa); } static int proc_read_string(struct thread *td, struct proc *p, const char *sptr, char *buf, size_t len) { ssize_t n; /* * This may return a short read if the string is shorter than the chunk * and is aligned at the end of the page, and the following page is not * mapped. */ n = proc_readmem(td, p, (vm_offset_t)sptr, buf, len); if (n <= 0) return (ENOMEM); return (0); } #define PROC_AUXV_MAX 256 /* Safety limit on auxv size. */ enum proc_vector_type { PROC_ARG, PROC_ENV, PROC_AUX, }; #ifdef COMPAT_FREEBSD32 static int get_proc_vector32(struct thread *td, struct proc *p, char ***proc_vectorp, size_t *vsizep, enum proc_vector_type type) { struct freebsd32_ps_strings pss; Elf32_Auxinfo aux; vm_offset_t vptr, ptr; uint32_t *proc_vector32; char **proc_vector; size_t vsize, size; int i, error; error = 0; if (proc_readmem(td, p, (vm_offset_t)p->p_sysent->sv_psstrings, &pss, sizeof(pss)) != sizeof(pss)) return (ENOMEM); switch (type) { case PROC_ARG: vptr = (vm_offset_t)PTRIN(pss.ps_argvstr); vsize = pss.ps_nargvstr; if (vsize > ARG_MAX) return (ENOEXEC); size = vsize * sizeof(int32_t); break; case PROC_ENV: vptr = (vm_offset_t)PTRIN(pss.ps_envstr); vsize = pss.ps_nenvstr; if (vsize > ARG_MAX) return (ENOEXEC); size = vsize * sizeof(int32_t); break; case PROC_AUX: vptr = (vm_offset_t)PTRIN(pss.ps_envstr) + (pss.ps_nenvstr + 1) * sizeof(int32_t); if (vptr % 4 != 0) return (ENOEXEC); for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) { if (proc_readmem(td, p, ptr, &aux, sizeof(aux)) != sizeof(aux)) return (ENOMEM); if (aux.a_type == AT_NULL) break; ptr += sizeof(aux); } if (aux.a_type != AT_NULL) return (ENOEXEC); vsize = i + 1; size = vsize * sizeof(aux); break; default: KASSERT(0, ("Wrong proc vector type: %d", type)); return (EINVAL); } proc_vector32 = malloc(size, M_TEMP, M_WAITOK); if (proc_readmem(td, p, vptr, proc_vector32, size) != size) { error = ENOMEM; goto done; } if (type == PROC_AUX) { *proc_vectorp = (char **)proc_vector32; *vsizep = vsize; return (0); } proc_vector = malloc(vsize * sizeof(char *), M_TEMP, M_WAITOK); for (i = 0; i < (int)vsize; i++) proc_vector[i] = PTRIN(proc_vector32[i]); *proc_vectorp = proc_vector; *vsizep = vsize; done: free(proc_vector32, M_TEMP); return (error); } #endif static int get_proc_vector(struct thread *td, struct proc *p, char ***proc_vectorp, size_t *vsizep, enum proc_vector_type type) { struct ps_strings pss; Elf_Auxinfo aux; vm_offset_t vptr, ptr; char **proc_vector; size_t vsize, size; int i; #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(p, SV_ILP32) != 0) return (get_proc_vector32(td, p, proc_vectorp, vsizep, type)); #endif if (proc_readmem(td, p, (vm_offset_t)p->p_sysent->sv_psstrings, &pss, sizeof(pss)) != sizeof(pss)) return (ENOMEM); switch (type) { case PROC_ARG: vptr = (vm_offset_t)pss.ps_argvstr; vsize = pss.ps_nargvstr; if (vsize > ARG_MAX) return (ENOEXEC); size = vsize * sizeof(char *); break; case PROC_ENV: vptr = (vm_offset_t)pss.ps_envstr; vsize = pss.ps_nenvstr; if (vsize > ARG_MAX) return (ENOEXEC); size = vsize * sizeof(char *); break; case PROC_AUX: /* * The aux array is just above env array on the stack. Check * that the address is naturally aligned. */ vptr = (vm_offset_t)pss.ps_envstr + (pss.ps_nenvstr + 1) * sizeof(char *); #if __ELF_WORD_SIZE == 64 if (vptr % sizeof(uint64_t) != 0) #else if (vptr % sizeof(uint32_t) != 0) #endif return (ENOEXEC); /* * We count the array size reading the aux vectors from the * stack until AT_NULL vector is returned. So (to keep the code * simple) we read the process stack twice: the first time here * to find the size and the second time when copying the vectors * to the allocated proc_vector. */ for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) { if (proc_readmem(td, p, ptr, &aux, sizeof(aux)) != sizeof(aux)) return (ENOMEM); if (aux.a_type == AT_NULL) break; ptr += sizeof(aux); } /* * If the PROC_AUXV_MAX entries are iterated over, and we have * not reached AT_NULL, it is most likely we are reading wrong * data: either the process doesn't have auxv array or data has * been modified. Return the error in this case. */ if (aux.a_type != AT_NULL) return (ENOEXEC); vsize = i + 1; size = vsize * sizeof(aux); break; default: KASSERT(0, ("Wrong proc vector type: %d", type)); return (EINVAL); /* In case we are built without INVARIANTS. */ } proc_vector = malloc(size, M_TEMP, M_WAITOK); if (proc_readmem(td, p, vptr, proc_vector, size) != size) { free(proc_vector, M_TEMP); return (ENOMEM); } *proc_vectorp = proc_vector; *vsizep = vsize; return (0); } #define GET_PS_STRINGS_CHUNK_SZ 256 /* Chunk size (bytes) for ps_strings operations. */ static int get_ps_strings(struct thread *td, struct proc *p, struct sbuf *sb, enum proc_vector_type type) { size_t done, len, nchr, vsize; int error, i; char **proc_vector, *sptr; char pss_string[GET_PS_STRINGS_CHUNK_SZ]; PROC_ASSERT_HELD(p); /* * We are not going to read more than 2 * (PATH_MAX + ARG_MAX) bytes. */ nchr = 2 * (PATH_MAX + ARG_MAX); error = get_proc_vector(td, p, &proc_vector, &vsize, type); if (error != 0) return (error); for (done = 0, i = 0; i < (int)vsize && done < nchr; i++) { /* * The program may have scribbled into its argv array, e.g. to * remove some arguments. If that has happened, break out * before trying to read from NULL. */ if (proc_vector[i] == NULL) break; for (sptr = proc_vector[i]; ; sptr += GET_PS_STRINGS_CHUNK_SZ) { error = proc_read_string(td, p, sptr, pss_string, sizeof(pss_string)); if (error != 0) goto done; len = strnlen(pss_string, GET_PS_STRINGS_CHUNK_SZ); if (done + len >= nchr) len = nchr - done - 1; sbuf_bcat(sb, pss_string, len); if (len != GET_PS_STRINGS_CHUNK_SZ) break; done += GET_PS_STRINGS_CHUNK_SZ; } sbuf_bcat(sb, "", 1); done += len + 1; } done: free(proc_vector, M_TEMP); return (error); } int proc_getargv(struct thread *td, struct proc *p, struct sbuf *sb) { return (get_ps_strings(curthread, p, sb, PROC_ARG)); } int proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb) { return (get_ps_strings(curthread, p, sb, PROC_ENV)); } int proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb) { size_t vsize, size; char **auxv; int error; error = get_proc_vector(td, p, &auxv, &vsize, PROC_AUX); if (error == 0) { #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(p, SV_ILP32) != 0) size = vsize * sizeof(Elf32_Auxinfo); else #endif size = vsize * sizeof(Elf_Auxinfo); if (sbuf_bcat(sb, auxv, size) != 0) error = ENOMEM; free(auxv, M_TEMP); } return (error); } /* * This sysctl allows a process to retrieve the argument list or process * title for another process without groping around in the address space * of the other process. It also allow a process to set its own "process * title to a string of its own choice. */ static int sysctl_kern_proc_args(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct pargs *newpa, *pa; struct proc *p; struct sbuf sb; int flags, error = 0, error2; pid_t pid; if (namelen != 1) return (EINVAL); pid = (pid_t)name[0]; /* * If the query is for this process and it is single-threaded, there * is nobody to modify pargs, thus we can just read. */ p = curproc; if (pid == p->p_pid && p->p_numthreads == 1 && req->newptr == NULL && (pa = p->p_args) != NULL) return (SYSCTL_OUT(req, pa->ar_args, pa->ar_length)); flags = PGET_CANSEE; if (req->newptr != NULL) flags |= PGET_ISCURRENT; error = pget(pid, flags, &p); if (error) return (error); pa = p->p_args; if (pa != NULL) { pargs_hold(pa); PROC_UNLOCK(p); error = SYSCTL_OUT(req, pa->ar_args, pa->ar_length); pargs_drop(pa); } else if ((p->p_flag & (P_WEXIT | P_SYSTEM)) == 0) { _PHOLD(p); PROC_UNLOCK(p); sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); error = proc_getargv(curthread, p, &sb); error2 = sbuf_finish(&sb); PRELE(p); sbuf_delete(&sb); if (error == 0 && error2 != 0) error = error2; } else { PROC_UNLOCK(p); } if (error != 0 || req->newptr == NULL) return (error); if (req->newlen > ps_arg_cache_limit - sizeof(struct pargs)) return (ENOMEM); if (req->newlen == 0) { /* * Clear the argument pointer, so that we'll fetch arguments * with proc_getargv() until further notice. */ newpa = NULL; } else { newpa = pargs_alloc(req->newlen); error = SYSCTL_IN(req, newpa->ar_args, req->newlen); if (error != 0) { pargs_free(newpa); return (error); } } PROC_LOCK(p); pa = p->p_args; p->p_args = newpa; PROC_UNLOCK(p); pargs_drop(pa); return (0); } /* * This sysctl allows a process to retrieve environment of another process. */ static int sysctl_kern_proc_env(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; struct sbuf sb; int error, error2; if (namelen != 1) return (EINVAL); error = pget((pid_t)name[0], PGET_WANTREAD, &p); if (error != 0) return (error); if ((p->p_flag & P_SYSTEM) != 0) { PRELE(p); return (0); } sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); error = proc_getenvv(curthread, p, &sb); error2 = sbuf_finish(&sb); PRELE(p); sbuf_delete(&sb); return (error != 0 ? error : error2); } /* * This sysctl allows a process to retrieve ELF auxiliary vector of * another process. */ static int sysctl_kern_proc_auxv(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; struct sbuf sb; int error, error2; if (namelen != 1) return (EINVAL); error = pget((pid_t)name[0], PGET_WANTREAD, &p); if (error != 0) return (error); if ((p->p_flag & P_SYSTEM) != 0) { PRELE(p); return (0); } sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); error = proc_getauxv(curthread, p, &sb); error2 = sbuf_finish(&sb); PRELE(p); sbuf_delete(&sb); return (error != 0 ? error : error2); } /* * This sysctl allows a process to retrieve the path of the executable for * itself or another process. */ static int sysctl_kern_proc_pathname(SYSCTL_HANDLER_ARGS) { pid_t *pidp = (pid_t *)arg1; unsigned int arglen = arg2; struct proc *p; struct vnode *vp; char *retbuf, *freebuf; int error; if (arglen != 1) return (EINVAL); if (*pidp == -1) { /* -1 means this process */ p = req->td->td_proc; } else { error = pget(*pidp, PGET_CANSEE, &p); if (error != 0) return (error); } vp = p->p_textvp; if (vp == NULL) { if (*pidp != -1) PROC_UNLOCK(p); return (0); } vref(vp); if (*pidp != -1) PROC_UNLOCK(p); error = vn_fullpath(vp, &retbuf, &freebuf); vrele(vp); if (error) return (error); error = SYSCTL_OUT(req, retbuf, strlen(retbuf) + 1); free(freebuf, M_TEMP); return (error); } static int sysctl_kern_proc_sv_name(SYSCTL_HANDLER_ARGS) { struct proc *p; char *sv_name; int *name; int namelen; int error; namelen = arg2; if (namelen != 1) return (EINVAL); name = (int *)arg1; error = pget((pid_t)name[0], PGET_CANSEE, &p); if (error != 0) return (error); sv_name = p->p_sysent->sv_name; PROC_UNLOCK(p); return (sysctl_handle_string(oidp, sv_name, 0, req)); } #ifdef KINFO_OVMENTRY_SIZE CTASSERT(sizeof(struct kinfo_ovmentry) == KINFO_OVMENTRY_SIZE); #endif #ifdef COMPAT_FREEBSD7 static int sysctl_kern_proc_ovmmap(SYSCTL_HANDLER_ARGS) { vm_map_entry_t entry, tmp_entry; unsigned int last_timestamp; char *fullpath, *freepath; struct kinfo_ovmentry *kve; struct vattr va; struct ucred *cred; int error, *name; struct vnode *vp; struct proc *p; vm_map_t map; struct vmspace *vm; name = (int *)arg1; error = pget((pid_t)name[0], PGET_WANTREAD, &p); if (error != 0) return (error); vm = vmspace_acquire_ref(p); if (vm == NULL) { PRELE(p); return (ESRCH); } kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK); map = &vm->vm_map; vm_map_lock_read(map); VM_MAP_ENTRY_FOREACH(entry, map) { vm_object_t obj, tobj, lobj; vm_offset_t addr; if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) continue; bzero(kve, sizeof(*kve)); kve->kve_structsize = sizeof(*kve); kve->kve_private_resident = 0; obj = entry->object.vm_object; if (obj != NULL) { VM_OBJECT_RLOCK(obj); if (obj->shadow_count == 1) kve->kve_private_resident = obj->resident_page_count; } kve->kve_resident = 0; addr = entry->start; while (addr < entry->end) { if (pmap_extract(map->pmap, addr)) kve->kve_resident++; addr += PAGE_SIZE; } for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) { if (tobj != obj) { VM_OBJECT_RLOCK(tobj); kve->kve_offset += tobj->backing_object_offset; } if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); lobj = tobj; } kve->kve_start = (void*)entry->start; kve->kve_end = (void*)entry->end; kve->kve_offset += (off_t)entry->offset; if (entry->protection & VM_PROT_READ) kve->kve_protection |= KVME_PROT_READ; if (entry->protection & VM_PROT_WRITE) kve->kve_protection |= KVME_PROT_WRITE; if (entry->protection & VM_PROT_EXECUTE) kve->kve_protection |= KVME_PROT_EXEC; if (entry->eflags & MAP_ENTRY_COW) kve->kve_flags |= KVME_FLAG_COW; if (entry->eflags & MAP_ENTRY_NEEDS_COPY) kve->kve_flags |= KVME_FLAG_NEEDS_COPY; if (entry->eflags & MAP_ENTRY_NOCOREDUMP) kve->kve_flags |= KVME_FLAG_NOCOREDUMP; last_timestamp = map->timestamp; vm_map_unlock_read(map); kve->kve_fileid = 0; kve->kve_fsid = 0; freepath = NULL; fullpath = ""; if (lobj) { kve->kve_type = vm_object_kvme_type(lobj, &vp); if (kve->kve_type == KVME_TYPE_MGTDEVICE) kve->kve_type = KVME_TYPE_UNKNOWN; if (vp != NULL) vref(vp); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); kve->kve_ref_count = obj->ref_count; kve->kve_shadow_count = obj->shadow_count; VM_OBJECT_RUNLOCK(obj); if (vp != NULL) { vn_fullpath(vp, &fullpath, &freepath); cred = curthread->td_ucred; vn_lock(vp, LK_SHARED | LK_RETRY); if (VOP_GETATTR(vp, &va, cred) == 0) { kve->kve_fileid = va.va_fileid; /* truncate */ kve->kve_fsid = va.va_fsid; } vput(vp); } } else { kve->kve_type = KVME_TYPE_NONE; kve->kve_ref_count = 0; kve->kve_shadow_count = 0; } strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path)); if (freepath != NULL) free(freepath, M_TEMP); error = SYSCTL_OUT(req, kve, sizeof(*kve)); vm_map_lock_read(map); if (error) break; if (last_timestamp != map->timestamp) { vm_map_lookup_entry(map, addr - 1, &tmp_entry); entry = tmp_entry; } } vm_map_unlock_read(map); vmspace_free(vm); PRELE(p); free(kve, M_TEMP); return (error); } #endif /* COMPAT_FREEBSD7 */ #ifdef KINFO_VMENTRY_SIZE CTASSERT(sizeof(struct kinfo_vmentry) == KINFO_VMENTRY_SIZE); #endif void kern_proc_vmmap_resident(vm_map_t map, vm_map_entry_t entry, int *resident_count, bool *super) { vm_object_t obj, tobj; vm_page_t m, m_adv; vm_offset_t addr; vm_paddr_t pa; vm_pindex_t pi, pi_adv, pindex; *super = false; *resident_count = 0; if (vmmap_skip_res_cnt) return; pa = 0; obj = entry->object.vm_object; addr = entry->start; m_adv = NULL; pi = OFF_TO_IDX(entry->offset); for (; addr < entry->end; addr += IDX_TO_OFF(pi_adv), pi += pi_adv) { if (m_adv != NULL) { m = m_adv; } else { pi_adv = atop(entry->end - addr); pindex = pi; for (tobj = obj;; tobj = tobj->backing_object) { m = vm_page_find_least(tobj, pindex); if (m != NULL) { if (m->pindex == pindex) break; if (pi_adv > m->pindex - pindex) { pi_adv = m->pindex - pindex; m_adv = m; } } if (tobj->backing_object == NULL) goto next; pindex += OFF_TO_IDX(tobj-> backing_object_offset); } } m_adv = NULL; if (m->psind != 0 && addr + pagesizes[1] <= entry->end && (addr & (pagesizes[1] - 1)) == 0 && (pmap_mincore(map->pmap, addr, &pa) & MINCORE_SUPER) != 0) { *super = true; pi_adv = atop(pagesizes[1]); } else { /* * We do not test the found page on validity. * Either the page is busy and being paged in, * or it was invalidated. The first case * should be counted as resident, the second * is not so clear; we do account both. */ pi_adv = 1; } *resident_count += pi_adv; next:; } } /* * Must be called with the process locked and will return unlocked. */ int kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, int flags) { vm_map_entry_t entry, tmp_entry; struct vattr va; vm_map_t map; vm_object_t obj, tobj, lobj; char *fullpath, *freepath; struct kinfo_vmentry *kve; struct ucred *cred; struct vnode *vp; struct vmspace *vm; vm_offset_t addr; unsigned int last_timestamp; int error; bool super; PROC_LOCK_ASSERT(p, MA_OWNED); _PHOLD(p); PROC_UNLOCK(p); vm = vmspace_acquire_ref(p); if (vm == NULL) { PRELE(p); return (ESRCH); } kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK | M_ZERO); error = 0; map = &vm->vm_map; vm_map_lock_read(map); VM_MAP_ENTRY_FOREACH(entry, map) { if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) continue; addr = entry->end; bzero(kve, sizeof(*kve)); obj = entry->object.vm_object; if (obj != NULL) { for (tobj = obj; tobj != NULL; tobj = tobj->backing_object) { VM_OBJECT_RLOCK(tobj); kve->kve_offset += tobj->backing_object_offset; lobj = tobj; } if (obj->backing_object == NULL) kve->kve_private_resident = obj->resident_page_count; kern_proc_vmmap_resident(map, entry, &kve->kve_resident, &super); if (super) kve->kve_flags |= KVME_FLAG_SUPER; for (tobj = obj; tobj != NULL; tobj = tobj->backing_object) { if (tobj != obj && tobj != lobj) VM_OBJECT_RUNLOCK(tobj); } } else { lobj = NULL; } kve->kve_start = entry->start; kve->kve_end = entry->end; kve->kve_offset += entry->offset; if (entry->protection & VM_PROT_READ) kve->kve_protection |= KVME_PROT_READ; if (entry->protection & VM_PROT_WRITE) kve->kve_protection |= KVME_PROT_WRITE; if (entry->protection & VM_PROT_EXECUTE) kve->kve_protection |= KVME_PROT_EXEC; if (entry->eflags & MAP_ENTRY_COW) kve->kve_flags |= KVME_FLAG_COW; if (entry->eflags & MAP_ENTRY_NEEDS_COPY) kve->kve_flags |= KVME_FLAG_NEEDS_COPY; if (entry->eflags & MAP_ENTRY_NOCOREDUMP) kve->kve_flags |= KVME_FLAG_NOCOREDUMP; if (entry->eflags & MAP_ENTRY_GROWS_UP) kve->kve_flags |= KVME_FLAG_GROWS_UP; if (entry->eflags & MAP_ENTRY_GROWS_DOWN) kve->kve_flags |= KVME_FLAG_GROWS_DOWN; if (entry->eflags & MAP_ENTRY_USER_WIRED) kve->kve_flags |= KVME_FLAG_USER_WIRED; last_timestamp = map->timestamp; vm_map_unlock_read(map); freepath = NULL; fullpath = ""; if (lobj != NULL) { kve->kve_type = vm_object_kvme_type(lobj, &vp); if (vp != NULL) vref(vp); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); kve->kve_ref_count = obj->ref_count; kve->kve_shadow_count = obj->shadow_count; VM_OBJECT_RUNLOCK(obj); if (vp != NULL) { vn_fullpath(vp, &fullpath, &freepath); kve->kve_vn_type = vntype_to_kinfo(vp->v_type); cred = curthread->td_ucred; vn_lock(vp, LK_SHARED | LK_RETRY); if (VOP_GETATTR(vp, &va, cred) == 0) { kve->kve_vn_fileid = va.va_fileid; kve->kve_vn_fsid = va.va_fsid; kve->kve_vn_fsid_freebsd11 = kve->kve_vn_fsid; /* truncate */ kve->kve_vn_mode = MAKEIMODE(va.va_type, va.va_mode); kve->kve_vn_size = va.va_size; kve->kve_vn_rdev = va.va_rdev; kve->kve_vn_rdev_freebsd11 = kve->kve_vn_rdev; /* truncate */ kve->kve_status = KF_ATTR_VALID; } vput(vp); } } else { kve->kve_type = KVME_TYPE_NONE; kve->kve_ref_count = 0; kve->kve_shadow_count = 0; } strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path)); if (freepath != NULL) free(freepath, M_TEMP); /* Pack record size down */ if ((flags & KERN_VMMAP_PACK_KINFO) != 0) kve->kve_structsize = offsetof(struct kinfo_vmentry, kve_path) + strlen(kve->kve_path) + 1; else kve->kve_structsize = sizeof(*kve); kve->kve_structsize = roundup(kve->kve_structsize, sizeof(uint64_t)); /* Halt filling and truncate rather than exceeding maxlen */ if (maxlen != -1 && maxlen < kve->kve_structsize) { error = 0; vm_map_lock_read(map); break; } else if (maxlen != -1) maxlen -= kve->kve_structsize; if (sbuf_bcat(sb, kve, kve->kve_structsize) != 0) error = ENOMEM; vm_map_lock_read(map); if (error != 0) break; if (last_timestamp != map->timestamp) { vm_map_lookup_entry(map, addr - 1, &tmp_entry); entry = tmp_entry; } } vm_map_unlock_read(map); vmspace_free(vm); PRELE(p); free(kve, M_TEMP); return (error); } static int sysctl_kern_proc_vmmap(SYSCTL_HANDLER_ARGS) { struct proc *p; struct sbuf sb; int error, error2, *name; name = (int *)arg1; sbuf_new_for_sysctl(&sb, NULL, sizeof(struct kinfo_vmentry), req); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p); if (error != 0) { sbuf_delete(&sb); return (error); } error = kern_proc_vmmap_out(p, &sb, -1, KERN_VMMAP_PACK_KINFO); error2 = sbuf_finish(&sb); sbuf_delete(&sb); return (error != 0 ? error : error2); } #if defined(STACK) || defined(DDB) static int sysctl_kern_proc_kstack(SYSCTL_HANDLER_ARGS) { struct kinfo_kstack *kkstp; int error, i, *name, numthreads; lwpid_t *lwpidarray; struct thread *td; struct stack *st; struct sbuf sb; struct proc *p; name = (int *)arg1; error = pget((pid_t)name[0], PGET_NOTINEXEC | PGET_WANTREAD, &p); if (error != 0) return (error); kkstp = malloc(sizeof(*kkstp), M_TEMP, M_WAITOK); st = stack_create(M_WAITOK); lwpidarray = NULL; PROC_LOCK(p); do { if (lwpidarray != NULL) { free(lwpidarray, M_TEMP); lwpidarray = NULL; } numthreads = p->p_numthreads; PROC_UNLOCK(p); lwpidarray = malloc(sizeof(*lwpidarray) * numthreads, M_TEMP, M_WAITOK | M_ZERO); PROC_LOCK(p); } while (numthreads < p->p_numthreads); /* * XXXRW: During the below loop, execve(2) and countless other sorts * of changes could have taken place. Should we check to see if the * vmspace has been replaced, or the like, in order to prevent * giving a snapshot that spans, say, execve(2), with some threads * before and some after? Among other things, the credentials could * have changed, in which case the right to extract debug info might * no longer be assured. */ i = 0; FOREACH_THREAD_IN_PROC(p, td) { KASSERT(i < numthreads, ("sysctl_kern_proc_kstack: numthreads")); lwpidarray[i] = td->td_tid; i++; } PROC_UNLOCK(p); numthreads = i; for (i = 0; i < numthreads; i++) { td = tdfind(lwpidarray[i], p->p_pid); if (td == NULL) { continue; } bzero(kkstp, sizeof(*kkstp)); (void)sbuf_new(&sb, kkstp->kkst_trace, sizeof(kkstp->kkst_trace), SBUF_FIXEDLEN); thread_lock(td); kkstp->kkst_tid = td->td_tid; if (TD_IS_SWAPPED(td)) kkstp->kkst_state = KKST_STATE_SWAPPED; else if (stack_save_td(st, td) == 0) kkstp->kkst_state = KKST_STATE_STACKOK; else kkstp->kkst_state = KKST_STATE_RUNNING; thread_unlock(td); PROC_UNLOCK(p); stack_sbuf_print(&sb, st); sbuf_finish(&sb); sbuf_delete(&sb); error = SYSCTL_OUT(req, kkstp, sizeof(*kkstp)); if (error) break; } PRELE(p); if (lwpidarray != NULL) free(lwpidarray, M_TEMP); stack_destroy(st); free(kkstp, M_TEMP); return (error); } #endif /* * This sysctl allows a process to retrieve the full list of groups from * itself or another process. */ static int sysctl_kern_proc_groups(SYSCTL_HANDLER_ARGS) { pid_t *pidp = (pid_t *)arg1; unsigned int arglen = arg2; struct proc *p; struct ucred *cred; int error; if (arglen != 1) return (EINVAL); if (*pidp == -1) { /* -1 means this process */ p = req->td->td_proc; PROC_LOCK(p); } else { error = pget(*pidp, PGET_CANSEE, &p); if (error != 0) return (error); } cred = crhold(p->p_ucred); PROC_UNLOCK(p); error = SYSCTL_OUT(req, cred->cr_groups, cred->cr_ngroups * sizeof(gid_t)); crfree(cred); return (error); } /* * This sysctl allows a process to retrieve or/and set the resource limit for * another process. */ static int sysctl_kern_proc_rlimit(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct rlimit rlim; struct proc *p; u_int which; int flags, error; if (namelen != 2) return (EINVAL); which = (u_int)name[1]; if (which >= RLIM_NLIMITS) return (EINVAL); if (req->newptr != NULL && req->newlen != sizeof(rlim)) return (EINVAL); flags = PGET_HOLD | PGET_NOTWEXIT; if (req->newptr != NULL) flags |= PGET_CANDEBUG; else flags |= PGET_CANSEE; error = pget((pid_t)name[0], flags, &p); if (error != 0) return (error); /* * Retrieve limit. */ if (req->oldptr != NULL) { PROC_LOCK(p); lim_rlimit_proc(p, which, &rlim); PROC_UNLOCK(p); } error = SYSCTL_OUT(req, &rlim, sizeof(rlim)); if (error != 0) goto errout; /* * Set limit. */ if (req->newptr != NULL) { error = SYSCTL_IN(req, &rlim, sizeof(rlim)); if (error == 0) error = kern_proc_setrlimit(curthread, p, which, &rlim); } errout: PRELE(p); return (error); } /* * This sysctl allows a process to retrieve ps_strings structure location of * another process. */ static int sysctl_kern_proc_ps_strings(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; vm_offset_t ps_strings; int error; #ifdef COMPAT_FREEBSD32 uint32_t ps_strings32; #endif if (namelen != 1) return (EINVAL); error = pget((pid_t)name[0], PGET_CANDEBUG, &p); if (error != 0) return (error); #ifdef COMPAT_FREEBSD32 if ((req->flags & SCTL_MASK32) != 0) { /* * We return 0 if the 32 bit emulation request is for a 64 bit * process. */ ps_strings32 = SV_PROC_FLAG(p, SV_ILP32) != 0 ? PTROUT(p->p_sysent->sv_psstrings) : 0; PROC_UNLOCK(p); error = SYSCTL_OUT(req, &ps_strings32, sizeof(ps_strings32)); return (error); } #endif ps_strings = p->p_sysent->sv_psstrings; PROC_UNLOCK(p); error = SYSCTL_OUT(req, &ps_strings, sizeof(ps_strings)); return (error); } /* * This sysctl allows a process to retrieve umask of another process. */ static int sysctl_kern_proc_umask(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; int error; u_short cmask; pid_t pid; if (namelen != 1) return (EINVAL); pid = (pid_t)name[0]; p = curproc; if (pid == p->p_pid || pid == 0) { cmask = p->p_pd->pd_cmask; goto out; } error = pget(pid, PGET_WANTREAD, &p); if (error != 0) return (error); cmask = p->p_pd->pd_cmask; PRELE(p); out: error = SYSCTL_OUT(req, &cmask, sizeof(cmask)); return (error); } /* * This sysctl allows a process to set and retrieve binary osreldate of * another process. */ static int sysctl_kern_proc_osrel(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; int flags, error, osrel; if (namelen != 1) return (EINVAL); if (req->newptr != NULL && req->newlen != sizeof(osrel)) return (EINVAL); flags = PGET_HOLD | PGET_NOTWEXIT; if (req->newptr != NULL) flags |= PGET_CANDEBUG; else flags |= PGET_CANSEE; error = pget((pid_t)name[0], flags, &p); if (error != 0) return (error); error = SYSCTL_OUT(req, &p->p_osrel, sizeof(p->p_osrel)); if (error != 0) goto errout; if (req->newptr != NULL) { error = SYSCTL_IN(req, &osrel, sizeof(osrel)); if (error != 0) goto errout; if (osrel < 0) { error = EINVAL; goto errout; } p->p_osrel = osrel; } errout: PRELE(p); return (error); } static int sysctl_kern_proc_sigtramp(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; struct kinfo_sigtramp kst; const struct sysentvec *sv; int error; #ifdef COMPAT_FREEBSD32 struct kinfo_sigtramp32 kst32; #endif if (namelen != 1) return (EINVAL); error = pget((pid_t)name[0], PGET_CANDEBUG, &p); if (error != 0) return (error); sv = p->p_sysent; #ifdef COMPAT_FREEBSD32 if ((req->flags & SCTL_MASK32) != 0) { bzero(&kst32, sizeof(kst32)); if (SV_PROC_FLAG(p, SV_ILP32)) { if (sv->sv_sigcode_base != 0) { kst32.ksigtramp_start = sv->sv_sigcode_base; kst32.ksigtramp_end = sv->sv_sigcode_base + *sv->sv_szsigcode; } else { kst32.ksigtramp_start = sv->sv_psstrings - *sv->sv_szsigcode; kst32.ksigtramp_end = sv->sv_psstrings; } } PROC_UNLOCK(p); error = SYSCTL_OUT(req, &kst32, sizeof(kst32)); return (error); } #endif bzero(&kst, sizeof(kst)); if (sv->sv_sigcode_base != 0) { kst.ksigtramp_start = (char *)sv->sv_sigcode_base; kst.ksigtramp_end = (char *)sv->sv_sigcode_base + *sv->sv_szsigcode; } else { kst.ksigtramp_start = (char *)sv->sv_psstrings - *sv->sv_szsigcode; kst.ksigtramp_end = (char *)sv->sv_psstrings; } PROC_UNLOCK(p); error = SYSCTL_OUT(req, &kst, sizeof(kst)); return (error); } static int sysctl_kern_proc_sigfastblk(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; pid_t pid; struct proc *p; struct thread *td1; uintptr_t addr; #ifdef COMPAT_FREEBSD32 uint32_t addr32; #endif int error; if (namelen != 1 || req->newptr != NULL) return (EINVAL); pid = (pid_t)name[0]; error = pget(pid, PGET_HOLD | PGET_NOTWEXIT | PGET_CANDEBUG, &p); if (error != 0) return (error); PROC_LOCK(p); #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) { if (!SV_PROC_FLAG(p, SV_ILP32)) { error = EINVAL; goto errlocked; } } #endif if (pid <= PID_MAX) { td1 = FIRST_THREAD_IN_PROC(p); } else { FOREACH_THREAD_IN_PROC(p, td1) { if (td1->td_tid == pid) break; } } if (td1 == NULL) { error = ESRCH; goto errlocked; } /* * The access to the private thread flags. It is fine as far * as no out-of-thin-air values are read from td_pflags, and * usermode read of the td_sigblock_ptr is racy inherently, * since target process might have already changed it * meantime. */ if ((td1->td_pflags & TDP_SIGFASTBLOCK) != 0) addr = (uintptr_t)td1->td_sigblock_ptr; else error = ENOTTY; errlocked: _PRELE(p); PROC_UNLOCK(p); if (error != 0) return (error); #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) { addr32 = addr; error = SYSCTL_OUT(req, &addr32, sizeof(addr32)); } else #endif error = SYSCTL_OUT(req, &addr, sizeof(addr)); return (error); } SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Process table"); SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT| CTLFLAG_MPSAFE, 0, 0, sysctl_kern_proc, "S,proc", "Return entire process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_GID, gid, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_PGRP, pgrp, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_RGID, rgid, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_SESSION, sid, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_TTY, tty, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_UID, uid, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_RUID, ruid, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_PID, pid, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_PROC, proc, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Return process table, no threads"); static SYSCTL_NODE(_kern_proc, KERN_PROC_ARGS, args, CTLFLAG_RW | CTLFLAG_CAPWR | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_args, "Process argument list"); static SYSCTL_NODE(_kern_proc, KERN_PROC_ENV, env, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_env, "Process environment"); static SYSCTL_NODE(_kern_proc, KERN_PROC_AUXV, auxv, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_auxv, "Process ELF auxiliary vector"); static SYSCTL_NODE(_kern_proc, KERN_PROC_PATHNAME, pathname, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_pathname, "Process executable path"); static SYSCTL_NODE(_kern_proc, KERN_PROC_SV_NAME, sv_name, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_sv_name, "Process syscall vector name (ABI type)"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_GID | KERN_PROC_INC_THREAD), gid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_PGRP | KERN_PROC_INC_THREAD), pgrp_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_RGID | KERN_PROC_INC_THREAD), rgid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_SESSION | KERN_PROC_INC_THREAD), sid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_TTY | KERN_PROC_INC_THREAD), tty_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_UID | KERN_PROC_INC_THREAD), uid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_RUID | KERN_PROC_INC_THREAD), ruid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_PID | KERN_PROC_INC_THREAD), pid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_PROC | KERN_PROC_INC_THREAD), proc_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Return process table, no threads"); #ifdef COMPAT_FREEBSD7 static SYSCTL_NODE(_kern_proc, KERN_PROC_OVMMAP, ovmmap, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_ovmmap, "Old Process vm map entries"); #endif static SYSCTL_NODE(_kern_proc, KERN_PROC_VMMAP, vmmap, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_vmmap, "Process vm map entries"); #if defined(STACK) || defined(DDB) static SYSCTL_NODE(_kern_proc, KERN_PROC_KSTACK, kstack, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_kstack, "Process kernel stacks"); #endif static SYSCTL_NODE(_kern_proc, KERN_PROC_GROUPS, groups, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_groups, "Process groups"); static SYSCTL_NODE(_kern_proc, KERN_PROC_RLIMIT, rlimit, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_rlimit, "Process resource limits"); static SYSCTL_NODE(_kern_proc, KERN_PROC_PS_STRINGS, ps_strings, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_ps_strings, "Process ps_strings location"); static SYSCTL_NODE(_kern_proc, KERN_PROC_UMASK, umask, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_umask, "Process umask"); static SYSCTL_NODE(_kern_proc, KERN_PROC_OSREL, osrel, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_osrel, "Process binary osreldate"); static SYSCTL_NODE(_kern_proc, KERN_PROC_SIGTRAMP, sigtramp, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_sigtramp, "Process signal trampoline location"); static SYSCTL_NODE(_kern_proc, KERN_PROC_SIGFASTBLK, sigfastblk, CTLFLAG_RD | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_sigfastblk, "Thread sigfastblock address"); int allproc_gen; /* * stop_all_proc() purpose is to stop all process which have usermode, * except current process for obvious reasons. This makes it somewhat * unreliable when invoked from multithreaded process. The service * must not be user-callable anyway. */ void stop_all_proc(void) { struct proc *cp, *p; int r, gen; bool restart, seen_stopped, seen_exiting, stopped_some; cp = curproc; allproc_loop: sx_xlock(&allproc_lock); gen = allproc_gen; seen_exiting = seen_stopped = stopped_some = restart = false; LIST_REMOVE(cp, p_list); LIST_INSERT_HEAD(&allproc, cp, p_list); for (;;) { p = LIST_NEXT(cp, p_list); if (p == NULL) break; LIST_REMOVE(cp, p_list); LIST_INSERT_AFTER(p, cp, p_list); PROC_LOCK(p); if ((p->p_flag & (P_KPROC | P_SYSTEM | P_TOTAL_STOP)) != 0) { PROC_UNLOCK(p); continue; } if ((p->p_flag & P_WEXIT) != 0) { seen_exiting = true; PROC_UNLOCK(p); continue; } if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) { /* * Stopped processes are tolerated when there * are no other processes which might continue * them. P_STOPPED_SINGLE but not * P_TOTAL_STOP process still has at least one * thread running. */ seen_stopped = true; PROC_UNLOCK(p); continue; } sx_xunlock(&allproc_lock); _PHOLD(p); r = thread_single(p, SINGLE_ALLPROC); if (r != 0) restart = true; else stopped_some = true; _PRELE(p); PROC_UNLOCK(p); sx_xlock(&allproc_lock); } /* Catch forked children we did not see in iteration. */ if (gen != allproc_gen) restart = true; sx_xunlock(&allproc_lock); if (restart || stopped_some || seen_exiting || seen_stopped) { kern_yield(PRI_USER); goto allproc_loop; } } void resume_all_proc(void) { struct proc *cp, *p; cp = curproc; sx_xlock(&allproc_lock); again: LIST_REMOVE(cp, p_list); LIST_INSERT_HEAD(&allproc, cp, p_list); for (;;) { p = LIST_NEXT(cp, p_list); if (p == NULL) break; LIST_REMOVE(cp, p_list); LIST_INSERT_AFTER(p, cp, p_list); PROC_LOCK(p); if ((p->p_flag & P_TOTAL_STOP) != 0) { sx_xunlock(&allproc_lock); _PHOLD(p); thread_single_end(p, SINGLE_ALLPROC); _PRELE(p); PROC_UNLOCK(p); sx_xlock(&allproc_lock); } else { PROC_UNLOCK(p); } } /* Did the loop above missed any stopped process ? */ FOREACH_PROC_IN_SYSTEM(p) { /* No need for proc lock. */ if ((p->p_flag & P_TOTAL_STOP) != 0) goto again; } sx_xunlock(&allproc_lock); } /* #define TOTAL_STOP_DEBUG 1 */ #ifdef TOTAL_STOP_DEBUG volatile static int ap_resume; #include static int sysctl_debug_stop_all_proc(SYSCTL_HANDLER_ARGS) { int error, val; val = 0; ap_resume = 0; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (val != 0) { stop_all_proc(); syncer_suspend(); while (ap_resume == 0) ; syncer_resume(); resume_all_proc(); } return (0); } SYSCTL_PROC(_debug, OID_AUTO, stop_all_proc, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, __DEVOLATILE(int *, &ap_resume), 0, sysctl_debug_stop_all_proc, "I", ""); #endif diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index ed4dd52b66d3..b1b4ccf4357c 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -1,4250 +1,4250 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_sig.c 8.7 (Berkeley) 4/18/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define ONSIG 32 /* NSIG for osig* syscalls. XXX. */ SDT_PROVIDER_DECLARE(proc); SDT_PROBE_DEFINE3(proc, , , signal__send, "struct thread *", "struct proc *", "int"); SDT_PROBE_DEFINE2(proc, , , signal__clear, "int", "ksiginfo_t *"); SDT_PROBE_DEFINE3(proc, , , signal__discard, "struct thread *", "struct proc *", "int"); static int coredump(struct thread *); static int killpg1(struct thread *td, int sig, int pgid, int all, ksiginfo_t *ksi); static int issignal(struct thread *td); static void reschedule_signals(struct proc *p, sigset_t block, int flags); static int sigprop(int sig); static void tdsigwakeup(struct thread *, int, sig_t, int); static int sig_suspend_threads(struct thread *, struct proc *, int); static int filt_sigattach(struct knote *kn); static void filt_sigdetach(struct knote *kn); static int filt_signal(struct knote *kn, long hint); static struct thread *sigtd(struct proc *p, int sig, bool fast_sigblock); static void sigqueue_start(void); static uma_zone_t ksiginfo_zone = NULL; struct filterops sig_filtops = { .f_isfd = 0, .f_attach = filt_sigattach, .f_detach = filt_sigdetach, .f_event = filt_signal, }; static int kern_logsigexit = 1; SYSCTL_INT(_kern, KERN_LOGSIGEXIT, logsigexit, CTLFLAG_RW, &kern_logsigexit, 0, "Log processes quitting on abnormal signals to syslog(3)"); static int kern_forcesigexit = 1; SYSCTL_INT(_kern, OID_AUTO, forcesigexit, CTLFLAG_RW, &kern_forcesigexit, 0, "Force trap signal to be handled"); static SYSCTL_NODE(_kern, OID_AUTO, sigqueue, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "POSIX real time signal"); static int max_pending_per_proc = 128; SYSCTL_INT(_kern_sigqueue, OID_AUTO, max_pending_per_proc, CTLFLAG_RW, &max_pending_per_proc, 0, "Max pending signals per proc"); static int preallocate_siginfo = 1024; SYSCTL_INT(_kern_sigqueue, OID_AUTO, preallocate, CTLFLAG_RDTUN, &preallocate_siginfo, 0, "Preallocated signal memory size"); static int signal_overflow = 0; SYSCTL_INT(_kern_sigqueue, OID_AUTO, overflow, CTLFLAG_RD, &signal_overflow, 0, "Number of signals overflew"); static int signal_alloc_fail = 0; SYSCTL_INT(_kern_sigqueue, OID_AUTO, alloc_fail, CTLFLAG_RD, &signal_alloc_fail, 0, "signals failed to be allocated"); static int kern_lognosys = 0; SYSCTL_INT(_kern, OID_AUTO, lognosys, CTLFLAG_RWTUN, &kern_lognosys, 0, "Log invalid syscalls"); __read_frequently bool sigfastblock_fetch_always = false; SYSCTL_BOOL(_kern, OID_AUTO, sigfastblock_fetch_always, CTLFLAG_RWTUN, &sigfastblock_fetch_always, 0, "Fetch sigfastblock word on each syscall entry for proper " "blocking semantic"); SYSINIT(signal, SI_SUB_P1003_1B, SI_ORDER_FIRST+3, sigqueue_start, NULL); /* * Policy -- Can ucred cr1 send SIGIO to process cr2? * Should use cr_cansignal() once cr_cansignal() allows SIGIO and SIGURG * in the right situations. */ #define CANSIGIO(cr1, cr2) \ ((cr1)->cr_uid == 0 || \ (cr1)->cr_ruid == (cr2)->cr_ruid || \ (cr1)->cr_uid == (cr2)->cr_ruid || \ (cr1)->cr_ruid == (cr2)->cr_uid || \ (cr1)->cr_uid == (cr2)->cr_uid) static int sugid_coredump; SYSCTL_INT(_kern, OID_AUTO, sugid_coredump, CTLFLAG_RWTUN, &sugid_coredump, 0, "Allow setuid and setgid processes to dump core"); static int capmode_coredump; SYSCTL_INT(_kern, OID_AUTO, capmode_coredump, CTLFLAG_RWTUN, &capmode_coredump, 0, "Allow processes in capability mode to dump core"); static int do_coredump = 1; SYSCTL_INT(_kern, OID_AUTO, coredump, CTLFLAG_RW, &do_coredump, 0, "Enable/Disable coredumps"); static int set_core_nodump_flag = 0; SYSCTL_INT(_kern, OID_AUTO, nodump_coredump, CTLFLAG_RW, &set_core_nodump_flag, 0, "Enable setting the NODUMP flag on coredump files"); static int coredump_devctl = 0; SYSCTL_INT(_kern, OID_AUTO, coredump_devctl, CTLFLAG_RW, &coredump_devctl, 0, "Generate a devctl notification when processes coredump"); /* * Signal properties and actions. * The array below categorizes the signals and their default actions * according to the following properties: */ #define SIGPROP_KILL 0x01 /* terminates process by default */ #define SIGPROP_CORE 0x02 /* ditto and coredumps */ #define SIGPROP_STOP 0x04 /* suspend process */ #define SIGPROP_TTYSTOP 0x08 /* ditto, from tty */ #define SIGPROP_IGNORE 0x10 /* ignore by default */ #define SIGPROP_CONT 0x20 /* continue if suspended */ #define SIGPROP_CANTMASK 0x40 /* non-maskable, catchable */ static int sigproptbl[NSIG] = { [SIGHUP] = SIGPROP_KILL, [SIGINT] = SIGPROP_KILL, [SIGQUIT] = SIGPROP_KILL | SIGPROP_CORE, [SIGILL] = SIGPROP_KILL | SIGPROP_CORE, [SIGTRAP] = SIGPROP_KILL | SIGPROP_CORE, [SIGABRT] = SIGPROP_KILL | SIGPROP_CORE, [SIGEMT] = SIGPROP_KILL | SIGPROP_CORE, [SIGFPE] = SIGPROP_KILL | SIGPROP_CORE, [SIGKILL] = SIGPROP_KILL, [SIGBUS] = SIGPROP_KILL | SIGPROP_CORE, [SIGSEGV] = SIGPROP_KILL | SIGPROP_CORE, [SIGSYS] = SIGPROP_KILL | SIGPROP_CORE, [SIGPIPE] = SIGPROP_KILL, [SIGALRM] = SIGPROP_KILL, [SIGTERM] = SIGPROP_KILL, [SIGURG] = SIGPROP_IGNORE, [SIGSTOP] = SIGPROP_STOP, [SIGTSTP] = SIGPROP_STOP | SIGPROP_TTYSTOP, [SIGCONT] = SIGPROP_IGNORE | SIGPROP_CONT, [SIGCHLD] = SIGPROP_IGNORE, [SIGTTIN] = SIGPROP_STOP | SIGPROP_TTYSTOP, [SIGTTOU] = SIGPROP_STOP | SIGPROP_TTYSTOP, [SIGIO] = SIGPROP_IGNORE, [SIGXCPU] = SIGPROP_KILL, [SIGXFSZ] = SIGPROP_KILL, [SIGVTALRM] = SIGPROP_KILL, [SIGPROF] = SIGPROP_KILL, [SIGWINCH] = SIGPROP_IGNORE, [SIGINFO] = SIGPROP_IGNORE, [SIGUSR1] = SIGPROP_KILL, [SIGUSR2] = SIGPROP_KILL, }; sigset_t fastblock_mask; static void sigqueue_start(void) { ksiginfo_zone = uma_zcreate("ksiginfo", sizeof(ksiginfo_t), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); uma_prealloc(ksiginfo_zone, preallocate_siginfo); p31b_setcfg(CTL_P1003_1B_REALTIME_SIGNALS, _POSIX_REALTIME_SIGNALS); p31b_setcfg(CTL_P1003_1B_RTSIG_MAX, SIGRTMAX - SIGRTMIN + 1); p31b_setcfg(CTL_P1003_1B_SIGQUEUE_MAX, max_pending_per_proc); SIGFILLSET(fastblock_mask); SIG_CANTMASK(fastblock_mask); } ksiginfo_t * ksiginfo_alloc(int wait) { int flags; flags = M_ZERO; if (! wait) flags |= M_NOWAIT; if (ksiginfo_zone != NULL) return ((ksiginfo_t *)uma_zalloc(ksiginfo_zone, flags)); return (NULL); } void ksiginfo_free(ksiginfo_t *ksi) { uma_zfree(ksiginfo_zone, ksi); } static __inline int ksiginfo_tryfree(ksiginfo_t *ksi) { if (!(ksi->ksi_flags & KSI_EXT)) { uma_zfree(ksiginfo_zone, ksi); return (1); } return (0); } void sigqueue_init(sigqueue_t *list, struct proc *p) { SIGEMPTYSET(list->sq_signals); SIGEMPTYSET(list->sq_kill); SIGEMPTYSET(list->sq_ptrace); TAILQ_INIT(&list->sq_list); list->sq_proc = p; list->sq_flags = SQ_INIT; } /* * Get a signal's ksiginfo. * Return: * 0 - signal not found * others - signal number */ static int sigqueue_get(sigqueue_t *sq, int signo, ksiginfo_t *si) { struct proc *p = sq->sq_proc; struct ksiginfo *ksi, *next; int count = 0; KASSERT(sq->sq_flags & SQ_INIT, ("sigqueue not inited")); if (!SIGISMEMBER(sq->sq_signals, signo)) return (0); if (SIGISMEMBER(sq->sq_ptrace, signo)) { count++; SIGDELSET(sq->sq_ptrace, signo); si->ksi_flags |= KSI_PTRACE; } if (SIGISMEMBER(sq->sq_kill, signo)) { count++; if (count == 1) SIGDELSET(sq->sq_kill, signo); } TAILQ_FOREACH_SAFE(ksi, &sq->sq_list, ksi_link, next) { if (ksi->ksi_signo == signo) { if (count == 0) { TAILQ_REMOVE(&sq->sq_list, ksi, ksi_link); ksi->ksi_sigq = NULL; ksiginfo_copy(ksi, si); if (ksiginfo_tryfree(ksi) && p != NULL) p->p_pendingcnt--; } if (++count > 1) break; } } if (count <= 1) SIGDELSET(sq->sq_signals, signo); si->ksi_signo = signo; return (signo); } void sigqueue_take(ksiginfo_t *ksi) { struct ksiginfo *kp; struct proc *p; sigqueue_t *sq; if (ksi == NULL || (sq = ksi->ksi_sigq) == NULL) return; p = sq->sq_proc; TAILQ_REMOVE(&sq->sq_list, ksi, ksi_link); ksi->ksi_sigq = NULL; if (!(ksi->ksi_flags & KSI_EXT) && p != NULL) p->p_pendingcnt--; for (kp = TAILQ_FIRST(&sq->sq_list); kp != NULL; kp = TAILQ_NEXT(kp, ksi_link)) { if (kp->ksi_signo == ksi->ksi_signo) break; } if (kp == NULL && !SIGISMEMBER(sq->sq_kill, ksi->ksi_signo) && !SIGISMEMBER(sq->sq_ptrace, ksi->ksi_signo)) SIGDELSET(sq->sq_signals, ksi->ksi_signo); } static int sigqueue_add(sigqueue_t *sq, int signo, ksiginfo_t *si) { struct proc *p = sq->sq_proc; struct ksiginfo *ksi; int ret = 0; KASSERT(sq->sq_flags & SQ_INIT, ("sigqueue not inited")); /* * SIGKILL/SIGSTOP cannot be caught or masked, so take the fast path * for these signals. */ if (signo == SIGKILL || signo == SIGSTOP || si == NULL) { SIGADDSET(sq->sq_kill, signo); goto out_set_bit; } /* directly insert the ksi, don't copy it */ if (si->ksi_flags & KSI_INS) { if (si->ksi_flags & KSI_HEAD) TAILQ_INSERT_HEAD(&sq->sq_list, si, ksi_link); else TAILQ_INSERT_TAIL(&sq->sq_list, si, ksi_link); si->ksi_sigq = sq; goto out_set_bit; } if (__predict_false(ksiginfo_zone == NULL)) { SIGADDSET(sq->sq_kill, signo); goto out_set_bit; } if (p != NULL && p->p_pendingcnt >= max_pending_per_proc) { signal_overflow++; ret = EAGAIN; } else if ((ksi = ksiginfo_alloc(0)) == NULL) { signal_alloc_fail++; ret = EAGAIN; } else { if (p != NULL) p->p_pendingcnt++; ksiginfo_copy(si, ksi); ksi->ksi_signo = signo; if (si->ksi_flags & KSI_HEAD) TAILQ_INSERT_HEAD(&sq->sq_list, ksi, ksi_link); else TAILQ_INSERT_TAIL(&sq->sq_list, ksi, ksi_link); ksi->ksi_sigq = sq; } if (ret != 0) { if ((si->ksi_flags & KSI_PTRACE) != 0) { SIGADDSET(sq->sq_ptrace, signo); ret = 0; goto out_set_bit; } else if ((si->ksi_flags & KSI_TRAP) != 0 || (si->ksi_flags & KSI_SIGQ) == 0) { SIGADDSET(sq->sq_kill, signo); ret = 0; goto out_set_bit; } return (ret); } out_set_bit: SIGADDSET(sq->sq_signals, signo); return (ret); } void sigqueue_flush(sigqueue_t *sq) { struct proc *p = sq->sq_proc; ksiginfo_t *ksi; KASSERT(sq->sq_flags & SQ_INIT, ("sigqueue not inited")); if (p != NULL) PROC_LOCK_ASSERT(p, MA_OWNED); while ((ksi = TAILQ_FIRST(&sq->sq_list)) != NULL) { TAILQ_REMOVE(&sq->sq_list, ksi, ksi_link); ksi->ksi_sigq = NULL; if (ksiginfo_tryfree(ksi) && p != NULL) p->p_pendingcnt--; } SIGEMPTYSET(sq->sq_signals); SIGEMPTYSET(sq->sq_kill); SIGEMPTYSET(sq->sq_ptrace); } static void sigqueue_move_set(sigqueue_t *src, sigqueue_t *dst, const sigset_t *set) { sigset_t tmp; struct proc *p1, *p2; ksiginfo_t *ksi, *next; KASSERT(src->sq_flags & SQ_INIT, ("src sigqueue not inited")); KASSERT(dst->sq_flags & SQ_INIT, ("dst sigqueue not inited")); p1 = src->sq_proc; p2 = dst->sq_proc; /* Move siginfo to target list */ TAILQ_FOREACH_SAFE(ksi, &src->sq_list, ksi_link, next) { if (SIGISMEMBER(*set, ksi->ksi_signo)) { TAILQ_REMOVE(&src->sq_list, ksi, ksi_link); if (p1 != NULL) p1->p_pendingcnt--; TAILQ_INSERT_TAIL(&dst->sq_list, ksi, ksi_link); ksi->ksi_sigq = dst; if (p2 != NULL) p2->p_pendingcnt++; } } /* Move pending bits to target list */ tmp = src->sq_kill; SIGSETAND(tmp, *set); SIGSETOR(dst->sq_kill, tmp); SIGSETNAND(src->sq_kill, tmp); tmp = src->sq_ptrace; SIGSETAND(tmp, *set); SIGSETOR(dst->sq_ptrace, tmp); SIGSETNAND(src->sq_ptrace, tmp); tmp = src->sq_signals; SIGSETAND(tmp, *set); SIGSETOR(dst->sq_signals, tmp); SIGSETNAND(src->sq_signals, tmp); } #if 0 static void sigqueue_move(sigqueue_t *src, sigqueue_t *dst, int signo) { sigset_t set; SIGEMPTYSET(set); SIGADDSET(set, signo); sigqueue_move_set(src, dst, &set); } #endif static void sigqueue_delete_set(sigqueue_t *sq, const sigset_t *set) { struct proc *p = sq->sq_proc; ksiginfo_t *ksi, *next; KASSERT(sq->sq_flags & SQ_INIT, ("src sigqueue not inited")); /* Remove siginfo queue */ TAILQ_FOREACH_SAFE(ksi, &sq->sq_list, ksi_link, next) { if (SIGISMEMBER(*set, ksi->ksi_signo)) { TAILQ_REMOVE(&sq->sq_list, ksi, ksi_link); ksi->ksi_sigq = NULL; if (ksiginfo_tryfree(ksi) && p != NULL) p->p_pendingcnt--; } } SIGSETNAND(sq->sq_kill, *set); SIGSETNAND(sq->sq_ptrace, *set); SIGSETNAND(sq->sq_signals, *set); } void sigqueue_delete(sigqueue_t *sq, int signo) { sigset_t set; SIGEMPTYSET(set); SIGADDSET(set, signo); sigqueue_delete_set(sq, &set); } /* Remove a set of signals for a process */ static void sigqueue_delete_set_proc(struct proc *p, const sigset_t *set) { sigqueue_t worklist; struct thread *td0; PROC_LOCK_ASSERT(p, MA_OWNED); sigqueue_init(&worklist, NULL); sigqueue_move_set(&p->p_sigqueue, &worklist, set); FOREACH_THREAD_IN_PROC(p, td0) sigqueue_move_set(&td0->td_sigqueue, &worklist, set); sigqueue_flush(&worklist); } void sigqueue_delete_proc(struct proc *p, int signo) { sigset_t set; SIGEMPTYSET(set); SIGADDSET(set, signo); sigqueue_delete_set_proc(p, &set); } static void sigqueue_delete_stopmask_proc(struct proc *p) { sigset_t set; SIGEMPTYSET(set); SIGADDSET(set, SIGSTOP); SIGADDSET(set, SIGTSTP); SIGADDSET(set, SIGTTIN); SIGADDSET(set, SIGTTOU); sigqueue_delete_set_proc(p, &set); } /* * Determine signal that should be delivered to thread td, the current * thread, 0 if none. If there is a pending stop signal with default * action, the process stops in issignal(). */ int cursig(struct thread *td) { PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); mtx_assert(&td->td_proc->p_sigacts->ps_mtx, MA_OWNED); THREAD_LOCK_ASSERT(td, MA_NOTOWNED); return (SIGPENDING(td) ? issignal(td) : 0); } /* * Arrange for ast() to handle unmasked pending signals on return to user * mode. This must be called whenever a signal is added to td_sigqueue or * unmasked in td_sigmask. */ void signotify(struct thread *td) { PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); if (SIGPENDING(td)) { thread_lock(td); td->td_flags |= TDF_NEEDSIGCHK | TDF_ASTPENDING; thread_unlock(td); } } /* * Returns 1 (true) if altstack is configured for the thread, and the * passed stack bottom address falls into the altstack range. Handles * the 43 compat special case where the alt stack size is zero. */ int sigonstack(size_t sp) { struct thread *td; td = curthread; if ((td->td_pflags & TDP_ALTSTACK) == 0) return (0); #if defined(COMPAT_43) if (SV_PROC_FLAG(td->td_proc, SV_AOUT) && td->td_sigstk.ss_size == 0) return ((td->td_sigstk.ss_flags & SS_ONSTACK) != 0); #endif return (sp >= (size_t)td->td_sigstk.ss_sp && sp < td->td_sigstk.ss_size + (size_t)td->td_sigstk.ss_sp); } static __inline int sigprop(int sig) { if (sig > 0 && sig < nitems(sigproptbl)) return (sigproptbl[sig]); return (0); } int sig_ffs(sigset_t *set) { int i; for (i = 0; i < _SIG_WORDS; i++) if (set->__bits[i]) return (ffs(set->__bits[i]) + (i * 32)); return (0); } static bool sigact_flag_test(const struct sigaction *act, int flag) { /* * SA_SIGINFO is reset when signal disposition is set to * ignore or default. Other flags are kept according to user * settings. */ return ((act->sa_flags & flag) != 0 && (flag != SA_SIGINFO || ((__sighandler_t *)act->sa_sigaction != SIG_IGN && (__sighandler_t *)act->sa_sigaction != SIG_DFL))); } /* * kern_sigaction * sigaction * freebsd4_sigaction * osigaction */ int kern_sigaction(struct thread *td, int sig, const struct sigaction *act, struct sigaction *oact, int flags) { struct sigacts *ps; struct proc *p = td->td_proc; if (!_SIG_VALID(sig)) return (EINVAL); if (act != NULL && act->sa_handler != SIG_DFL && act->sa_handler != SIG_IGN && (act->sa_flags & ~(SA_ONSTACK | SA_RESTART | SA_RESETHAND | SA_NOCLDSTOP | SA_NODEFER | SA_NOCLDWAIT | SA_SIGINFO)) != 0) return (EINVAL); PROC_LOCK(p); ps = p->p_sigacts; mtx_lock(&ps->ps_mtx); if (oact) { memset(oact, 0, sizeof(*oact)); oact->sa_mask = ps->ps_catchmask[_SIG_IDX(sig)]; if (SIGISMEMBER(ps->ps_sigonstack, sig)) oact->sa_flags |= SA_ONSTACK; if (!SIGISMEMBER(ps->ps_sigintr, sig)) oact->sa_flags |= SA_RESTART; if (SIGISMEMBER(ps->ps_sigreset, sig)) oact->sa_flags |= SA_RESETHAND; if (SIGISMEMBER(ps->ps_signodefer, sig)) oact->sa_flags |= SA_NODEFER; if (SIGISMEMBER(ps->ps_siginfo, sig)) { oact->sa_flags |= SA_SIGINFO; oact->sa_sigaction = (__siginfohandler_t *)ps->ps_sigact[_SIG_IDX(sig)]; } else oact->sa_handler = ps->ps_sigact[_SIG_IDX(sig)]; if (sig == SIGCHLD && ps->ps_flag & PS_NOCLDSTOP) oact->sa_flags |= SA_NOCLDSTOP; if (sig == SIGCHLD && ps->ps_flag & PS_NOCLDWAIT) oact->sa_flags |= SA_NOCLDWAIT; } if (act) { if ((sig == SIGKILL || sig == SIGSTOP) && act->sa_handler != SIG_DFL) { mtx_unlock(&ps->ps_mtx); PROC_UNLOCK(p); return (EINVAL); } /* * Change setting atomically. */ ps->ps_catchmask[_SIG_IDX(sig)] = act->sa_mask; SIG_CANTMASK(ps->ps_catchmask[_SIG_IDX(sig)]); if (sigact_flag_test(act, SA_SIGINFO)) { ps->ps_sigact[_SIG_IDX(sig)] = (__sighandler_t *)act->sa_sigaction; SIGADDSET(ps->ps_siginfo, sig); } else { ps->ps_sigact[_SIG_IDX(sig)] = act->sa_handler; SIGDELSET(ps->ps_siginfo, sig); } if (!sigact_flag_test(act, SA_RESTART)) SIGADDSET(ps->ps_sigintr, sig); else SIGDELSET(ps->ps_sigintr, sig); if (sigact_flag_test(act, SA_ONSTACK)) SIGADDSET(ps->ps_sigonstack, sig); else SIGDELSET(ps->ps_sigonstack, sig); if (sigact_flag_test(act, SA_RESETHAND)) SIGADDSET(ps->ps_sigreset, sig); else SIGDELSET(ps->ps_sigreset, sig); if (sigact_flag_test(act, SA_NODEFER)) SIGADDSET(ps->ps_signodefer, sig); else SIGDELSET(ps->ps_signodefer, sig); if (sig == SIGCHLD) { if (act->sa_flags & SA_NOCLDSTOP) ps->ps_flag |= PS_NOCLDSTOP; else ps->ps_flag &= ~PS_NOCLDSTOP; if (act->sa_flags & SA_NOCLDWAIT) { /* * Paranoia: since SA_NOCLDWAIT is implemented * by reparenting the dying child to PID 1 (and * trust it to reap the zombie), PID 1 itself * is forbidden to set SA_NOCLDWAIT. */ if (p->p_pid == 1) ps->ps_flag &= ~PS_NOCLDWAIT; else ps->ps_flag |= PS_NOCLDWAIT; } else ps->ps_flag &= ~PS_NOCLDWAIT; if (ps->ps_sigact[_SIG_IDX(SIGCHLD)] == SIG_IGN) ps->ps_flag |= PS_CLDSIGIGN; else ps->ps_flag &= ~PS_CLDSIGIGN; } /* * Set bit in ps_sigignore for signals that are set to SIG_IGN, * and for signals set to SIG_DFL where the default is to * ignore. However, don't put SIGCONT in ps_sigignore, as we * have to restart the process. */ if (ps->ps_sigact[_SIG_IDX(sig)] == SIG_IGN || (sigprop(sig) & SIGPROP_IGNORE && ps->ps_sigact[_SIG_IDX(sig)] == SIG_DFL)) { /* never to be seen again */ sigqueue_delete_proc(p, sig); if (sig != SIGCONT) /* easier in psignal */ SIGADDSET(ps->ps_sigignore, sig); SIGDELSET(ps->ps_sigcatch, sig); } else { SIGDELSET(ps->ps_sigignore, sig); if (ps->ps_sigact[_SIG_IDX(sig)] == SIG_DFL) SIGDELSET(ps->ps_sigcatch, sig); else SIGADDSET(ps->ps_sigcatch, sig); } #ifdef COMPAT_FREEBSD4 if (ps->ps_sigact[_SIG_IDX(sig)] == SIG_IGN || ps->ps_sigact[_SIG_IDX(sig)] == SIG_DFL || (flags & KSA_FREEBSD4) == 0) SIGDELSET(ps->ps_freebsd4, sig); else SIGADDSET(ps->ps_freebsd4, sig); #endif #ifdef COMPAT_43 if (ps->ps_sigact[_SIG_IDX(sig)] == SIG_IGN || ps->ps_sigact[_SIG_IDX(sig)] == SIG_DFL || (flags & KSA_OSIGSET) == 0) SIGDELSET(ps->ps_osigset, sig); else SIGADDSET(ps->ps_osigset, sig); #endif } mtx_unlock(&ps->ps_mtx); PROC_UNLOCK(p); return (0); } #ifndef _SYS_SYSPROTO_H_ struct sigaction_args { int sig; struct sigaction *act; struct sigaction *oact; }; #endif int sys_sigaction(struct thread *td, struct sigaction_args *uap) { struct sigaction act, oact; struct sigaction *actp, *oactp; int error; actp = (uap->act != NULL) ? &act : NULL; oactp = (uap->oact != NULL) ? &oact : NULL; if (actp) { error = copyin(uap->act, actp, sizeof(act)); if (error) return (error); } error = kern_sigaction(td, uap->sig, actp, oactp, 0); if (oactp && !error) error = copyout(oactp, uap->oact, sizeof(oact)); return (error); } #ifdef COMPAT_FREEBSD4 #ifndef _SYS_SYSPROTO_H_ struct freebsd4_sigaction_args { int sig; struct sigaction *act; struct sigaction *oact; }; #endif int freebsd4_sigaction(struct thread *td, struct freebsd4_sigaction_args *uap) { struct sigaction act, oact; struct sigaction *actp, *oactp; int error; actp = (uap->act != NULL) ? &act : NULL; oactp = (uap->oact != NULL) ? &oact : NULL; if (actp) { error = copyin(uap->act, actp, sizeof(act)); if (error) return (error); } error = kern_sigaction(td, uap->sig, actp, oactp, KSA_FREEBSD4); if (oactp && !error) error = copyout(oactp, uap->oact, sizeof(oact)); return (error); } #endif /* COMAPT_FREEBSD4 */ #ifdef COMPAT_43 /* XXX - COMPAT_FBSD3 */ #ifndef _SYS_SYSPROTO_H_ struct osigaction_args { int signum; struct osigaction *nsa; struct osigaction *osa; }; #endif int osigaction(struct thread *td, struct osigaction_args *uap) { struct osigaction sa; struct sigaction nsa, osa; struct sigaction *nsap, *osap; int error; if (uap->signum <= 0 || uap->signum >= ONSIG) return (EINVAL); nsap = (uap->nsa != NULL) ? &nsa : NULL; osap = (uap->osa != NULL) ? &osa : NULL; if (nsap) { error = copyin(uap->nsa, &sa, sizeof(sa)); if (error) return (error); nsap->sa_handler = sa.sa_handler; nsap->sa_flags = sa.sa_flags; OSIG2SIG(sa.sa_mask, nsap->sa_mask); } error = kern_sigaction(td, uap->signum, nsap, osap, KSA_OSIGSET); if (osap && !error) { sa.sa_handler = osap->sa_handler; sa.sa_flags = osap->sa_flags; SIG2OSIG(osap->sa_mask, sa.sa_mask); error = copyout(&sa, uap->osa, sizeof(sa)); } return (error); } #if !defined(__i386__) /* Avoid replicating the same stub everywhere */ int osigreturn(struct thread *td, struct osigreturn_args *uap) { return (nosys(td, (struct nosys_args *)uap)); } #endif #endif /* COMPAT_43 */ /* * Initialize signal state for process 0; * set to ignore signals that are ignored by default. */ void siginit(struct proc *p) { int i; struct sigacts *ps; PROC_LOCK(p); ps = p->p_sigacts; mtx_lock(&ps->ps_mtx); for (i = 1; i <= NSIG; i++) { if (sigprop(i) & SIGPROP_IGNORE && i != SIGCONT) { SIGADDSET(ps->ps_sigignore, i); } } mtx_unlock(&ps->ps_mtx); PROC_UNLOCK(p); } /* * Reset specified signal to the default disposition. */ static void sigdflt(struct sigacts *ps, int sig) { mtx_assert(&ps->ps_mtx, MA_OWNED); SIGDELSET(ps->ps_sigcatch, sig); if ((sigprop(sig) & SIGPROP_IGNORE) != 0 && sig != SIGCONT) SIGADDSET(ps->ps_sigignore, sig); ps->ps_sigact[_SIG_IDX(sig)] = SIG_DFL; SIGDELSET(ps->ps_siginfo, sig); } /* * Reset signals for an exec of the specified process. */ void execsigs(struct proc *p) { sigset_t osigignore; struct sigacts *ps; int sig; struct thread *td; /* * Reset caught signals. Held signals remain held * through td_sigmask (unless they were caught, * and are now ignored by default). */ PROC_LOCK_ASSERT(p, MA_OWNED); ps = p->p_sigacts; mtx_lock(&ps->ps_mtx); sig_drop_caught(p); /* * As CloudABI processes cannot modify signal handlers, fully * reset all signals to their default behavior. Do ignore * SIGPIPE, as it would otherwise be impossible to recover from * writes to broken pipes and sockets. */ if (SV_PROC_ABI(p) == SV_ABI_CLOUDABI) { osigignore = ps->ps_sigignore; while (SIGNOTEMPTY(osigignore)) { sig = sig_ffs(&osigignore); SIGDELSET(osigignore, sig); if (sig != SIGPIPE) sigdflt(ps, sig); } SIGADDSET(ps->ps_sigignore, SIGPIPE); } /* * Reset stack state to the user stack. * Clear set of signals caught on the signal stack. */ td = curthread; MPASS(td->td_proc == p); td->td_sigstk.ss_flags = SS_DISABLE; td->td_sigstk.ss_size = 0; td->td_sigstk.ss_sp = 0; td->td_pflags &= ~TDP_ALTSTACK; /* * Reset no zombies if child dies flag as Solaris does. */ ps->ps_flag &= ~(PS_NOCLDWAIT | PS_CLDSIGIGN); if (ps->ps_sigact[_SIG_IDX(SIGCHLD)] == SIG_IGN) ps->ps_sigact[_SIG_IDX(SIGCHLD)] = SIG_DFL; mtx_unlock(&ps->ps_mtx); } /* * kern_sigprocmask() * * Manipulate signal mask. */ int kern_sigprocmask(struct thread *td, int how, sigset_t *set, sigset_t *oset, int flags) { sigset_t new_block, oset1; struct proc *p; int error; p = td->td_proc; if ((flags & SIGPROCMASK_PROC_LOCKED) != 0) PROC_LOCK_ASSERT(p, MA_OWNED); else PROC_LOCK(p); mtx_assert(&p->p_sigacts->ps_mtx, (flags & SIGPROCMASK_PS_LOCKED) != 0 ? MA_OWNED : MA_NOTOWNED); if (oset != NULL) *oset = td->td_sigmask; error = 0; if (set != NULL) { switch (how) { case SIG_BLOCK: SIG_CANTMASK(*set); oset1 = td->td_sigmask; SIGSETOR(td->td_sigmask, *set); new_block = td->td_sigmask; SIGSETNAND(new_block, oset1); break; case SIG_UNBLOCK: SIGSETNAND(td->td_sigmask, *set); signotify(td); goto out; case SIG_SETMASK: SIG_CANTMASK(*set); oset1 = td->td_sigmask; if (flags & SIGPROCMASK_OLD) SIGSETLO(td->td_sigmask, *set); else td->td_sigmask = *set; new_block = td->td_sigmask; SIGSETNAND(new_block, oset1); signotify(td); break; default: error = EINVAL; goto out; } /* * The new_block set contains signals that were not previously * blocked, but are blocked now. * * In case we block any signal that was not previously blocked * for td, and process has the signal pending, try to schedule * signal delivery to some thread that does not block the * signal, possibly waking it up. */ if (p->p_numthreads != 1) reschedule_signals(p, new_block, flags); } out: if (!(flags & SIGPROCMASK_PROC_LOCKED)) PROC_UNLOCK(p); return (error); } #ifndef _SYS_SYSPROTO_H_ struct sigprocmask_args { int how; const sigset_t *set; sigset_t *oset; }; #endif int sys_sigprocmask(struct thread *td, struct sigprocmask_args *uap) { sigset_t set, oset; sigset_t *setp, *osetp; int error; setp = (uap->set != NULL) ? &set : NULL; osetp = (uap->oset != NULL) ? &oset : NULL; if (setp) { error = copyin(uap->set, setp, sizeof(set)); if (error) return (error); } error = kern_sigprocmask(td, uap->how, setp, osetp, 0); if (osetp && !error) { error = copyout(osetp, uap->oset, sizeof(oset)); } return (error); } #ifdef COMPAT_43 /* XXX - COMPAT_FBSD3 */ #ifndef _SYS_SYSPROTO_H_ struct osigprocmask_args { int how; osigset_t mask; }; #endif int osigprocmask(struct thread *td, struct osigprocmask_args *uap) { sigset_t set, oset; int error; OSIG2SIG(uap->mask, set); error = kern_sigprocmask(td, uap->how, &set, &oset, 1); SIG2OSIG(oset, td->td_retval[0]); return (error); } #endif /* COMPAT_43 */ int sys_sigwait(struct thread *td, struct sigwait_args *uap) { ksiginfo_t ksi; sigset_t set; int error; error = copyin(uap->set, &set, sizeof(set)); if (error) { td->td_retval[0] = error; return (0); } error = kern_sigtimedwait(td, set, &ksi, NULL); if (error) { if (error == EINTR && td->td_proc->p_osrel < P_OSREL_SIGWAIT) error = ERESTART; if (error == ERESTART) return (error); td->td_retval[0] = error; return (0); } error = copyout(&ksi.ksi_signo, uap->sig, sizeof(ksi.ksi_signo)); td->td_retval[0] = error; return (0); } int sys_sigtimedwait(struct thread *td, struct sigtimedwait_args *uap) { struct timespec ts; struct timespec *timeout; sigset_t set; ksiginfo_t ksi; int error; if (uap->timeout) { error = copyin(uap->timeout, &ts, sizeof(ts)); if (error) return (error); timeout = &ts; } else timeout = NULL; error = copyin(uap->set, &set, sizeof(set)); if (error) return (error); error = kern_sigtimedwait(td, set, &ksi, timeout); if (error) return (error); if (uap->info) error = copyout(&ksi.ksi_info, uap->info, sizeof(siginfo_t)); if (error == 0) td->td_retval[0] = ksi.ksi_signo; return (error); } int sys_sigwaitinfo(struct thread *td, struct sigwaitinfo_args *uap) { ksiginfo_t ksi; sigset_t set; int error; error = copyin(uap->set, &set, sizeof(set)); if (error) return (error); error = kern_sigtimedwait(td, set, &ksi, NULL); if (error) return (error); if (uap->info) error = copyout(&ksi.ksi_info, uap->info, sizeof(siginfo_t)); if (error == 0) td->td_retval[0] = ksi.ksi_signo; return (error); } static void proc_td_siginfo_capture(struct thread *td, siginfo_t *si) { struct thread *thr; FOREACH_THREAD_IN_PROC(td->td_proc, thr) { if (thr == td) thr->td_si = *si; else thr->td_si.si_signo = 0; } } int kern_sigtimedwait(struct thread *td, sigset_t waitset, ksiginfo_t *ksi, struct timespec *timeout) { struct sigacts *ps; sigset_t saved_mask, new_block; struct proc *p; int error, sig, timo, timevalid = 0; struct timespec rts, ets, ts; struct timeval tv; bool traced; p = td->td_proc; error = 0; ets.tv_sec = 0; ets.tv_nsec = 0; traced = false; if (timeout != NULL) { if (timeout->tv_nsec >= 0 && timeout->tv_nsec < 1000000000) { timevalid = 1; getnanouptime(&rts); timespecadd(&rts, timeout, &ets); } } ksiginfo_init(ksi); /* Some signals can not be waited for. */ SIG_CANTMASK(waitset); ps = p->p_sigacts; PROC_LOCK(p); saved_mask = td->td_sigmask; SIGSETNAND(td->td_sigmask, waitset); for (;;) { mtx_lock(&ps->ps_mtx); sig = cursig(td); mtx_unlock(&ps->ps_mtx); KASSERT(sig >= 0, ("sig %d", sig)); if (sig != 0 && SIGISMEMBER(waitset, sig)) { if (sigqueue_get(&td->td_sigqueue, sig, ksi) != 0 || sigqueue_get(&p->p_sigqueue, sig, ksi) != 0) { error = 0; break; } } if (error != 0) break; /* * POSIX says this must be checked after looking for pending * signals. */ if (timeout != NULL) { if (!timevalid) { error = EINVAL; break; } getnanouptime(&rts); if (timespeccmp(&rts, &ets, >=)) { error = EAGAIN; break; } timespecsub(&ets, &rts, &ts); TIMESPEC_TO_TIMEVAL(&tv, &ts); timo = tvtohz(&tv); } else { timo = 0; } if (traced) { error = EINTR; break; } error = msleep(ps, &p->p_mtx, PPAUSE|PCATCH, "sigwait", timo); if (timeout != NULL) { if (error == ERESTART) { /* Timeout can not be restarted. */ error = EINTR; } else if (error == EAGAIN) { /* We will calculate timeout by ourself. */ error = 0; } } /* * If PTRACE_SCE or PTRACE_SCX were set after * userspace entered the syscall, return spurious * EINTR after wait was done. Only do this as last * resort after rechecking for possible queued signals * and expired timeouts. */ if (error == 0 && (p->p_ptevents & PTRACE_SYSCALL) != 0) traced = true; } new_block = saved_mask; SIGSETNAND(new_block, td->td_sigmask); td->td_sigmask = saved_mask; /* * Fewer signals can be delivered to us, reschedule signal * notification. */ if (p->p_numthreads != 1) reschedule_signals(p, new_block, 0); if (error == 0) { SDT_PROBE2(proc, , , signal__clear, sig, ksi); if (ksi->ksi_code == SI_TIMER) itimer_accept(p, ksi->ksi_timerid, ksi); #ifdef KTRACE if (KTRPOINT(td, KTR_PSIG)) { sig_t action; mtx_lock(&ps->ps_mtx); action = ps->ps_sigact[_SIG_IDX(sig)]; mtx_unlock(&ps->ps_mtx); ktrpsig(sig, action, &td->td_sigmask, ksi->ksi_code); } #endif if (sig == SIGKILL) { proc_td_siginfo_capture(td, &ksi->ksi_info); sigexit(td, sig); } } PROC_UNLOCK(p); return (error); } #ifndef _SYS_SYSPROTO_H_ struct sigpending_args { sigset_t *set; }; #endif int sys_sigpending(struct thread *td, struct sigpending_args *uap) { struct proc *p = td->td_proc; sigset_t pending; PROC_LOCK(p); pending = p->p_sigqueue.sq_signals; SIGSETOR(pending, td->td_sigqueue.sq_signals); PROC_UNLOCK(p); return (copyout(&pending, uap->set, sizeof(sigset_t))); } #ifdef COMPAT_43 /* XXX - COMPAT_FBSD3 */ #ifndef _SYS_SYSPROTO_H_ struct osigpending_args { int dummy; }; #endif int osigpending(struct thread *td, struct osigpending_args *uap) { struct proc *p = td->td_proc; sigset_t pending; PROC_LOCK(p); pending = p->p_sigqueue.sq_signals; SIGSETOR(pending, td->td_sigqueue.sq_signals); PROC_UNLOCK(p); SIG2OSIG(pending, td->td_retval[0]); return (0); } #endif /* COMPAT_43 */ #if defined(COMPAT_43) /* * Generalized interface signal handler, 4.3-compatible. */ #ifndef _SYS_SYSPROTO_H_ struct osigvec_args { int signum; struct sigvec *nsv; struct sigvec *osv; }; #endif /* ARGSUSED */ int osigvec(struct thread *td, struct osigvec_args *uap) { struct sigvec vec; struct sigaction nsa, osa; struct sigaction *nsap, *osap; int error; if (uap->signum <= 0 || uap->signum >= ONSIG) return (EINVAL); nsap = (uap->nsv != NULL) ? &nsa : NULL; osap = (uap->osv != NULL) ? &osa : NULL; if (nsap) { error = copyin(uap->nsv, &vec, sizeof(vec)); if (error) return (error); nsap->sa_handler = vec.sv_handler; OSIG2SIG(vec.sv_mask, nsap->sa_mask); nsap->sa_flags = vec.sv_flags; nsap->sa_flags ^= SA_RESTART; /* opposite of SV_INTERRUPT */ } error = kern_sigaction(td, uap->signum, nsap, osap, KSA_OSIGSET); if (osap && !error) { vec.sv_handler = osap->sa_handler; SIG2OSIG(osap->sa_mask, vec.sv_mask); vec.sv_flags = osap->sa_flags; vec.sv_flags &= ~SA_NOCLDWAIT; vec.sv_flags ^= SA_RESTART; error = copyout(&vec, uap->osv, sizeof(vec)); } return (error); } #ifndef _SYS_SYSPROTO_H_ struct osigblock_args { int mask; }; #endif int osigblock(struct thread *td, struct osigblock_args *uap) { sigset_t set, oset; OSIG2SIG(uap->mask, set); kern_sigprocmask(td, SIG_BLOCK, &set, &oset, 0); SIG2OSIG(oset, td->td_retval[0]); return (0); } #ifndef _SYS_SYSPROTO_H_ struct osigsetmask_args { int mask; }; #endif int osigsetmask(struct thread *td, struct osigsetmask_args *uap) { sigset_t set, oset; OSIG2SIG(uap->mask, set); kern_sigprocmask(td, SIG_SETMASK, &set, &oset, 0); SIG2OSIG(oset, td->td_retval[0]); return (0); } #endif /* COMPAT_43 */ /* * Suspend calling thread until signal, providing mask to be set in the * meantime. */ #ifndef _SYS_SYSPROTO_H_ struct sigsuspend_args { const sigset_t *sigmask; }; #endif /* ARGSUSED */ int sys_sigsuspend(struct thread *td, struct sigsuspend_args *uap) { sigset_t mask; int error; error = copyin(uap->sigmask, &mask, sizeof(mask)); if (error) return (error); return (kern_sigsuspend(td, mask)); } int kern_sigsuspend(struct thread *td, sigset_t mask) { struct proc *p = td->td_proc; int has_sig, sig; /* * When returning from sigsuspend, we want * the old mask to be restored after the * signal handler has finished. Thus, we * save it here and mark the sigacts structure * to indicate this. */ PROC_LOCK(p); kern_sigprocmask(td, SIG_SETMASK, &mask, &td->td_oldsigmask, SIGPROCMASK_PROC_LOCKED); td->td_pflags |= TDP_OLDMASK; /* * Process signals now. Otherwise, we can get spurious wakeup * due to signal entered process queue, but delivered to other * thread. But sigsuspend should return only on signal * delivery. */ (p->p_sysent->sv_set_syscall_retval)(td, EINTR); for (has_sig = 0; !has_sig;) { while (msleep(&p->p_sigacts, &p->p_mtx, PPAUSE|PCATCH, "pause", 0) == 0) /* void */; thread_suspend_check(0); mtx_lock(&p->p_sigacts->ps_mtx); while ((sig = cursig(td)) != 0) { KASSERT(sig >= 0, ("sig %d", sig)); has_sig += postsig(sig); } mtx_unlock(&p->p_sigacts->ps_mtx); /* * If PTRACE_SCE or PTRACE_SCX were set after * userspace entered the syscall, return spurious * EINTR. */ if ((p->p_ptevents & PTRACE_SYSCALL) != 0) has_sig += 1; } PROC_UNLOCK(p); td->td_errno = EINTR; td->td_pflags |= TDP_NERRNO; return (EJUSTRETURN); } #ifdef COMPAT_43 /* XXX - COMPAT_FBSD3 */ /* * Compatibility sigsuspend call for old binaries. Note nonstandard calling * convention: libc stub passes mask, not pointer, to save a copyin. */ #ifndef _SYS_SYSPROTO_H_ struct osigsuspend_args { osigset_t mask; }; #endif /* ARGSUSED */ int osigsuspend(struct thread *td, struct osigsuspend_args *uap) { sigset_t mask; OSIG2SIG(uap->mask, mask); return (kern_sigsuspend(td, mask)); } #endif /* COMPAT_43 */ #if defined(COMPAT_43) #ifndef _SYS_SYSPROTO_H_ struct osigstack_args { struct sigstack *nss; struct sigstack *oss; }; #endif /* ARGSUSED */ int osigstack(struct thread *td, struct osigstack_args *uap) { struct sigstack nss, oss; int error = 0; if (uap->nss != NULL) { error = copyin(uap->nss, &nss, sizeof(nss)); if (error) return (error); } oss.ss_sp = td->td_sigstk.ss_sp; oss.ss_onstack = sigonstack(cpu_getstack(td)); if (uap->nss != NULL) { td->td_sigstk.ss_sp = nss.ss_sp; td->td_sigstk.ss_size = 0; td->td_sigstk.ss_flags |= nss.ss_onstack & SS_ONSTACK; td->td_pflags |= TDP_ALTSTACK; } if (uap->oss != NULL) error = copyout(&oss, uap->oss, sizeof(oss)); return (error); } #endif /* COMPAT_43 */ #ifndef _SYS_SYSPROTO_H_ struct sigaltstack_args { stack_t *ss; stack_t *oss; }; #endif /* ARGSUSED */ int sys_sigaltstack(struct thread *td, struct sigaltstack_args *uap) { stack_t ss, oss; int error; if (uap->ss != NULL) { error = copyin(uap->ss, &ss, sizeof(ss)); if (error) return (error); } error = kern_sigaltstack(td, (uap->ss != NULL) ? &ss : NULL, (uap->oss != NULL) ? &oss : NULL); if (error) return (error); if (uap->oss != NULL) error = copyout(&oss, uap->oss, sizeof(stack_t)); return (error); } int kern_sigaltstack(struct thread *td, stack_t *ss, stack_t *oss) { struct proc *p = td->td_proc; int oonstack; oonstack = sigonstack(cpu_getstack(td)); if (oss != NULL) { *oss = td->td_sigstk; oss->ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; } if (ss != NULL) { if (oonstack) return (EPERM); if ((ss->ss_flags & ~SS_DISABLE) != 0) return (EINVAL); if (!(ss->ss_flags & SS_DISABLE)) { if (ss->ss_size < p->p_sysent->sv_minsigstksz) return (ENOMEM); td->td_sigstk = *ss; td->td_pflags |= TDP_ALTSTACK; } else { td->td_pflags &= ~TDP_ALTSTACK; } } return (0); } struct killpg1_ctx { struct thread *td; ksiginfo_t *ksi; int sig; bool sent; bool found; int ret; }; static void killpg1_sendsig(struct proc *p, bool notself, struct killpg1_ctx *arg) { int err; if (p->p_pid <= 1 || (p->p_flag & P_SYSTEM) != 0 || (notself && p == arg->td->td_proc) || p->p_state == PRS_NEW) return; PROC_LOCK(p); err = p_cansignal(arg->td, p, arg->sig); if (err == 0 && arg->sig != 0) pksignal(p, arg->sig, arg->ksi); PROC_UNLOCK(p); if (err != ESRCH) arg->found = true; if (err == 0) arg->sent = true; else if (arg->ret == 0 && err != ESRCH && err != EPERM) arg->ret = err; } /* * Common code for kill process group/broadcast kill. * cp is calling process. */ static int killpg1(struct thread *td, int sig, int pgid, int all, ksiginfo_t *ksi) { struct proc *p; struct pgrp *pgrp; struct killpg1_ctx arg; arg.td = td; arg.ksi = ksi; arg.sig = sig; arg.sent = false; arg.found = false; arg.ret = 0; if (all) { /* * broadcast */ sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { killpg1_sendsig(p, true, &arg); } sx_sunlock(&allproc_lock); } else { sx_slock(&proctree_lock); if (pgid == 0) { /* * zero pgid means send to my process group. */ pgrp = td->td_proc->p_pgrp; PGRP_LOCK(pgrp); } else { pgrp = pgfind(pgid); if (pgrp == NULL) { sx_sunlock(&proctree_lock); return (ESRCH); } } sx_sunlock(&proctree_lock); LIST_FOREACH(p, &pgrp->pg_members, p_pglist) { killpg1_sendsig(p, false, &arg); } PGRP_UNLOCK(pgrp); } MPASS(arg.ret != 0 || arg.found || !arg.sent); if (arg.ret == 0 && !arg.sent) arg.ret = arg.found ? EPERM : ESRCH; return (arg.ret); } #ifndef _SYS_SYSPROTO_H_ struct kill_args { int pid; int signum; }; #endif /* ARGSUSED */ int sys_kill(struct thread *td, struct kill_args *uap) { return (kern_kill(td, uap->pid, uap->signum)); } int kern_kill(struct thread *td, pid_t pid, int signum) { ksiginfo_t ksi; struct proc *p; int error; /* * A process in capability mode can send signals only to himself. * The main rationale behind this is that abort(3) is implemented as * kill(getpid(), SIGABRT). */ if (IN_CAPABILITY_MODE(td) && pid != td->td_proc->p_pid) return (ECAPMODE); AUDIT_ARG_SIGNUM(signum); AUDIT_ARG_PID(pid); if ((u_int)signum > _SIG_MAXSIG) return (EINVAL); ksiginfo_init(&ksi); ksi.ksi_signo = signum; ksi.ksi_code = SI_USER; ksi.ksi_pid = td->td_proc->p_pid; ksi.ksi_uid = td->td_ucred->cr_ruid; if (pid > 0) { /* kill single process */ if ((p = pfind_any(pid)) == NULL) return (ESRCH); AUDIT_ARG_PROCESS(p); error = p_cansignal(td, p, signum); if (error == 0 && signum) pksignal(p, signum, &ksi); PROC_UNLOCK(p); return (error); } switch (pid) { case -1: /* broadcast signal */ return (killpg1(td, signum, 0, 1, &ksi)); case 0: /* signal own process group */ return (killpg1(td, signum, 0, 0, &ksi)); default: /* negative explicit process group */ return (killpg1(td, signum, -pid, 0, &ksi)); } /* NOTREACHED */ } int sys_pdkill(struct thread *td, struct pdkill_args *uap) { struct proc *p; int error; AUDIT_ARG_SIGNUM(uap->signum); AUDIT_ARG_FD(uap->fd); if ((u_int)uap->signum > _SIG_MAXSIG) return (EINVAL); error = procdesc_find(td, uap->fd, &cap_pdkill_rights, &p); if (error) return (error); AUDIT_ARG_PROCESS(p); error = p_cansignal(td, p, uap->signum); if (error == 0 && uap->signum) kern_psignal(p, uap->signum); PROC_UNLOCK(p); return (error); } #if defined(COMPAT_43) #ifndef _SYS_SYSPROTO_H_ struct okillpg_args { int pgid; int signum; }; #endif /* ARGSUSED */ int okillpg(struct thread *td, struct okillpg_args *uap) { ksiginfo_t ksi; AUDIT_ARG_SIGNUM(uap->signum); AUDIT_ARG_PID(uap->pgid); if ((u_int)uap->signum > _SIG_MAXSIG) return (EINVAL); ksiginfo_init(&ksi); ksi.ksi_signo = uap->signum; ksi.ksi_code = SI_USER; ksi.ksi_pid = td->td_proc->p_pid; ksi.ksi_uid = td->td_ucred->cr_ruid; return (killpg1(td, uap->signum, uap->pgid, 0, &ksi)); } #endif /* COMPAT_43 */ #ifndef _SYS_SYSPROTO_H_ struct sigqueue_args { pid_t pid; int signum; /* union sigval */ void *value; }; #endif int sys_sigqueue(struct thread *td, struct sigqueue_args *uap) { union sigval sv; sv.sival_ptr = uap->value; return (kern_sigqueue(td, uap->pid, uap->signum, &sv)); } int kern_sigqueue(struct thread *td, pid_t pid, int signum, union sigval *value) { ksiginfo_t ksi; struct proc *p; int error; if ((u_int)signum > _SIG_MAXSIG) return (EINVAL); /* * Specification says sigqueue can only send signal to * single process. */ if (pid <= 0) return (EINVAL); if ((p = pfind_any(pid)) == NULL) return (ESRCH); error = p_cansignal(td, p, signum); if (error == 0 && signum != 0) { ksiginfo_init(&ksi); ksi.ksi_flags = KSI_SIGQ; ksi.ksi_signo = signum; ksi.ksi_code = SI_QUEUE; ksi.ksi_pid = td->td_proc->p_pid; ksi.ksi_uid = td->td_ucred->cr_ruid; ksi.ksi_value = *value; error = pksignal(p, ksi.ksi_signo, &ksi); } PROC_UNLOCK(p); return (error); } /* * Send a signal to a process group. */ void gsignal(int pgid, int sig, ksiginfo_t *ksi) { struct pgrp *pgrp; if (pgid != 0) { sx_slock(&proctree_lock); pgrp = pgfind(pgid); sx_sunlock(&proctree_lock); if (pgrp != NULL) { pgsignal(pgrp, sig, 0, ksi); PGRP_UNLOCK(pgrp); } } } /* * Send a signal to a process group. If checktty is 1, * limit to members which have a controlling terminal. */ void pgsignal(struct pgrp *pgrp, int sig, int checkctty, ksiginfo_t *ksi) { struct proc *p; if (pgrp) { PGRP_LOCK_ASSERT(pgrp, MA_OWNED); LIST_FOREACH(p, &pgrp->pg_members, p_pglist) { PROC_LOCK(p); if (p->p_state == PRS_NORMAL && (checkctty == 0 || p->p_flag & P_CONTROLT)) pksignal(p, sig, ksi); PROC_UNLOCK(p); } } } /* * Recalculate the signal mask and reset the signal disposition after * usermode frame for delivery is formed. Should be called after * mach-specific routine, because sysent->sv_sendsig() needs correct * ps_siginfo and signal mask. */ static void postsig_done(int sig, struct thread *td, struct sigacts *ps) { sigset_t mask; mtx_assert(&ps->ps_mtx, MA_OWNED); td->td_ru.ru_nsignals++; mask = ps->ps_catchmask[_SIG_IDX(sig)]; if (!SIGISMEMBER(ps->ps_signodefer, sig)) SIGADDSET(mask, sig); kern_sigprocmask(td, SIG_BLOCK, &mask, NULL, SIGPROCMASK_PROC_LOCKED | SIGPROCMASK_PS_LOCKED); if (SIGISMEMBER(ps->ps_sigreset, sig)) sigdflt(ps, sig); } /* * Send a signal caused by a trap to the current thread. If it will be * caught immediately, deliver it with correct code. Otherwise, post it * normally. */ void trapsignal(struct thread *td, ksiginfo_t *ksi) { struct sigacts *ps; struct proc *p; sigset_t sigmask; int code, sig; p = td->td_proc; sig = ksi->ksi_signo; code = ksi->ksi_code; KASSERT(_SIG_VALID(sig), ("invalid signal")); sigfastblock_fetch(td); PROC_LOCK(p); ps = p->p_sigacts; mtx_lock(&ps->ps_mtx); sigmask = td->td_sigmask; if (td->td_sigblock_val != 0) SIGSETOR(sigmask, fastblock_mask); if ((p->p_flag & P_TRACED) == 0 && SIGISMEMBER(ps->ps_sigcatch, sig) && !SIGISMEMBER(sigmask, sig)) { #ifdef KTRACE if (KTRPOINT(curthread, KTR_PSIG)) ktrpsig(sig, ps->ps_sigact[_SIG_IDX(sig)], &td->td_sigmask, code); #endif (*p->p_sysent->sv_sendsig)(ps->ps_sigact[_SIG_IDX(sig)], ksi, &td->td_sigmask); postsig_done(sig, td, ps); mtx_unlock(&ps->ps_mtx); } else { /* * Avoid a possible infinite loop if the thread * masking the signal or process is ignoring the * signal. */ if (kern_forcesigexit && (SIGISMEMBER(sigmask, sig) || ps->ps_sigact[_SIG_IDX(sig)] == SIG_IGN)) { SIGDELSET(td->td_sigmask, sig); SIGDELSET(ps->ps_sigcatch, sig); SIGDELSET(ps->ps_sigignore, sig); ps->ps_sigact[_SIG_IDX(sig)] = SIG_DFL; td->td_pflags &= ~TDP_SIGFASTBLOCK; td->td_sigblock_val = 0; } mtx_unlock(&ps->ps_mtx); p->p_sig = sig; /* XXX to verify code */ tdsendsignal(p, td, sig, ksi); } PROC_UNLOCK(p); } static struct thread * sigtd(struct proc *p, int sig, bool fast_sigblock) { struct thread *td, *signal_td; PROC_LOCK_ASSERT(p, MA_OWNED); MPASS(!fast_sigblock || p == curproc); /* * Check if current thread can handle the signal without * switching context to another thread. */ if (curproc == p && !SIGISMEMBER(curthread->td_sigmask, sig) && (!fast_sigblock || curthread->td_sigblock_val == 0)) return (curthread); signal_td = NULL; FOREACH_THREAD_IN_PROC(p, td) { if (!SIGISMEMBER(td->td_sigmask, sig) && (!fast_sigblock || td != curthread || td->td_sigblock_val == 0)) { signal_td = td; break; } } if (signal_td == NULL) signal_td = FIRST_THREAD_IN_PROC(p); return (signal_td); } /* * Send the signal to the process. If the signal has an action, the action * is usually performed by the target process rather than the caller; we add * the signal to the set of pending signals for the process. * * Exceptions: * o When a stop signal is sent to a sleeping process that takes the * default action, the process is stopped without awakening it. * o SIGCONT restarts stopped processes (or puts them back to sleep) * regardless of the signal action (eg, blocked or ignored). * * Other ignored signals are discarded immediately. * * NB: This function may be entered from the debugger via the "kill" DDB * command. There is little that can be done to mitigate the possibly messy * side effects of this unwise possibility. */ void kern_psignal(struct proc *p, int sig) { ksiginfo_t ksi; ksiginfo_init(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = SI_KERNEL; (void) tdsendsignal(p, NULL, sig, &ksi); } int pksignal(struct proc *p, int sig, ksiginfo_t *ksi) { return (tdsendsignal(p, NULL, sig, ksi)); } /* Utility function for finding a thread to send signal event to. */ int sigev_findtd(struct proc *p ,struct sigevent *sigev, struct thread **ttd) { struct thread *td; if (sigev->sigev_notify == SIGEV_THREAD_ID) { td = tdfind(sigev->sigev_notify_thread_id, p->p_pid); if (td == NULL) return (ESRCH); *ttd = td; } else { *ttd = NULL; PROC_LOCK(p); } return (0); } void tdsignal(struct thread *td, int sig) { ksiginfo_t ksi; ksiginfo_init(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = SI_KERNEL; (void) tdsendsignal(td->td_proc, td, sig, &ksi); } void tdksignal(struct thread *td, int sig, ksiginfo_t *ksi) { (void) tdsendsignal(td->td_proc, td, sig, ksi); } int tdsendsignal(struct proc *p, struct thread *td, int sig, ksiginfo_t *ksi) { sig_t action; sigqueue_t *sigqueue; int prop; struct sigacts *ps; int intrval; int ret = 0; int wakeup_swapper; MPASS(td == NULL || p == td->td_proc); PROC_LOCK_ASSERT(p, MA_OWNED); if (!_SIG_VALID(sig)) panic("%s(): invalid signal %d", __func__, sig); KASSERT(ksi == NULL || !KSI_ONQ(ksi), ("%s: ksi on queue", __func__)); /* * IEEE Std 1003.1-2001: return success when killing a zombie. */ if (p->p_state == PRS_ZOMBIE) { if (ksi && (ksi->ksi_flags & KSI_INS)) ksiginfo_tryfree(ksi); return (ret); } ps = p->p_sigacts; KNOTE_LOCKED(p->p_klist, NOTE_SIGNAL | sig); prop = sigprop(sig); if (td == NULL) { td = sigtd(p, sig, false); sigqueue = &p->p_sigqueue; } else sigqueue = &td->td_sigqueue; SDT_PROBE3(proc, , , signal__send, td, p, sig); /* * If the signal is being ignored, * then we forget about it immediately. * (Note: we don't set SIGCONT in ps_sigignore, * and if it is set to SIG_IGN, * action will be SIG_DFL here.) */ mtx_lock(&ps->ps_mtx); if (SIGISMEMBER(ps->ps_sigignore, sig)) { SDT_PROBE3(proc, , , signal__discard, td, p, sig); mtx_unlock(&ps->ps_mtx); if (ksi && (ksi->ksi_flags & KSI_INS)) ksiginfo_tryfree(ksi); return (ret); } if (SIGISMEMBER(td->td_sigmask, sig)) action = SIG_HOLD; else if (SIGISMEMBER(ps->ps_sigcatch, sig)) action = SIG_CATCH; else action = SIG_DFL; if (SIGISMEMBER(ps->ps_sigintr, sig)) intrval = EINTR; else intrval = ERESTART; mtx_unlock(&ps->ps_mtx); if (prop & SIGPROP_CONT) sigqueue_delete_stopmask_proc(p); else if (prop & SIGPROP_STOP) { /* * If sending a tty stop signal to a member of an orphaned * process group, discard the signal here if the action * is default; don't stop the process below if sleeping, * and don't clear any pending SIGCONT. */ - if ((prop & SIGPROP_TTYSTOP) && - (p->p_pgrp->pg_jobc == 0) && - (action == SIG_DFL)) { + if ((prop & SIGPROP_TTYSTOP) != 0 && + p->p_pgrp->pg_jobc == 0 && + action == SIG_DFL) { if (ksi && (ksi->ksi_flags & KSI_INS)) ksiginfo_tryfree(ksi); return (ret); } sigqueue_delete_proc(p, SIGCONT); if (p->p_flag & P_CONTINUED) { p->p_flag &= ~P_CONTINUED; PROC_LOCK(p->p_pptr); sigqueue_take(p->p_ksi); PROC_UNLOCK(p->p_pptr); } } ret = sigqueue_add(sigqueue, sig, ksi); if (ret != 0) return (ret); signotify(td); /* * Defer further processing for signals which are held, * except that stopped processes must be continued by SIGCONT. */ if (action == SIG_HOLD && !((prop & SIGPROP_CONT) && (p->p_flag & P_STOPPED_SIG))) return (ret); wakeup_swapper = 0; /* * Some signals have a process-wide effect and a per-thread * component. Most processing occurs when the process next * tries to cross the user boundary, however there are some * times when processing needs to be done immediately, such as * waking up threads so that they can cross the user boundary. * We try to do the per-process part here. */ if (P_SHOULDSTOP(p)) { KASSERT(!(p->p_flag & P_WEXIT), ("signal to stopped but exiting process")); if (sig == SIGKILL) { /* * If traced process is already stopped, * then no further action is necessary. */ if (p->p_flag & P_TRACED) goto out; /* * SIGKILL sets process running. * It will die elsewhere. * All threads must be restarted. */ p->p_flag &= ~P_STOPPED_SIG; goto runfast; } if (prop & SIGPROP_CONT) { /* * If traced process is already stopped, * then no further action is necessary. */ if (p->p_flag & P_TRACED) goto out; /* * If SIGCONT is default (or ignored), we continue the * process but don't leave the signal in sigqueue as * it has no further action. If SIGCONT is held, we * continue the process and leave the signal in * sigqueue. If the process catches SIGCONT, let it * handle the signal itself. If it isn't waiting on * an event, it goes back to run state. * Otherwise, process goes back to sleep state. */ p->p_flag &= ~P_STOPPED_SIG; PROC_SLOCK(p); if (p->p_numthreads == p->p_suspcount) { PROC_SUNLOCK(p); p->p_flag |= P_CONTINUED; p->p_xsig = SIGCONT; PROC_LOCK(p->p_pptr); childproc_continued(p); PROC_UNLOCK(p->p_pptr); PROC_SLOCK(p); } if (action == SIG_DFL) { thread_unsuspend(p); PROC_SUNLOCK(p); sigqueue_delete(sigqueue, sig); goto out; } if (action == SIG_CATCH) { /* * The process wants to catch it so it needs * to run at least one thread, but which one? */ PROC_SUNLOCK(p); goto runfast; } /* * The signal is not ignored or caught. */ thread_unsuspend(p); PROC_SUNLOCK(p); goto out; } if (prop & SIGPROP_STOP) { /* * If traced process is already stopped, * then no further action is necessary. */ if (p->p_flag & P_TRACED) goto out; /* * Already stopped, don't need to stop again * (If we did the shell could get confused). * Just make sure the signal STOP bit set. */ p->p_flag |= P_STOPPED_SIG; sigqueue_delete(sigqueue, sig); goto out; } /* * All other kinds of signals: * If a thread is sleeping interruptibly, simulate a * wakeup so that when it is continued it will be made * runnable and can look at the signal. However, don't make * the PROCESS runnable, leave it stopped. * It may run a bit until it hits a thread_suspend_check(). */ PROC_SLOCK(p); thread_lock(td); if (TD_CAN_ABORT(td)) wakeup_swapper = sleepq_abort(td, intrval); else thread_unlock(td); PROC_SUNLOCK(p); goto out; /* * Mutexes are short lived. Threads waiting on them will * hit thread_suspend_check() soon. */ } else if (p->p_state == PRS_NORMAL) { if (p->p_flag & P_TRACED || action == SIG_CATCH) { tdsigwakeup(td, sig, action, intrval); goto out; } MPASS(action == SIG_DFL); if (prop & SIGPROP_STOP) { if (p->p_flag & (P_PPWAIT|P_WEXIT)) goto out; p->p_flag |= P_STOPPED_SIG; p->p_xsig = sig; PROC_SLOCK(p); wakeup_swapper = sig_suspend_threads(td, p, 1); if (p->p_numthreads == p->p_suspcount) { /* * only thread sending signal to another * process can reach here, if thread is sending * signal to its process, because thread does * not suspend itself here, p_numthreads * should never be equal to p_suspcount. */ thread_stopped(p); PROC_SUNLOCK(p); sigqueue_delete_proc(p, p->p_xsig); } else PROC_SUNLOCK(p); goto out; } } else { /* Not in "NORMAL" state. discard the signal. */ sigqueue_delete(sigqueue, sig); goto out; } /* * The process is not stopped so we need to apply the signal to all the * running threads. */ runfast: tdsigwakeup(td, sig, action, intrval); PROC_SLOCK(p); thread_unsuspend(p); PROC_SUNLOCK(p); out: /* If we jump here, proc slock should not be owned. */ PROC_SLOCK_ASSERT(p, MA_NOTOWNED); if (wakeup_swapper) kick_proc0(); return (ret); } /* * The force of a signal has been directed against a single * thread. We need to see what we can do about knocking it * out of any sleep it may be in etc. */ static void tdsigwakeup(struct thread *td, int sig, sig_t action, int intrval) { struct proc *p = td->td_proc; int prop, wakeup_swapper; PROC_LOCK_ASSERT(p, MA_OWNED); prop = sigprop(sig); PROC_SLOCK(p); thread_lock(td); /* * Bring the priority of a thread up if we want it to get * killed in this lifetime. Be careful to avoid bumping the * priority of the idle thread, since we still allow to signal * kernel processes. */ if (action == SIG_DFL && (prop & SIGPROP_KILL) != 0 && td->td_priority > PUSER && !TD_IS_IDLETHREAD(td)) sched_prio(td, PUSER); if (TD_ON_SLEEPQ(td)) { /* * If thread is sleeping uninterruptibly * we can't interrupt the sleep... the signal will * be noticed when the process returns through * trap() or syscall(). */ if ((td->td_flags & TDF_SINTR) == 0) goto out; /* * If SIGCONT is default (or ignored) and process is * asleep, we are finished; the process should not * be awakened. */ if ((prop & SIGPROP_CONT) && action == SIG_DFL) { thread_unlock(td); PROC_SUNLOCK(p); sigqueue_delete(&p->p_sigqueue, sig); /* * It may be on either list in this state. * Remove from both for now. */ sigqueue_delete(&td->td_sigqueue, sig); return; } /* * Don't awaken a sleeping thread for SIGSTOP if the * STOP signal is deferred. */ if ((prop & SIGPROP_STOP) != 0 && (td->td_flags & (TDF_SBDRY | TDF_SERESTART | TDF_SEINTR)) == TDF_SBDRY) goto out; /* * Give low priority threads a better chance to run. */ if (td->td_priority > PUSER && !TD_IS_IDLETHREAD(td)) sched_prio(td, PUSER); wakeup_swapper = sleepq_abort(td, intrval); PROC_SUNLOCK(p); if (wakeup_swapper) kick_proc0(); return; } /* * Other states do nothing with the signal immediately, * other than kicking ourselves if we are running. * It will either never be noticed, or noticed very soon. */ #ifdef SMP if (TD_IS_RUNNING(td) && td != curthread) forward_signal(td); #endif out: PROC_SUNLOCK(p); thread_unlock(td); } static int sig_suspend_threads(struct thread *td, struct proc *p, int sending) { struct thread *td2; int wakeup_swapper; PROC_LOCK_ASSERT(p, MA_OWNED); PROC_SLOCK_ASSERT(p, MA_OWNED); MPASS(sending || td == curthread); wakeup_swapper = 0; FOREACH_THREAD_IN_PROC(p, td2) { thread_lock(td2); td2->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK; if ((TD_IS_SLEEPING(td2) || TD_IS_SWAPPED(td2)) && (td2->td_flags & TDF_SINTR)) { if (td2->td_flags & TDF_SBDRY) { /* * Once a thread is asleep with * TDF_SBDRY and without TDF_SERESTART * or TDF_SEINTR set, it should never * become suspended due to this check. */ KASSERT(!TD_IS_SUSPENDED(td2), ("thread with deferred stops suspended")); if (TD_SBDRY_INTR(td2)) { wakeup_swapper |= sleepq_abort(td2, TD_SBDRY_ERRNO(td2)); continue; } } else if (!TD_IS_SUSPENDED(td2)) thread_suspend_one(td2); } else if (!TD_IS_SUSPENDED(td2)) { if (sending || td != td2) td2->td_flags |= TDF_ASTPENDING; #ifdef SMP if (TD_IS_RUNNING(td2) && td2 != td) forward_signal(td2); #endif } thread_unlock(td2); } return (wakeup_swapper); } /* * Stop the process for an event deemed interesting to the debugger. If si is * non-NULL, this is a signal exchange; the new signal requested by the * debugger will be returned for handling. If si is NULL, this is some other * type of interesting event. The debugger may request a signal be delivered in * that case as well, however it will be deferred until it can be handled. */ int ptracestop(struct thread *td, int sig, ksiginfo_t *si) { struct proc *p = td->td_proc; struct thread *td2; ksiginfo_t ksi; PROC_LOCK_ASSERT(p, MA_OWNED); KASSERT(!(p->p_flag & P_WEXIT), ("Stopping exiting process")); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &p->p_mtx.lock_object, "Stopping for traced signal"); td->td_xsig = sig; if (si == NULL || (si->ksi_flags & KSI_PTRACE) == 0) { td->td_dbgflags |= TDB_XSIG; CTR4(KTR_PTRACE, "ptracestop: tid %d (pid %d) flags %#x sig %d", td->td_tid, p->p_pid, td->td_dbgflags, sig); PROC_SLOCK(p); while ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_XSIG)) { if (P_KILLED(p)) { /* * Ensure that, if we've been PT_KILLed, the * exit status reflects that. Another thread * may also be in ptracestop(), having just * received the SIGKILL, but this thread was * unsuspended first. */ td->td_dbgflags &= ~TDB_XSIG; td->td_xsig = SIGKILL; p->p_ptevents = 0; break; } if (p->p_flag & P_SINGLE_EXIT && !(td->td_dbgflags & TDB_EXIT)) { /* * Ignore ptrace stops except for thread exit * events when the process exits. */ td->td_dbgflags &= ~TDB_XSIG; PROC_SUNLOCK(p); return (0); } /* * Make wait(2) work. Ensure that right after the * attach, the thread which was decided to become the * leader of attach gets reported to the waiter. * Otherwise, just avoid overwriting another thread's * assignment to p_xthread. If another thread has * already set p_xthread, the current thread will get * a chance to report itself upon the next iteration. */ if ((td->td_dbgflags & TDB_FSTP) != 0 || ((p->p_flag2 & P2_PTRACE_FSTP) == 0 && p->p_xthread == NULL)) { p->p_xsig = sig; p->p_xthread = td; /* * If we are on sleepqueue already, * let sleepqueue code decide if it * needs to go sleep after attach. */ if (td->td_wchan == NULL) td->td_dbgflags &= ~TDB_FSTP; p->p_flag2 &= ~P2_PTRACE_FSTP; p->p_flag |= P_STOPPED_SIG | P_STOPPED_TRACE; sig_suspend_threads(td, p, 0); } if ((td->td_dbgflags & TDB_STOPATFORK) != 0) { td->td_dbgflags &= ~TDB_STOPATFORK; } stopme: thread_suspend_switch(td, p); if (p->p_xthread == td) p->p_xthread = NULL; if (!(p->p_flag & P_TRACED)) break; if (td->td_dbgflags & TDB_SUSPEND) { if (p->p_flag & P_SINGLE_EXIT) break; goto stopme; } } PROC_SUNLOCK(p); } if (si != NULL && sig == td->td_xsig) { /* Parent wants us to take the original signal unchanged. */ si->ksi_flags |= KSI_HEAD; if (sigqueue_add(&td->td_sigqueue, sig, si) != 0) si->ksi_signo = 0; } else if (td->td_xsig != 0) { /* * If parent wants us to take a new signal, then it will leave * it in td->td_xsig; otherwise we just look for signals again. */ ksiginfo_init(&ksi); ksi.ksi_signo = td->td_xsig; ksi.ksi_flags |= KSI_PTRACE; td2 = sigtd(p, td->td_xsig, false); tdsendsignal(p, td2, td->td_xsig, &ksi); if (td != td2) return (0); } return (td->td_xsig); } static void reschedule_signals(struct proc *p, sigset_t block, int flags) { struct sigacts *ps; struct thread *td; int sig; bool fastblk, pslocked; PROC_LOCK_ASSERT(p, MA_OWNED); ps = p->p_sigacts; pslocked = (flags & SIGPROCMASK_PS_LOCKED) != 0; mtx_assert(&ps->ps_mtx, pslocked ? MA_OWNED : MA_NOTOWNED); if (SIGISEMPTY(p->p_siglist)) return; SIGSETAND(block, p->p_siglist); fastblk = (flags & SIGPROCMASK_FASTBLK) != 0; while ((sig = sig_ffs(&block)) != 0) { SIGDELSET(block, sig); td = sigtd(p, sig, fastblk); /* * If sigtd() selected us despite sigfastblock is * blocking, do not activate AST or wake us, to avoid * loop in AST handler. */ if (fastblk && td == curthread) continue; signotify(td); if (!pslocked) mtx_lock(&ps->ps_mtx); if (p->p_flag & P_TRACED || (SIGISMEMBER(ps->ps_sigcatch, sig) && !SIGISMEMBER(td->td_sigmask, sig))) { tdsigwakeup(td, sig, SIG_CATCH, (SIGISMEMBER(ps->ps_sigintr, sig) ? EINTR : ERESTART)); } if (!pslocked) mtx_unlock(&ps->ps_mtx); } } void tdsigcleanup(struct thread *td) { struct proc *p; sigset_t unblocked; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sigqueue_flush(&td->td_sigqueue); if (p->p_numthreads == 1) return; /* * Since we cannot handle signals, notify signal post code * about this by filling the sigmask. * * Also, if needed, wake up thread(s) that do not block the * same signals as the exiting thread, since the thread might * have been selected for delivery and woken up. */ SIGFILLSET(unblocked); SIGSETNAND(unblocked, td->td_sigmask); SIGFILLSET(td->td_sigmask); reschedule_signals(p, unblocked, 0); } static int sigdeferstop_curr_flags(int cflags) { MPASS((cflags & (TDF_SEINTR | TDF_SERESTART)) == 0 || (cflags & TDF_SBDRY) != 0); return (cflags & (TDF_SBDRY | TDF_SEINTR | TDF_SERESTART)); } /* * Defer the delivery of SIGSTOP for the current thread, according to * the requested mode. Returns previous flags, which must be restored * by sigallowstop(). * * TDF_SBDRY, TDF_SEINTR, and TDF_SERESTART flags are only set and * cleared by the current thread, which allow the lock-less read-only * accesses below. */ int sigdeferstop_impl(int mode) { struct thread *td; int cflags, nflags; td = curthread; cflags = sigdeferstop_curr_flags(td->td_flags); switch (mode) { case SIGDEFERSTOP_NOP: nflags = cflags; break; case SIGDEFERSTOP_OFF: nflags = 0; break; case SIGDEFERSTOP_SILENT: nflags = (cflags | TDF_SBDRY) & ~(TDF_SEINTR | TDF_SERESTART); break; case SIGDEFERSTOP_EINTR: nflags = (cflags | TDF_SBDRY | TDF_SEINTR) & ~TDF_SERESTART; break; case SIGDEFERSTOP_ERESTART: nflags = (cflags | TDF_SBDRY | TDF_SERESTART) & ~TDF_SEINTR; break; default: panic("sigdeferstop: invalid mode %x", mode); break; } if (cflags == nflags) return (SIGDEFERSTOP_VAL_NCHG); thread_lock(td); td->td_flags = (td->td_flags & ~cflags) | nflags; thread_unlock(td); return (cflags); } /* * Restores the STOP handling mode, typically permitting the delivery * of SIGSTOP for the current thread. This does not immediately * suspend if a stop was posted. Instead, the thread will suspend * either via ast() or a subsequent interruptible sleep. */ void sigallowstop_impl(int prev) { struct thread *td; int cflags; KASSERT(prev != SIGDEFERSTOP_VAL_NCHG, ("failed sigallowstop")); KASSERT((prev & ~(TDF_SBDRY | TDF_SEINTR | TDF_SERESTART)) == 0, ("sigallowstop: incorrect previous mode %x", prev)); td = curthread; cflags = sigdeferstop_curr_flags(td->td_flags); if (cflags != prev) { thread_lock(td); td->td_flags = (td->td_flags & ~cflags) | prev; thread_unlock(td); } } /* * If the current process has received a signal (should be caught or cause * termination, should interrupt current syscall), return the signal number. * Stop signals with default action are processed immediately, then cleared; * they aren't returned. This is checked after each entry to the system for * a syscall or trap (though this can usually be done without calling issignal * by checking the pending signal masks in cursig.) The normal call * sequence is * * while (sig = cursig(curthread)) * postsig(sig); */ static int issignal(struct thread *td) { struct proc *p; struct sigacts *ps; struct sigqueue *queue; sigset_t sigpending; ksiginfo_t ksi; int prop, sig; p = td->td_proc; ps = p->p_sigacts; mtx_assert(&ps->ps_mtx, MA_OWNED); PROC_LOCK_ASSERT(p, MA_OWNED); for (;;) { sigpending = td->td_sigqueue.sq_signals; SIGSETOR(sigpending, p->p_sigqueue.sq_signals); SIGSETNAND(sigpending, td->td_sigmask); if ((p->p_flag & P_PPWAIT) != 0 || (td->td_flags & (TDF_SBDRY | TDF_SERESTART | TDF_SEINTR)) == TDF_SBDRY) SIG_STOPSIGMASK(sigpending); if (SIGISEMPTY(sigpending)) /* no signal to send */ return (0); /* * Do fast sigblock if requested by usermode. Since * we do know that there was a signal pending at this * point, set the FAST_SIGBLOCK_PEND as indicator for * usermode to perform a dummy call to * FAST_SIGBLOCK_UNBLOCK, which causes immediate * delivery of postponed pending signal. */ if ((td->td_pflags & TDP_SIGFASTBLOCK) != 0) { if (td->td_sigblock_val != 0) SIGSETNAND(sigpending, fastblock_mask); if (SIGISEMPTY(sigpending)) { td->td_pflags |= TDP_SIGFASTPENDING; return (0); } } if ((p->p_flag & (P_TRACED | P_PPTRACE)) == P_TRACED && (p->p_flag2 & P2_PTRACE_FSTP) != 0 && SIGISMEMBER(sigpending, SIGSTOP)) { /* * If debugger just attached, always consume * SIGSTOP from ptrace(PT_ATTACH) first, to * execute the debugger attach ritual in * order. */ sig = SIGSTOP; td->td_dbgflags |= TDB_FSTP; } else { sig = sig_ffs(&sigpending); } /* * We should see pending but ignored signals * only if P_TRACED was on when they were posted. */ if (SIGISMEMBER(ps->ps_sigignore, sig) && (p->p_flag & P_TRACED) == 0) { sigqueue_delete(&td->td_sigqueue, sig); sigqueue_delete(&p->p_sigqueue, sig); continue; } if ((p->p_flag & (P_TRACED | P_PPTRACE)) == P_TRACED) { /* * If traced, always stop. * Remove old signal from queue before the stop. * XXX shrug off debugger, it causes siginfo to * be thrown away. */ queue = &td->td_sigqueue; ksiginfo_init(&ksi); if (sigqueue_get(queue, sig, &ksi) == 0) { queue = &p->p_sigqueue; sigqueue_get(queue, sig, &ksi); } td->td_si = ksi.ksi_info; mtx_unlock(&ps->ps_mtx); sig = ptracestop(td, sig, &ksi); mtx_lock(&ps->ps_mtx); td->td_si.si_signo = 0; /* * Keep looking if the debugger discarded or * replaced the signal. */ if (sig == 0) continue; /* * If the signal became masked, re-queue it. */ if (SIGISMEMBER(td->td_sigmask, sig)) { ksi.ksi_flags |= KSI_HEAD; sigqueue_add(&p->p_sigqueue, sig, &ksi); continue; } /* * If the traced bit got turned off, requeue * the signal and go back up to the top to * rescan signals. This ensures that p_sig* * and p_sigact are consistent. */ if ((p->p_flag & P_TRACED) == 0) { ksi.ksi_flags |= KSI_HEAD; sigqueue_add(queue, sig, &ksi); continue; } } prop = sigprop(sig); /* * Decide whether the signal should be returned. * Return the signal's number, or fall through * to clear it from the pending mask. */ switch ((intptr_t)p->p_sigacts->ps_sigact[_SIG_IDX(sig)]) { case (intptr_t)SIG_DFL: /* * Don't take default actions on system processes. */ if (p->p_pid <= 1) { #ifdef DIAGNOSTIC /* * Are you sure you want to ignore SIGSEGV * in init? XXX */ printf("Process (pid %lu) got signal %d\n", (u_long)p->p_pid, sig); #endif break; /* == ignore */ } /* * If there is a pending stop signal to process with * default action, stop here, then clear the signal. * Traced or exiting processes should ignore stops. * Additionally, a member of an orphaned process group * should ignore tty stops. */ if (prop & SIGPROP_STOP) { - if (p->p_flag & - (P_TRACED | P_WEXIT | P_SINGLE_EXIT) || + if ((p->p_flag & (P_TRACED | P_WEXIT | + P_SINGLE_EXIT)) != 0 || (p->p_pgrp->pg_jobc == 0 && - prop & SIGPROP_TTYSTOP)) + (prop & SIGPROP_TTYSTOP) != 0)) break; /* == ignore */ if (TD_SBDRY_INTR(td)) { KASSERT((td->td_flags & TDF_SBDRY) != 0, ("lost TDF_SBDRY")); return (-1); } mtx_unlock(&ps->ps_mtx); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &p->p_mtx.lock_object, "Catching SIGSTOP"); sigqueue_delete(&td->td_sigqueue, sig); sigqueue_delete(&p->p_sigqueue, sig); p->p_flag |= P_STOPPED_SIG; p->p_xsig = sig; PROC_SLOCK(p); sig_suspend_threads(td, p, 0); thread_suspend_switch(td, p); PROC_SUNLOCK(p); mtx_lock(&ps->ps_mtx); goto next; } else if (prop & SIGPROP_IGNORE) { /* * Except for SIGCONT, shouldn't get here. * Default action is to ignore; drop it. */ break; /* == ignore */ } else return (sig); /*NOTREACHED*/ case (intptr_t)SIG_IGN: /* * Masking above should prevent us ever trying * to take action on an ignored signal other * than SIGCONT, unless process is traced. */ if ((prop & SIGPROP_CONT) == 0 && (p->p_flag & P_TRACED) == 0) printf("issignal\n"); break; /* == ignore */ default: /* * This signal has an action, let * postsig() process it. */ return (sig); } sigqueue_delete(&td->td_sigqueue, sig); /* take the signal! */ sigqueue_delete(&p->p_sigqueue, sig); next:; } /* NOTREACHED */ } void thread_stopped(struct proc *p) { int n; PROC_LOCK_ASSERT(p, MA_OWNED); PROC_SLOCK_ASSERT(p, MA_OWNED); n = p->p_suspcount; if (p == curproc) n++; if ((p->p_flag & P_STOPPED_SIG) && (n == p->p_numthreads)) { PROC_SUNLOCK(p); p->p_flag &= ~P_WAITED; PROC_LOCK(p->p_pptr); childproc_stopped(p, (p->p_flag & P_TRACED) ? CLD_TRAPPED : CLD_STOPPED); PROC_UNLOCK(p->p_pptr); PROC_SLOCK(p); } } /* * Take the action for the specified signal * from the current set of pending signals. */ int postsig(int sig) { struct thread *td; struct proc *p; struct sigacts *ps; sig_t action; ksiginfo_t ksi; sigset_t returnmask; KASSERT(sig != 0, ("postsig")); td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); ps = p->p_sigacts; mtx_assert(&ps->ps_mtx, MA_OWNED); ksiginfo_init(&ksi); if (sigqueue_get(&td->td_sigqueue, sig, &ksi) == 0 && sigqueue_get(&p->p_sigqueue, sig, &ksi) == 0) return (0); ksi.ksi_signo = sig; if (ksi.ksi_code == SI_TIMER) itimer_accept(p, ksi.ksi_timerid, &ksi); action = ps->ps_sigact[_SIG_IDX(sig)]; #ifdef KTRACE if (KTRPOINT(td, KTR_PSIG)) ktrpsig(sig, action, td->td_pflags & TDP_OLDMASK ? &td->td_oldsigmask : &td->td_sigmask, ksi.ksi_code); #endif if (action == SIG_DFL) { /* * Default action, where the default is to kill * the process. (Other cases were ignored above.) */ mtx_unlock(&ps->ps_mtx); proc_td_siginfo_capture(td, &ksi.ksi_info); sigexit(td, sig); /* NOTREACHED */ } else { /* * If we get here, the signal must be caught. */ KASSERT(action != SIG_IGN, ("postsig action %p", action)); KASSERT(!SIGISMEMBER(td->td_sigmask, sig), ("postsig action: blocked sig %d", sig)); /* * Set the new mask value and also defer further * occurrences of this signal. * * Special case: user has done a sigsuspend. Here the * current mask is not of interest, but rather the * mask from before the sigsuspend is what we want * restored after the signal processing is completed. */ if (td->td_pflags & TDP_OLDMASK) { returnmask = td->td_oldsigmask; td->td_pflags &= ~TDP_OLDMASK; } else returnmask = td->td_sigmask; if (p->p_sig == sig) { p->p_sig = 0; } (*p->p_sysent->sv_sendsig)(action, &ksi, &returnmask); postsig_done(sig, td, ps); } return (1); } int sig_ast_checksusp(struct thread *td) { struct proc *p; int ret; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); if ((td->td_flags & TDF_NEEDSUSPCHK) == 0) return (0); ret = thread_suspend_check(1); MPASS(ret == 0 || ret == EINTR || ret == ERESTART); return (ret); } int sig_ast_needsigchk(struct thread *td) { struct proc *p; struct sigacts *ps; int ret, sig; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); if ((td->td_flags & TDF_NEEDSIGCHK) == 0) return (0); ps = p->p_sigacts; mtx_lock(&ps->ps_mtx); sig = cursig(td); if (sig == -1) { mtx_unlock(&ps->ps_mtx); KASSERT((td->td_flags & TDF_SBDRY) != 0, ("lost TDF_SBDRY")); KASSERT(TD_SBDRY_INTR(td), ("lost TDF_SERESTART of TDF_SEINTR")); KASSERT((td->td_flags & (TDF_SEINTR | TDF_SERESTART)) != (TDF_SEINTR | TDF_SERESTART), ("both TDF_SEINTR and TDF_SERESTART")); ret = TD_SBDRY_ERRNO(td); } else if (sig != 0) { ret = SIGISMEMBER(ps->ps_sigintr, sig) ? EINTR : ERESTART; mtx_unlock(&ps->ps_mtx); } else { mtx_unlock(&ps->ps_mtx); ret = 0; } /* * Do not go into sleep if this thread was the ptrace(2) * attach leader. cursig() consumed SIGSTOP from PT_ATTACH, * but we usually act on the signal by interrupting sleep, and * should do that here as well. */ if ((td->td_dbgflags & TDB_FSTP) != 0) { if (ret == 0) ret = EINTR; td->td_dbgflags &= ~TDB_FSTP; } return (ret); } int sig_intr(void) { struct thread *td; struct proc *p; int ret; td = curthread; if ((td->td_flags & (TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK)) == 0) return (0); p = td->td_proc; PROC_LOCK(p); ret = sig_ast_checksusp(td); if (ret == 0) ret = sig_ast_needsigchk(td); PROC_UNLOCK(p); return (ret); } void proc_wkilled(struct proc *p) { PROC_LOCK_ASSERT(p, MA_OWNED); if ((p->p_flag & P_WKILLED) == 0) { p->p_flag |= P_WKILLED; /* * Notify swapper that there is a process to swap in. * The notification is racy, at worst it would take 10 * seconds for the swapper process to notice. */ if ((p->p_flag & (P_INMEM | P_SWAPPINGIN)) == 0) wakeup(&proc0); } } /* * Kill the current process for stated reason. */ void killproc(struct proc *p, const char *why) { PROC_LOCK_ASSERT(p, MA_OWNED); CTR3(KTR_PROC, "killproc: proc %p (pid %d, %s)", p, p->p_pid, p->p_comm); log(LOG_ERR, "pid %d (%s), jid %d, uid %d, was killed: %s\n", p->p_pid, p->p_comm, p->p_ucred->cr_prison->pr_id, p->p_ucred->cr_uid, why); proc_wkilled(p); kern_psignal(p, SIGKILL); } /* * Force the current process to exit with the specified signal, dumping core * if appropriate. We bypass the normal tests for masked and caught signals, * allowing unrecoverable failures to terminate the process without changing * signal state. Mark the accounting record with the signal termination. * If dumping core, save the signal number for the debugger. Calls exit and * does not return. */ void sigexit(struct thread *td, int sig) { struct proc *p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); p->p_acflag |= AXSIG; /* * We must be single-threading to generate a core dump. This * ensures that the registers in the core file are up-to-date. * Also, the ELF dump handler assumes that the thread list doesn't * change out from under it. * * XXX If another thread attempts to single-thread before us * (e.g. via fork()), we won't get a dump at all. */ if ((sigprop(sig) & SIGPROP_CORE) && thread_single(p, SINGLE_NO_EXIT) == 0) { p->p_sig = sig; /* * Log signals which would cause core dumps * (Log as LOG_INFO to appease those who don't want * these messages.) * XXX : Todo, as well as euid, write out ruid too * Note that coredump() drops proc lock. */ if (coredump(td) == 0) sig |= WCOREFLAG; if (kern_logsigexit) log(LOG_INFO, "pid %d (%s), jid %d, uid %d: exited on " "signal %d%s\n", p->p_pid, p->p_comm, p->p_ucred->cr_prison->pr_id, td->td_ucred->cr_uid, sig &~ WCOREFLAG, sig & WCOREFLAG ? " (core dumped)" : ""); } else PROC_UNLOCK(p); exit1(td, 0, sig); /* NOTREACHED */ } /* * Send queued SIGCHLD to parent when child process's state * is changed. */ static void sigparent(struct proc *p, int reason, int status) { PROC_LOCK_ASSERT(p, MA_OWNED); PROC_LOCK_ASSERT(p->p_pptr, MA_OWNED); if (p->p_ksi != NULL) { p->p_ksi->ksi_signo = SIGCHLD; p->p_ksi->ksi_code = reason; p->p_ksi->ksi_status = status; p->p_ksi->ksi_pid = p->p_pid; p->p_ksi->ksi_uid = p->p_ucred->cr_ruid; if (KSI_ONQ(p->p_ksi)) return; } pksignal(p->p_pptr, SIGCHLD, p->p_ksi); } static void childproc_jobstate(struct proc *p, int reason, int sig) { struct sigacts *ps; PROC_LOCK_ASSERT(p, MA_OWNED); PROC_LOCK_ASSERT(p->p_pptr, MA_OWNED); /* * Wake up parent sleeping in kern_wait(), also send * SIGCHLD to parent, but SIGCHLD does not guarantee * that parent will awake, because parent may masked * the signal. */ p->p_pptr->p_flag |= P_STATCHILD; wakeup(p->p_pptr); ps = p->p_pptr->p_sigacts; mtx_lock(&ps->ps_mtx); if ((ps->ps_flag & PS_NOCLDSTOP) == 0) { mtx_unlock(&ps->ps_mtx); sigparent(p, reason, sig); } else mtx_unlock(&ps->ps_mtx); } void childproc_stopped(struct proc *p, int reason) { childproc_jobstate(p, reason, p->p_xsig); } void childproc_continued(struct proc *p) { childproc_jobstate(p, CLD_CONTINUED, SIGCONT); } void childproc_exited(struct proc *p) { int reason, status; if (WCOREDUMP(p->p_xsig)) { reason = CLD_DUMPED; status = WTERMSIG(p->p_xsig); } else if (WIFSIGNALED(p->p_xsig)) { reason = CLD_KILLED; status = WTERMSIG(p->p_xsig); } else { reason = CLD_EXITED; status = p->p_xexit; } /* * XXX avoid calling wakeup(p->p_pptr), the work is * done in exit1(). */ sigparent(p, reason, status); } #define MAX_NUM_CORE_FILES 100000 #ifndef NUM_CORE_FILES #define NUM_CORE_FILES 5 #endif CTASSERT(NUM_CORE_FILES >= 0 && NUM_CORE_FILES <= MAX_NUM_CORE_FILES); static int num_cores = NUM_CORE_FILES; static int sysctl_debug_num_cores_check (SYSCTL_HANDLER_ARGS) { int error; int new_val; new_val = num_cores; error = sysctl_handle_int(oidp, &new_val, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (new_val > MAX_NUM_CORE_FILES) new_val = MAX_NUM_CORE_FILES; if (new_val < 0) new_val = 0; num_cores = new_val; return (0); } SYSCTL_PROC(_debug, OID_AUTO, ncores, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, 0, sizeof(int), sysctl_debug_num_cores_check, "I", "Maximum number of generated process corefiles while using index format"); #define GZIP_SUFFIX ".gz" #define ZSTD_SUFFIX ".zst" int compress_user_cores = 0; static int sysctl_compress_user_cores(SYSCTL_HANDLER_ARGS) { int error, val; val = compress_user_cores; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (val != 0 && !compressor_avail(val)) return (EINVAL); compress_user_cores = val; return (error); } SYSCTL_PROC(_kern, OID_AUTO, compress_user_cores, CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, 0, sizeof(int), sysctl_compress_user_cores, "I", "Enable compression of user corefiles (" __XSTRING(COMPRESS_GZIP) " = gzip, " __XSTRING(COMPRESS_ZSTD) " = zstd)"); int compress_user_cores_level = 6; SYSCTL_INT(_kern, OID_AUTO, compress_user_cores_level, CTLFLAG_RWTUN, &compress_user_cores_level, 0, "Corefile compression level"); /* * Protect the access to corefilename[] by allproc_lock. */ #define corefilename_lock allproc_lock static char corefilename[MAXPATHLEN] = {"%N.core"}; TUNABLE_STR("kern.corefile", corefilename, sizeof(corefilename)); static int sysctl_kern_corefile(SYSCTL_HANDLER_ARGS) { int error; sx_xlock(&corefilename_lock); error = sysctl_handle_string(oidp, corefilename, sizeof(corefilename), req); sx_xunlock(&corefilename_lock); return (error); } SYSCTL_PROC(_kern, OID_AUTO, corefile, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_kern_corefile, "A", "Process corefile name format string"); static void vnode_close_locked(struct thread *td, struct vnode *vp) { VOP_UNLOCK(vp); vn_close(vp, FWRITE, td->td_ucred, td); } /* * If the core format has a %I in it, then we need to check * for existing corefiles before defining a name. * To do this we iterate over 0..ncores to find a * non-existing core file name to use. If all core files are * already used we choose the oldest one. */ static int corefile_open_last(struct thread *td, char *name, int indexpos, int indexlen, int ncores, struct vnode **vpp) { struct vnode *oldvp, *nextvp, *vp; struct vattr vattr; struct nameidata nd; int error, i, flags, oflags, cmode; char ch; struct timespec lasttime; nextvp = oldvp = NULL; cmode = S_IRUSR | S_IWUSR; oflags = VN_OPEN_NOAUDIT | VN_OPEN_NAMECACHE | (capmode_coredump ? VN_OPEN_NOCAPCHECK : 0); for (i = 0; i < ncores; i++) { flags = O_CREAT | FWRITE | O_NOFOLLOW; ch = name[indexpos + indexlen]; (void)snprintf(name + indexpos, indexlen + 1, "%.*u", indexlen, i); name[indexpos + indexlen] = ch; NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name, td); error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred, NULL); if (error != 0) break; vp = nd.ni_vp; NDFREE(&nd, NDF_ONLY_PNBUF); if ((flags & O_CREAT) == O_CREAT) { nextvp = vp; break; } error = VOP_GETATTR(vp, &vattr, td->td_ucred); if (error != 0) { vnode_close_locked(td, vp); break; } if (oldvp == NULL || lasttime.tv_sec > vattr.va_mtime.tv_sec || (lasttime.tv_sec == vattr.va_mtime.tv_sec && lasttime.tv_nsec >= vattr.va_mtime.tv_nsec)) { if (oldvp != NULL) vn_close(oldvp, FWRITE, td->td_ucred, td); oldvp = vp; VOP_UNLOCK(oldvp); lasttime = vattr.va_mtime; } else { vnode_close_locked(td, vp); } } if (oldvp != NULL) { if (nextvp == NULL) { if ((td->td_proc->p_flag & P_SUGID) != 0) { error = EFAULT; vn_close(oldvp, FWRITE, td->td_ucred, td); } else { nextvp = oldvp; error = vn_lock(nextvp, LK_EXCLUSIVE); if (error != 0) { vn_close(nextvp, FWRITE, td->td_ucred, td); nextvp = NULL; } } } else { vn_close(oldvp, FWRITE, td->td_ucred, td); } } if (error != 0) { if (nextvp != NULL) vnode_close_locked(td, oldvp); } else { *vpp = nextvp; } return (error); } /* * corefile_open(comm, uid, pid, td, compress, vpp, namep) * Expand the name described in corefilename, using name, uid, and pid * and open/create core file. * corefilename is a printf-like string, with three format specifiers: * %N name of process ("name") * %P process id (pid) * %U user id (uid) * For example, "%N.core" is the default; they can be disabled completely * by using "/dev/null", or all core files can be stored in "/cores/%U/%N-%P". * This is controlled by the sysctl variable kern.corefile (see above). */ static int corefile_open(const char *comm, uid_t uid, pid_t pid, struct thread *td, int compress, int signum, struct vnode **vpp, char **namep) { struct sbuf sb; struct nameidata nd; const char *format; char *hostname, *name; int cmode, error, flags, i, indexpos, indexlen, oflags, ncores; hostname = NULL; format = corefilename; name = malloc(MAXPATHLEN, M_TEMP, M_WAITOK | M_ZERO); indexlen = 0; indexpos = -1; ncores = num_cores; (void)sbuf_new(&sb, name, MAXPATHLEN, SBUF_FIXEDLEN); sx_slock(&corefilename_lock); for (i = 0; format[i] != '\0'; i++) { switch (format[i]) { case '%': /* Format character */ i++; switch (format[i]) { case '%': sbuf_putc(&sb, '%'); break; case 'H': /* hostname */ if (hostname == NULL) { hostname = malloc(MAXHOSTNAMELEN, M_TEMP, M_WAITOK); } getcredhostname(td->td_ucred, hostname, MAXHOSTNAMELEN); sbuf_printf(&sb, "%s", hostname); break; case 'I': /* autoincrementing index */ if (indexpos != -1) { sbuf_printf(&sb, "%%I"); break; } indexpos = sbuf_len(&sb); sbuf_printf(&sb, "%u", ncores - 1); indexlen = sbuf_len(&sb) - indexpos; break; case 'N': /* process name */ sbuf_printf(&sb, "%s", comm); break; case 'P': /* process id */ sbuf_printf(&sb, "%u", pid); break; case 'S': /* signal number */ sbuf_printf(&sb, "%i", signum); break; case 'U': /* user id */ sbuf_printf(&sb, "%u", uid); break; default: log(LOG_ERR, "Unknown format character %c in " "corename `%s'\n", format[i], format); break; } break; default: sbuf_putc(&sb, format[i]); break; } } sx_sunlock(&corefilename_lock); free(hostname, M_TEMP); if (compress == COMPRESS_GZIP) sbuf_printf(&sb, GZIP_SUFFIX); else if (compress == COMPRESS_ZSTD) sbuf_printf(&sb, ZSTD_SUFFIX); if (sbuf_error(&sb) != 0) { log(LOG_ERR, "pid %ld (%s), uid (%lu): corename is too " "long\n", (long)pid, comm, (u_long)uid); sbuf_delete(&sb); free(name, M_TEMP); return (ENOMEM); } sbuf_finish(&sb); sbuf_delete(&sb); if (indexpos != -1) { error = corefile_open_last(td, name, indexpos, indexlen, ncores, vpp); if (error != 0) { log(LOG_ERR, "pid %d (%s), uid (%u): Path `%s' failed " "on initial open test, error = %d\n", pid, comm, uid, name, error); } } else { cmode = S_IRUSR | S_IWUSR; oflags = VN_OPEN_NOAUDIT | VN_OPEN_NAMECACHE | (capmode_coredump ? VN_OPEN_NOCAPCHECK : 0); flags = O_CREAT | FWRITE | O_NOFOLLOW; if ((td->td_proc->p_flag & P_SUGID) != 0) flags |= O_EXCL; NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name, td); error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred, NULL); if (error == 0) { *vpp = nd.ni_vp; NDFREE(&nd, NDF_ONLY_PNBUF); } } if (error != 0) { #ifdef AUDIT audit_proc_coredump(td, name, error); #endif free(name, M_TEMP); return (error); } *namep = name; return (0); } /* * Dump a process' core. The main routine does some * policy checking, and creates the name of the coredump; * then it passes on a vnode and a size limit to the process-specific * coredump routine if there is one; if there _is not_ one, it returns * ENOSYS; otherwise it returns the error from the process-specific routine. */ static int coredump(struct thread *td) { struct proc *p = td->td_proc; struct ucred *cred = td->td_ucred; struct vnode *vp; struct flock lf; struct vattr vattr; size_t fullpathsize; int error, error1, locked; char *name; /* name of corefile */ void *rl_cookie; off_t limit; char *fullpath, *freepath = NULL; struct sbuf *sb; PROC_LOCK_ASSERT(p, MA_OWNED); MPASS((p->p_flag & P_HADTHREADS) == 0 || p->p_singlethread == td); if (!do_coredump || (!sugid_coredump && (p->p_flag & P_SUGID) != 0) || (p->p_flag2 & P2_NOTRACE) != 0) { PROC_UNLOCK(p); return (EFAULT); } /* * Note that the bulk of limit checking is done after * the corefile is created. The exception is if the limit * for corefiles is 0, in which case we don't bother * creating the corefile at all. This layout means that * a corefile is truncated instead of not being created, * if it is larger than the limit. */ limit = (off_t)lim_cur(td, RLIMIT_CORE); if (limit == 0 || racct_get_available(p, RACCT_CORE) == 0) { PROC_UNLOCK(p); return (EFBIG); } PROC_UNLOCK(p); error = corefile_open(p->p_comm, cred->cr_uid, p->p_pid, td, compress_user_cores, p->p_sig, &vp, &name); if (error != 0) return (error); /* * Don't dump to non-regular files or files with links. * Do not dump into system files. Effective user must own the corefile. */ if (vp->v_type != VREG || VOP_GETATTR(vp, &vattr, cred) != 0 || vattr.va_nlink != 1 || (vp->v_vflag & VV_SYSTEM) != 0 || vattr.va_uid != cred->cr_uid) { VOP_UNLOCK(vp); error = EFAULT; goto out; } VOP_UNLOCK(vp); /* Postpone other writers, including core dumps of other processes. */ rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_WRLCK; locked = (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &lf, F_FLOCK) == 0); VATTR_NULL(&vattr); vattr.va_size = 0; if (set_core_nodump_flag) vattr.va_flags = UF_NODUMP; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); VOP_SETATTR(vp, &vattr, cred); VOP_UNLOCK(vp); PROC_LOCK(p); p->p_acflag |= ACORE; PROC_UNLOCK(p); if (p->p_sysent->sv_coredump != NULL) { error = p->p_sysent->sv_coredump(td, vp, limit, 0); } else { error = ENOSYS; } if (locked) { lf.l_type = F_UNLCK; VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_FLOCK); } vn_rangelock_unlock(vp, rl_cookie); /* * Notify the userland helper that a process triggered a core dump. * This allows the helper to run an automated debugging session. */ if (error != 0 || coredump_devctl == 0) goto out; sb = sbuf_new_auto(); if (vn_fullpath_global(p->p_textvp, &fullpath, &freepath) != 0) goto out2; sbuf_printf(sb, "comm=\""); devctl_safe_quote_sb(sb, fullpath); free(freepath, M_TEMP); sbuf_printf(sb, "\" core=\""); /* * We can't lookup core file vp directly. When we're replacing a core, and * other random times, we flush the name cache, so it will fail. Instead, * if the path of the core is relative, add the current dir in front if it. */ if (name[0] != '/') { fullpathsize = MAXPATHLEN; freepath = malloc(fullpathsize, M_TEMP, M_WAITOK); if (vn_getcwd(freepath, &fullpath, &fullpathsize) != 0) { free(freepath, M_TEMP); goto out2; } devctl_safe_quote_sb(sb, fullpath); free(freepath, M_TEMP); sbuf_putc(sb, '/'); } devctl_safe_quote_sb(sb, name); sbuf_printf(sb, "\""); if (sbuf_finish(sb) == 0) devctl_notify("kernel", "signal", "coredump", sbuf_data(sb)); out2: sbuf_delete(sb); out: error1 = vn_close(vp, FWRITE, cred, td); if (error == 0) error = error1; #ifdef AUDIT audit_proc_coredump(td, name, error); #endif free(name, M_TEMP); return (error); } /* * Nonexistent system call-- signal process (may want to handle it). Flag * error in case process won't see signal immediately (blocked or ignored). */ #ifndef _SYS_SYSPROTO_H_ struct nosys_args { int dummy; }; #endif /* ARGSUSED */ int nosys(struct thread *td, struct nosys_args *args) { struct proc *p; p = td->td_proc; PROC_LOCK(p); tdsignal(td, SIGSYS); PROC_UNLOCK(p); if (kern_lognosys == 1 || kern_lognosys == 3) { uprintf("pid %d comm %s: nosys %d\n", p->p_pid, p->p_comm, td->td_sa.code); } if (kern_lognosys == 2 || kern_lognosys == 3 || (p->p_pid == 1 && (kern_lognosys & 3) == 0)) { printf("pid %d comm %s: nosys %d\n", p->p_pid, p->p_comm, td->td_sa.code); } return (ENOSYS); } /* * Send a SIGIO or SIGURG signal to a process or process group using stored * credentials rather than those of the current process. */ void pgsigio(struct sigio **sigiop, int sig, int checkctty) { ksiginfo_t ksi; struct sigio *sigio; ksiginfo_init(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = SI_KERNEL; SIGIO_LOCK(); sigio = *sigiop; if (sigio == NULL) { SIGIO_UNLOCK(); return; } if (sigio->sio_pgid > 0) { PROC_LOCK(sigio->sio_proc); if (CANSIGIO(sigio->sio_ucred, sigio->sio_proc->p_ucred)) kern_psignal(sigio->sio_proc, sig); PROC_UNLOCK(sigio->sio_proc); } else if (sigio->sio_pgid < 0) { struct proc *p; PGRP_LOCK(sigio->sio_pgrp); LIST_FOREACH(p, &sigio->sio_pgrp->pg_members, p_pglist) { PROC_LOCK(p); if (p->p_state == PRS_NORMAL && CANSIGIO(sigio->sio_ucred, p->p_ucred) && (checkctty == 0 || (p->p_flag & P_CONTROLT))) kern_psignal(p, sig); PROC_UNLOCK(p); } PGRP_UNLOCK(sigio->sio_pgrp); } SIGIO_UNLOCK(); } static int filt_sigattach(struct knote *kn) { struct proc *p = curproc; kn->kn_ptr.p_proc = p; kn->kn_flags |= EV_CLEAR; /* automatically set */ knlist_add(p->p_klist, kn, 0); return (0); } static void filt_sigdetach(struct knote *kn) { struct proc *p = kn->kn_ptr.p_proc; knlist_remove(p->p_klist, kn, 0); } /* * signal knotes are shared with proc knotes, so we apply a mask to * the hint in order to differentiate them from process hints. This * could be avoided by using a signal-specific knote list, but probably * isn't worth the trouble. */ static int filt_signal(struct knote *kn, long hint) { if (hint & NOTE_SIGNAL) { hint &= ~NOTE_SIGNAL; if (kn->kn_id == hint) kn->kn_data++; } return (kn->kn_data != 0); } struct sigacts * sigacts_alloc(void) { struct sigacts *ps; ps = malloc(sizeof(struct sigacts), M_SUBPROC, M_WAITOK | M_ZERO); refcount_init(&ps->ps_refcnt, 1); mtx_init(&ps->ps_mtx, "sigacts", NULL, MTX_DEF); return (ps); } void sigacts_free(struct sigacts *ps) { if (refcount_release(&ps->ps_refcnt) == 0) return; mtx_destroy(&ps->ps_mtx); free(ps, M_SUBPROC); } struct sigacts * sigacts_hold(struct sigacts *ps) { refcount_acquire(&ps->ps_refcnt); return (ps); } void sigacts_copy(struct sigacts *dest, struct sigacts *src) { KASSERT(dest->ps_refcnt == 1, ("sigacts_copy to shared dest")); mtx_lock(&src->ps_mtx); bcopy(src, dest, offsetof(struct sigacts, ps_refcnt)); mtx_unlock(&src->ps_mtx); } int sigacts_shared(struct sigacts *ps) { return (ps->ps_refcnt > 1); } void sig_drop_caught(struct proc *p) { int sig; struct sigacts *ps; ps = p->p_sigacts; PROC_LOCK_ASSERT(p, MA_OWNED); mtx_assert(&ps->ps_mtx, MA_OWNED); while (SIGNOTEMPTY(ps->ps_sigcatch)) { sig = sig_ffs(&ps->ps_sigcatch); sigdflt(ps, sig); if ((sigprop(sig) & SIGPROP_IGNORE) != 0) sigqueue_delete_proc(p, sig); } } static void sigfastblock_failed(struct thread *td, bool sendsig, bool write) { ksiginfo_t ksi; /* * Prevent further fetches and SIGSEGVs, allowing thread to * issue syscalls despite corruption. */ sigfastblock_clear(td); if (!sendsig) return; ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGSEGV; ksi.ksi_code = write ? SEGV_ACCERR : SEGV_MAPERR; ksi.ksi_addr = td->td_sigblock_ptr; trapsignal(td, &ksi); } static bool sigfastblock_fetch_sig(struct thread *td, bool sendsig, uint32_t *valp) { uint32_t res; if ((td->td_pflags & TDP_SIGFASTBLOCK) == 0) return (true); if (fueword32((void *)td->td_sigblock_ptr, &res) == -1) { sigfastblock_failed(td, sendsig, false); return (false); } *valp = res; td->td_sigblock_val = res & ~SIGFASTBLOCK_FLAGS; return (true); } static void sigfastblock_resched(struct thread *td, bool resched) { struct proc *p; if (resched) { p = td->td_proc; PROC_LOCK(p); reschedule_signals(p, td->td_sigmask, 0); PROC_UNLOCK(p); } thread_lock(td); td->td_flags |= TDF_ASTPENDING | TDF_NEEDSIGCHK; thread_unlock(td); } int sys_sigfastblock(struct thread *td, struct sigfastblock_args *uap) { struct proc *p; int error, res; uint32_t oldval; error = 0; p = td->td_proc; switch (uap->cmd) { case SIGFASTBLOCK_SETPTR: if ((td->td_pflags & TDP_SIGFASTBLOCK) != 0) { error = EBUSY; break; } if (((uintptr_t)(uap->ptr) & (sizeof(uint32_t) - 1)) != 0) { error = EINVAL; break; } td->td_pflags |= TDP_SIGFASTBLOCK; td->td_sigblock_ptr = uap->ptr; break; case SIGFASTBLOCK_UNBLOCK: if ((td->td_pflags & TDP_SIGFASTBLOCK) == 0) { error = EINVAL; break; } for (;;) { res = casueword32(td->td_sigblock_ptr, SIGFASTBLOCK_PEND, &oldval, 0); if (res == -1) { error = EFAULT; sigfastblock_failed(td, false, true); break; } if (res == 0) break; MPASS(res == 1); if (oldval != SIGFASTBLOCK_PEND) { error = EBUSY; break; } error = thread_check_susp(td, false); if (error != 0) break; } if (error != 0) break; /* * td_sigblock_val is cleared there, but not on a * syscall exit. The end effect is that a single * interruptible sleep, while user sigblock word is * set, might return EINTR or ERESTART to usermode * without delivering signal. All further sleeps, * until userspace clears the word and does * sigfastblock(UNBLOCK), observe current word and no * longer get interrupted. It is slight * non-conformance, with alternative to have read the * sigblock word on each syscall entry. */ td->td_sigblock_val = 0; /* * Rely on normal ast mechanism to deliver pending * signals to current thread. But notify others about * fake unblock. */ sigfastblock_resched(td, error == 0 && p->p_numthreads != 1); break; case SIGFASTBLOCK_UNSETPTR: if ((td->td_pflags & TDP_SIGFASTBLOCK) == 0) { error = EINVAL; break; } if (!sigfastblock_fetch_sig(td, false, &oldval)) { error = EFAULT; break; } if (oldval != 0 && oldval != SIGFASTBLOCK_PEND) { error = EBUSY; break; } sigfastblock_clear(td); break; default: error = EINVAL; break; } return (error); } void sigfastblock_clear(struct thread *td) { bool resched; if ((td->td_pflags & TDP_SIGFASTBLOCK) == 0) return; td->td_sigblock_val = 0; resched = (td->td_pflags & TDP_SIGFASTPENDING) != 0 || SIGPENDING(td); td->td_pflags &= ~(TDP_SIGFASTBLOCK | TDP_SIGFASTPENDING); sigfastblock_resched(td, resched); } void sigfastblock_fetch(struct thread *td) { uint32_t val; (void)sigfastblock_fetch_sig(td, true, &val); } static void sigfastblock_setpend1(struct thread *td) { int res; uint32_t oldval; if ((td->td_pflags & TDP_SIGFASTBLOCK) == 0) return; res = fueword32((void *)td->td_sigblock_ptr, &oldval); if (res == -1) { sigfastblock_failed(td, true, false); return; } for (;;) { res = casueword32(td->td_sigblock_ptr, oldval, &oldval, oldval | SIGFASTBLOCK_PEND); if (res == -1) { sigfastblock_failed(td, true, true); return; } if (res == 0) { td->td_sigblock_val = oldval & ~SIGFASTBLOCK_FLAGS; td->td_pflags &= ~TDP_SIGFASTPENDING; break; } MPASS(res == 1); if (thread_check_susp(td, false) != 0) break; } } void sigfastblock_setpend(struct thread *td, bool resched) { struct proc *p; sigfastblock_setpend1(td); if (resched) { p = td->td_proc; PROC_LOCK(p); reschedule_signals(p, fastblock_mask, SIGPROCMASK_FASTBLK); PROC_UNLOCK(p); } } diff --git a/sys/kern/tty.c b/sys/kern/tty.c index 8d4d25a4ac0b..af4aebd18979 100644 --- a/sys/kern/tty.c +++ b/sys/kern/tty.c @@ -1,2451 +1,2451 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2008 Ed Schouten * All rights reserved. * * Portions of this software were developed under sponsorship from Snow * B.V., the Netherlands. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_printf.h" #include #include #include #include #include #include #include #include #ifdef COMPAT_43TTY #include #endif /* COMPAT_43TTY */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TTYDEFCHARS #include #undef TTYDEFCHARS #include #include #include #include static MALLOC_DEFINE(M_TTY, "tty", "tty device"); static void tty_rel_free(struct tty *tp); static TAILQ_HEAD(, tty) tty_list = TAILQ_HEAD_INITIALIZER(tty_list); static struct sx tty_list_sx; SX_SYSINIT(tty_list, &tty_list_sx, "tty list"); static unsigned int tty_list_count = 0; /* Character device of /dev/console. */ static struct cdev *dev_console; static const char *dev_console_filename; /* * Flags that are supported and stored by this implementation. */ #define TTYSUP_IFLAG (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK|ISTRIP|\ INLCR|IGNCR|ICRNL|IXON|IXOFF|IXANY|IMAXBEL) #define TTYSUP_OFLAG (OPOST|ONLCR|TAB3|ONOEOT|OCRNL|ONOCR|ONLRET) #define TTYSUP_LFLAG (ECHOKE|ECHOE|ECHOK|ECHO|ECHONL|ECHOPRT|\ ECHOCTL|ISIG|ICANON|ALTWERASE|IEXTEN|TOSTOP|\ FLUSHO|NOKERNINFO|NOFLSH) #define TTYSUP_CFLAG (CIGNORE|CSIZE|CSTOPB|CREAD|PARENB|PARODD|\ HUPCL|CLOCAL|CCTS_OFLOW|CRTS_IFLOW|CDTR_IFLOW|\ CDSR_OFLOW|CCAR_OFLOW|CNO_RTSDTR) #define TTY_CALLOUT(tp,d) (dev2unit(d) & TTYUNIT_CALLOUT) static int tty_drainwait = 5 * 60; SYSCTL_INT(_kern, OID_AUTO, tty_drainwait, CTLFLAG_RWTUN, &tty_drainwait, 0, "Default output drain timeout in seconds"); /* * Set TTY buffer sizes. */ #define TTYBUF_MAX 65536 #ifdef PRINTF_BUFR_SIZE #define TTY_PRBUF_SIZE PRINTF_BUFR_SIZE #else #define TTY_PRBUF_SIZE 256 #endif /* * Allocate buffer space if necessary, and set low watermarks, based on speed. * Note that the ttyxxxq_setsize() functions may drop and then reacquire the tty * lock during memory allocation. They will return ENXIO if the tty disappears * while unlocked. */ static int tty_watermarks(struct tty *tp) { size_t bs = 0; int error; /* Provide an input buffer for 2 seconds of data. */ if (tp->t_termios.c_cflag & CREAD) bs = MIN(tp->t_termios.c_ispeed / 5, TTYBUF_MAX); error = ttyinq_setsize(&tp->t_inq, tp, bs); if (error != 0) return (error); /* Set low watermark at 10% (when 90% is available). */ tp->t_inlow = (ttyinq_getallocatedsize(&tp->t_inq) * 9) / 10; /* Provide an output buffer for 2 seconds of data. */ bs = MIN(tp->t_termios.c_ospeed / 5, TTYBUF_MAX); error = ttyoutq_setsize(&tp->t_outq, tp, bs); if (error != 0) return (error); /* Set low watermark at 10% (when 90% is available). */ tp->t_outlow = (ttyoutq_getallocatedsize(&tp->t_outq) * 9) / 10; return (0); } static int tty_drain(struct tty *tp, int leaving) { sbintime_t timeout_at; size_t bytes; int error; if (ttyhook_hashook(tp, getc_inject)) /* buffer is inaccessible */ return (0); /* * For close(), use the recent historic timeout of "1 second without * making progress". For tcdrain(), use t_drainwait as the timeout, * with zero meaning "no timeout" which gives POSIX behavior. */ if (leaving) timeout_at = getsbinuptime() + SBT_1S; else if (tp->t_drainwait != 0) timeout_at = getsbinuptime() + SBT_1S * tp->t_drainwait; else timeout_at = 0; /* * Poll the output buffer and the hardware for completion, at 10 Hz. * Polling is required for devices which are not able to signal an * interrupt when the transmitter becomes idle (most USB serial devs). * The unusual structure of this loop ensures we check for busy one more * time after tty_timedwait() returns EWOULDBLOCK, so that success has * higher priority than timeout if the IO completed in the last 100mS. */ error = 0; bytes = ttyoutq_bytesused(&tp->t_outq); for (;;) { if (ttyoutq_bytesused(&tp->t_outq) == 0 && !ttydevsw_busy(tp)) return (0); if (error != 0) return (error); ttydevsw_outwakeup(tp); error = tty_timedwait(tp, &tp->t_outwait, hz / 10); if (error != 0 && error != EWOULDBLOCK) return (error); else if (timeout_at == 0 || getsbinuptime() < timeout_at) error = 0; else if (leaving && ttyoutq_bytesused(&tp->t_outq) < bytes) { /* In close, making progress, grant an extra second. */ error = 0; timeout_at += SBT_1S; bytes = ttyoutq_bytesused(&tp->t_outq); } } } /* * Though ttydev_enter() and ttydev_leave() seem to be related, they * don't have to be used together. ttydev_enter() is used by the cdev * operations to prevent an actual operation from being processed when * the TTY has been abandoned. ttydev_leave() is used by ttydev_open() * and ttydev_close() to determine whether per-TTY data should be * deallocated. */ static __inline int ttydev_enter(struct tty *tp) { tty_lock(tp); if (tty_gone(tp) || !tty_opened(tp)) { /* Device is already gone. */ tty_unlock(tp); return (ENXIO); } return (0); } static void ttydev_leave(struct tty *tp) { tty_assert_locked(tp); if (tty_opened(tp) || tp->t_flags & TF_OPENCLOSE) { /* Device is still opened somewhere. */ tty_unlock(tp); return; } tp->t_flags |= TF_OPENCLOSE; /* Remove console TTY. */ if (constty == tp) constty_clear(); /* Drain any output. */ if (!tty_gone(tp)) tty_drain(tp, 1); ttydisc_close(tp); /* Free i/o queues now since they might be large. */ ttyinq_free(&tp->t_inq); tp->t_inlow = 0; ttyoutq_free(&tp->t_outq); tp->t_outlow = 0; knlist_clear(&tp->t_inpoll.si_note, 1); knlist_clear(&tp->t_outpoll.si_note, 1); if (!tty_gone(tp)) ttydevsw_close(tp); tp->t_flags &= ~TF_OPENCLOSE; cv_broadcast(&tp->t_dcdwait); tty_rel_free(tp); } /* * Operations that are exposed through the character device in /dev. */ static int ttydev_open(struct cdev *dev, int oflags, int devtype __unused, struct thread *td) { struct tty *tp; int error; tp = dev->si_drv1; error = 0; tty_lock(tp); if (tty_gone(tp)) { /* Device is already gone. */ tty_unlock(tp); return (ENXIO); } /* * Block when other processes are currently opening or closing * the TTY. */ while (tp->t_flags & TF_OPENCLOSE) { error = tty_wait(tp, &tp->t_dcdwait); if (error != 0) { tty_unlock(tp); return (error); } } tp->t_flags |= TF_OPENCLOSE; /* * Make sure the "tty" and "cua" device cannot be opened at the * same time. The console is a "tty" device. */ if (TTY_CALLOUT(tp, dev)) { if (tp->t_flags & (TF_OPENED_CONS | TF_OPENED_IN)) { error = EBUSY; goto done; } } else { if (tp->t_flags & TF_OPENED_OUT) { error = EBUSY; goto done; } } if (tp->t_flags & TF_EXCLUDE && priv_check(td, PRIV_TTY_EXCLUSIVE)) { error = EBUSY; goto done; } if (!tty_opened(tp)) { /* Set proper termios flags. */ if (TTY_CALLOUT(tp, dev)) tp->t_termios = tp->t_termios_init_out; else tp->t_termios = tp->t_termios_init_in; ttydevsw_param(tp, &tp->t_termios); /* Prevent modem control on callout devices and /dev/console. */ if (TTY_CALLOUT(tp, dev) || dev == dev_console) tp->t_termios.c_cflag |= CLOCAL; if ((tp->t_termios.c_cflag & CNO_RTSDTR) == 0) ttydevsw_modem(tp, SER_DTR|SER_RTS, 0); error = ttydevsw_open(tp); if (error != 0) goto done; ttydisc_open(tp); error = tty_watermarks(tp); if (error != 0) goto done; } /* Wait for Carrier Detect. */ if ((oflags & O_NONBLOCK) == 0 && (tp->t_termios.c_cflag & CLOCAL) == 0) { while ((ttydevsw_modem(tp, 0, 0) & SER_DCD) == 0) { error = tty_wait(tp, &tp->t_dcdwait); if (error != 0) goto done; } } if (dev == dev_console) tp->t_flags |= TF_OPENED_CONS; else if (TTY_CALLOUT(tp, dev)) tp->t_flags |= TF_OPENED_OUT; else tp->t_flags |= TF_OPENED_IN; MPASS((tp->t_flags & (TF_OPENED_CONS | TF_OPENED_IN)) == 0 || (tp->t_flags & TF_OPENED_OUT) == 0); done: tp->t_flags &= ~TF_OPENCLOSE; cv_broadcast(&tp->t_dcdwait); ttydev_leave(tp); return (error); } static int ttydev_close(struct cdev *dev, int fflag, int devtype __unused, struct thread *td __unused) { struct tty *tp = dev->si_drv1; tty_lock(tp); /* * Don't actually close the device if it is being used as the * console. */ MPASS((tp->t_flags & (TF_OPENED_CONS | TF_OPENED_IN)) == 0 || (tp->t_flags & TF_OPENED_OUT) == 0); if (dev == dev_console) tp->t_flags &= ~TF_OPENED_CONS; else tp->t_flags &= ~(TF_OPENED_IN|TF_OPENED_OUT); if (tp->t_flags & TF_OPENED) { tty_unlock(tp); return (0); } /* If revoking, flush output now to avoid draining it later. */ if (fflag & FREVOKE) tty_flush(tp, FWRITE); tp->t_flags &= ~TF_EXCLUDE; /* Properly wake up threads that are stuck - revoke(). */ tp->t_revokecnt++; tty_wakeup(tp, FREAD|FWRITE); cv_broadcast(&tp->t_bgwait); cv_broadcast(&tp->t_dcdwait); ttydev_leave(tp); return (0); } static __inline int tty_is_ctty(struct tty *tp, struct proc *p) { tty_assert_locked(tp); return (p->p_session == tp->t_session && p->p_flag & P_CONTROLT); } int tty_wait_background(struct tty *tp, struct thread *td, int sig) { struct proc *p = td->td_proc; struct pgrp *pg; ksiginfo_t ksi; int error; MPASS(sig == SIGTTIN || sig == SIGTTOU); tty_assert_locked(tp); for (;;) { PROC_LOCK(p); /* * The process should only sleep, when: * - This terminal is the controlling terminal * - Its process group is not the foreground process * group * - The parent process isn't waiting for the child to * exit * - the signal to send to the process isn't masked */ if (!tty_is_ctty(tp, p) || p->p_pgrp == tp->t_pgrp) { /* Allow the action to happen. */ PROC_UNLOCK(p); return (0); } if (SIGISMEMBER(p->p_sigacts->ps_sigignore, sig) || SIGISMEMBER(td->td_sigmask, sig)) { /* Only allow them in write()/ioctl(). */ PROC_UNLOCK(p); return (sig == SIGTTOU ? 0 : EIO); } pg = p->p_pgrp; - if (p->p_flag & P_PPWAIT || pg->pg_jobc == 0) { + if ((p->p_flag & P_PPWAIT) != 0 || pg->pg_jobc == 0) { /* Don't allow the action to happen. */ PROC_UNLOCK(p); return (EIO); } PROC_UNLOCK(p); /* * Send the signal and sleep until we're the new * foreground process group. */ if (sig != 0) { ksiginfo_init(&ksi); ksi.ksi_code = SI_KERNEL; ksi.ksi_signo = sig; sig = 0; } PGRP_LOCK(pg); /* * pg may no longer be our process group. * Re-check after locking process group. */ PROC_LOCK(p); if (p->p_pgrp != pg) { PROC_UNLOCK(p); PGRP_UNLOCK(pg); continue; } PROC_UNLOCK(p); pgsignal(pg, ksi.ksi_signo, 1, &ksi); PGRP_UNLOCK(pg); error = tty_wait(tp, &tp->t_bgwait); if (error) return (error); } } static int ttydev_read(struct cdev *dev, struct uio *uio, int ioflag) { struct tty *tp = dev->si_drv1; int error; error = ttydev_enter(tp); if (error) goto done; error = ttydisc_read(tp, uio, ioflag); tty_unlock(tp); /* * The read() call should not throw an error when the device is * being destroyed. Silently convert it to an EOF. */ done: if (error == ENXIO) error = 0; return (error); } static int ttydev_write(struct cdev *dev, struct uio *uio, int ioflag) { struct tty *tp = dev->si_drv1; int error; error = ttydev_enter(tp); if (error) return (error); if (tp->t_termios.c_lflag & TOSTOP) { error = tty_wait_background(tp, curthread, SIGTTOU); if (error) goto done; } if (ioflag & IO_NDELAY && tp->t_flags & TF_BUSY_OUT) { /* Allow non-blocking writes to bypass serialization. */ error = ttydisc_write(tp, uio, ioflag); } else { /* Serialize write() calls. */ while (tp->t_flags & TF_BUSY_OUT) { error = tty_wait(tp, &tp->t_outserwait); if (error) goto done; } tp->t_flags |= TF_BUSY_OUT; error = ttydisc_write(tp, uio, ioflag); tp->t_flags &= ~TF_BUSY_OUT; cv_signal(&tp->t_outserwait); } done: tty_unlock(tp); return (error); } static int ttydev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td) { struct tty *tp = dev->si_drv1; int error; error = ttydev_enter(tp); if (error) return (error); switch (cmd) { case TIOCCBRK: case TIOCCONS: case TIOCDRAIN: case TIOCEXCL: case TIOCFLUSH: case TIOCNXCL: case TIOCSBRK: case TIOCSCTTY: case TIOCSETA: case TIOCSETAF: case TIOCSETAW: case TIOCSPGRP: case TIOCSTART: case TIOCSTAT: case TIOCSTI: case TIOCSTOP: case TIOCSWINSZ: #if 0 case TIOCSDRAINWAIT: case TIOCSETD: #endif #ifdef COMPAT_43TTY case TIOCLBIC: case TIOCLBIS: case TIOCLSET: case TIOCSETC: case OTIOCSETD: case TIOCSETN: case TIOCSETP: case TIOCSLTC: #endif /* COMPAT_43TTY */ /* * If the ioctl() causes the TTY to be modified, let it * wait in the background. */ error = tty_wait_background(tp, curthread, SIGTTOU); if (error) goto done; } if (cmd == TIOCSETA || cmd == TIOCSETAW || cmd == TIOCSETAF) { struct termios *old = &tp->t_termios; struct termios *new = (struct termios *)data; struct termios *lock = TTY_CALLOUT(tp, dev) ? &tp->t_termios_lock_out : &tp->t_termios_lock_in; int cc; /* * Lock state devices. Just overwrite the values of the * commands that are currently in use. */ new->c_iflag = (old->c_iflag & lock->c_iflag) | (new->c_iflag & ~lock->c_iflag); new->c_oflag = (old->c_oflag & lock->c_oflag) | (new->c_oflag & ~lock->c_oflag); new->c_cflag = (old->c_cflag & lock->c_cflag) | (new->c_cflag & ~lock->c_cflag); new->c_lflag = (old->c_lflag & lock->c_lflag) | (new->c_lflag & ~lock->c_lflag); for (cc = 0; cc < NCCS; ++cc) if (lock->c_cc[cc]) new->c_cc[cc] = old->c_cc[cc]; if (lock->c_ispeed) new->c_ispeed = old->c_ispeed; if (lock->c_ospeed) new->c_ospeed = old->c_ospeed; } error = tty_ioctl(tp, cmd, data, fflag, td); done: tty_unlock(tp); return (error); } static int ttydev_poll(struct cdev *dev, int events, struct thread *td) { struct tty *tp = dev->si_drv1; int error, revents = 0; error = ttydev_enter(tp); if (error) return ((events & (POLLIN|POLLRDNORM)) | POLLHUP); if (events & (POLLIN|POLLRDNORM)) { /* See if we can read something. */ if (ttydisc_read_poll(tp) > 0) revents |= events & (POLLIN|POLLRDNORM); } if (tp->t_flags & TF_ZOMBIE) { /* Hangup flag on zombie state. */ revents |= POLLHUP; } else if (events & (POLLOUT|POLLWRNORM)) { /* See if we can write something. */ if (ttydisc_write_poll(tp) > 0) revents |= events & (POLLOUT|POLLWRNORM); } if (revents == 0) { if (events & (POLLIN|POLLRDNORM)) selrecord(td, &tp->t_inpoll); if (events & (POLLOUT|POLLWRNORM)) selrecord(td, &tp->t_outpoll); } tty_unlock(tp); return (revents); } static int ttydev_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr) { struct tty *tp = dev->si_drv1; int error; /* Handle mmap() through the driver. */ error = ttydev_enter(tp); if (error) return (-1); error = ttydevsw_mmap(tp, offset, paddr, nprot, memattr); tty_unlock(tp); return (error); } /* * kqueue support. */ static void tty_kqops_read_detach(struct knote *kn) { struct tty *tp = kn->kn_hook; knlist_remove(&tp->t_inpoll.si_note, kn, 0); } static int tty_kqops_read_event(struct knote *kn, long hint __unused) { struct tty *tp = kn->kn_hook; tty_assert_locked(tp); if (tty_gone(tp) || tp->t_flags & TF_ZOMBIE) { kn->kn_flags |= EV_EOF; return (1); } else { kn->kn_data = ttydisc_read_poll(tp); return (kn->kn_data > 0); } } static void tty_kqops_write_detach(struct knote *kn) { struct tty *tp = kn->kn_hook; knlist_remove(&tp->t_outpoll.si_note, kn, 0); } static int tty_kqops_write_event(struct knote *kn, long hint __unused) { struct tty *tp = kn->kn_hook; tty_assert_locked(tp); if (tty_gone(tp)) { kn->kn_flags |= EV_EOF; return (1); } else { kn->kn_data = ttydisc_write_poll(tp); return (kn->kn_data > 0); } } static struct filterops tty_kqops_read = { .f_isfd = 1, .f_detach = tty_kqops_read_detach, .f_event = tty_kqops_read_event, }; static struct filterops tty_kqops_write = { .f_isfd = 1, .f_detach = tty_kqops_write_detach, .f_event = tty_kqops_write_event, }; static int ttydev_kqfilter(struct cdev *dev, struct knote *kn) { struct tty *tp = dev->si_drv1; int error; error = ttydev_enter(tp); if (error) return (error); switch (kn->kn_filter) { case EVFILT_READ: kn->kn_hook = tp; kn->kn_fop = &tty_kqops_read; knlist_add(&tp->t_inpoll.si_note, kn, 1); break; case EVFILT_WRITE: kn->kn_hook = tp; kn->kn_fop = &tty_kqops_write; knlist_add(&tp->t_outpoll.si_note, kn, 1); break; default: error = EINVAL; break; } tty_unlock(tp); return (error); } static struct cdevsw ttydev_cdevsw = { .d_version = D_VERSION, .d_open = ttydev_open, .d_close = ttydev_close, .d_read = ttydev_read, .d_write = ttydev_write, .d_ioctl = ttydev_ioctl, .d_kqfilter = ttydev_kqfilter, .d_poll = ttydev_poll, .d_mmap = ttydev_mmap, .d_name = "ttydev", .d_flags = D_TTY, }; /* * Init/lock-state devices */ static int ttyil_open(struct cdev *dev, int oflags __unused, int devtype __unused, struct thread *td) { struct tty *tp; int error; tp = dev->si_drv1; error = 0; tty_lock(tp); if (tty_gone(tp)) error = ENODEV; tty_unlock(tp); return (error); } static int ttyil_close(struct cdev *dev __unused, int flag __unused, int mode __unused, struct thread *td __unused) { return (0); } static int ttyil_rdwr(struct cdev *dev __unused, struct uio *uio __unused, int ioflag __unused) { return (ENODEV); } static int ttyil_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td) { struct tty *tp = dev->si_drv1; int error; tty_lock(tp); if (tty_gone(tp)) { error = ENODEV; goto done; } error = ttydevsw_cioctl(tp, dev2unit(dev), cmd, data, td); if (error != ENOIOCTL) goto done; error = 0; switch (cmd) { case TIOCGETA: /* Obtain terminal flags through tcgetattr(). */ *(struct termios*)data = *(struct termios*)dev->si_drv2; break; case TIOCSETA: /* Set terminal flags through tcsetattr(). */ error = priv_check(td, PRIV_TTY_SETA); if (error) break; *(struct termios*)dev->si_drv2 = *(struct termios*)data; break; case TIOCGETD: *(int *)data = TTYDISC; break; case TIOCGWINSZ: bzero(data, sizeof(struct winsize)); break; default: error = ENOTTY; } done: tty_unlock(tp); return (error); } static struct cdevsw ttyil_cdevsw = { .d_version = D_VERSION, .d_open = ttyil_open, .d_close = ttyil_close, .d_read = ttyil_rdwr, .d_write = ttyil_rdwr, .d_ioctl = ttyil_ioctl, .d_name = "ttyil", .d_flags = D_TTY, }; static void tty_init_termios(struct tty *tp) { struct termios *t = &tp->t_termios_init_in; t->c_cflag = TTYDEF_CFLAG; t->c_iflag = TTYDEF_IFLAG; t->c_lflag = TTYDEF_LFLAG; t->c_oflag = TTYDEF_OFLAG; t->c_ispeed = TTYDEF_SPEED; t->c_ospeed = TTYDEF_SPEED; memcpy(&t->c_cc, ttydefchars, sizeof ttydefchars); tp->t_termios_init_out = *t; } void tty_init_console(struct tty *tp, speed_t s) { struct termios *ti = &tp->t_termios_init_in; struct termios *to = &tp->t_termios_init_out; if (s != 0) { ti->c_ispeed = ti->c_ospeed = s; to->c_ispeed = to->c_ospeed = s; } ti->c_cflag |= CLOCAL; to->c_cflag |= CLOCAL; } /* * Standard device routine implementations, mostly meant for * pseudo-terminal device drivers. When a driver creates a new terminal * device class, missing routines are patched. */ static int ttydevsw_defopen(struct tty *tp __unused) { return (0); } static void ttydevsw_defclose(struct tty *tp __unused) { } static void ttydevsw_defoutwakeup(struct tty *tp __unused) { panic("Terminal device has output, while not implemented"); } static void ttydevsw_definwakeup(struct tty *tp __unused) { } static int ttydevsw_defioctl(struct tty *tp __unused, u_long cmd __unused, caddr_t data __unused, struct thread *td __unused) { return (ENOIOCTL); } static int ttydevsw_defcioctl(struct tty *tp __unused, int unit __unused, u_long cmd __unused, caddr_t data __unused, struct thread *td __unused) { return (ENOIOCTL); } static int ttydevsw_defparam(struct tty *tp __unused, struct termios *t) { /* * Allow the baud rate to be adjusted for pseudo-devices, but at * least restrict it to 115200 to prevent excessive buffer * usage. Also disallow 0, to prevent foot shooting. */ if (t->c_ispeed < B50) t->c_ispeed = B50; else if (t->c_ispeed > B115200) t->c_ispeed = B115200; if (t->c_ospeed < B50) t->c_ospeed = B50; else if (t->c_ospeed > B115200) t->c_ospeed = B115200; t->c_cflag |= CREAD; return (0); } static int ttydevsw_defmodem(struct tty *tp __unused, int sigon __unused, int sigoff __unused) { /* Simulate a carrier to make the TTY layer happy. */ return (SER_DCD); } static int ttydevsw_defmmap(struct tty *tp __unused, vm_ooffset_t offset __unused, vm_paddr_t *paddr __unused, int nprot __unused, vm_memattr_t *memattr __unused) { return (-1); } static void ttydevsw_defpktnotify(struct tty *tp __unused, char event __unused) { } static void ttydevsw_deffree(void *softc __unused) { panic("Terminal device freed without a free-handler"); } static bool ttydevsw_defbusy(struct tty *tp __unused) { return (FALSE); } /* * TTY allocation and deallocation. TTY devices can be deallocated when * the driver doesn't use it anymore, when the TTY isn't a session's * controlling TTY and when the device node isn't opened through devfs. */ struct tty * tty_alloc(struct ttydevsw *tsw, void *sc) { return (tty_alloc_mutex(tsw, sc, NULL)); } struct tty * tty_alloc_mutex(struct ttydevsw *tsw, void *sc, struct mtx *mutex) { struct tty *tp; /* Make sure the driver defines all routines. */ #define PATCH_FUNC(x) do { \ if (tsw->tsw_ ## x == NULL) \ tsw->tsw_ ## x = ttydevsw_def ## x; \ } while (0) PATCH_FUNC(open); PATCH_FUNC(close); PATCH_FUNC(outwakeup); PATCH_FUNC(inwakeup); PATCH_FUNC(ioctl); PATCH_FUNC(cioctl); PATCH_FUNC(param); PATCH_FUNC(modem); PATCH_FUNC(mmap); PATCH_FUNC(pktnotify); PATCH_FUNC(free); PATCH_FUNC(busy); #undef PATCH_FUNC tp = malloc(sizeof(struct tty) + TTY_PRBUF_SIZE, M_TTY, M_WAITOK | M_ZERO); tp->t_prbufsz = TTY_PRBUF_SIZE; tp->t_devsw = tsw; tp->t_devswsoftc = sc; tp->t_flags = tsw->tsw_flags; tp->t_drainwait = tty_drainwait; tty_init_termios(tp); cv_init(&tp->t_inwait, "ttyin"); cv_init(&tp->t_outwait, "ttyout"); cv_init(&tp->t_outserwait, "ttyosr"); cv_init(&tp->t_bgwait, "ttybg"); cv_init(&tp->t_dcdwait, "ttydcd"); /* Allow drivers to use a custom mutex to lock the TTY. */ if (mutex != NULL) { tp->t_mtx = mutex; } else { tp->t_mtx = &tp->t_mtxobj; mtx_init(&tp->t_mtxobj, "ttymtx", NULL, MTX_DEF); } knlist_init_mtx(&tp->t_inpoll.si_note, tp->t_mtx); knlist_init_mtx(&tp->t_outpoll.si_note, tp->t_mtx); return (tp); } static void tty_dealloc(void *arg) { struct tty *tp = arg; /* * ttyydev_leave() usually frees the i/o queues earlier, but it is * not always called between queue allocation and here. The queues * may be allocated by ioctls on a pty control device without the * corresponding pty slave device ever being open, or after it is * closed. */ ttyinq_free(&tp->t_inq); ttyoutq_free(&tp->t_outq); seldrain(&tp->t_inpoll); seldrain(&tp->t_outpoll); knlist_destroy(&tp->t_inpoll.si_note); knlist_destroy(&tp->t_outpoll.si_note); cv_destroy(&tp->t_inwait); cv_destroy(&tp->t_outwait); cv_destroy(&tp->t_bgwait); cv_destroy(&tp->t_dcdwait); cv_destroy(&tp->t_outserwait); if (tp->t_mtx == &tp->t_mtxobj) mtx_destroy(&tp->t_mtxobj); ttydevsw_free(tp); free(tp, M_TTY); } static void tty_rel_free(struct tty *tp) { struct cdev *dev; tty_assert_locked(tp); #define TF_ACTIVITY (TF_GONE|TF_OPENED|TF_HOOK|TF_OPENCLOSE) if (tp->t_sessioncnt != 0 || (tp->t_flags & TF_ACTIVITY) != TF_GONE) { /* TTY is still in use. */ tty_unlock(tp); return; } /* Stop asynchronous I/O. */ funsetown(&tp->t_sigio); /* TTY can be deallocated. */ dev = tp->t_dev; tp->t_dev = NULL; tty_unlock(tp); if (dev != NULL) { sx_xlock(&tty_list_sx); TAILQ_REMOVE(&tty_list, tp, t_list); tty_list_count--; sx_xunlock(&tty_list_sx); destroy_dev_sched_cb(dev, tty_dealloc, tp); } } void tty_rel_pgrp(struct tty *tp, struct pgrp *pg) { MPASS(tp->t_sessioncnt > 0); tty_assert_locked(tp); if (tp->t_pgrp == pg) tp->t_pgrp = NULL; tty_unlock(tp); } void tty_rel_sess(struct tty *tp, struct session *sess) { MPASS(tp->t_sessioncnt > 0); /* Current session has left. */ if (tp->t_session == sess) { tp->t_session = NULL; MPASS(tp->t_pgrp == NULL); } tp->t_sessioncnt--; tty_rel_free(tp); } void tty_rel_gone(struct tty *tp) { tty_assert_locked(tp); MPASS(!tty_gone(tp)); /* Simulate carrier removal. */ ttydisc_modem(tp, 0); /* Wake up all blocked threads. */ tty_wakeup(tp, FREAD|FWRITE); cv_broadcast(&tp->t_bgwait); cv_broadcast(&tp->t_dcdwait); tp->t_flags |= TF_GONE; tty_rel_free(tp); } static int tty_drop_ctty(struct tty *tp, struct proc *p) { struct session *session; struct vnode *vp; /* * This looks terrible, but it's generally safe as long as the tty * hasn't gone away while we had the lock dropped. All of our sanity * checking that this operation is OK happens after we've picked it back * up, so other state changes are generally not fatal and the potential * for this particular operation to happen out-of-order in a * multithreaded scenario is likely a non-issue. */ tty_unlock(tp); sx_xlock(&proctree_lock); tty_lock(tp); if (tty_gone(tp)) { sx_xunlock(&proctree_lock); return (ENODEV); } /* * If the session doesn't have a controlling TTY, or if we weren't * invoked on the controlling TTY, we'll return ENOIOCTL as we've * historically done. */ session = p->p_session; if (session->s_ttyp == NULL || session->s_ttyp != tp) { sx_xunlock(&proctree_lock); return (ENOTTY); } if (!SESS_LEADER(p)) { sx_xunlock(&proctree_lock); return (EPERM); } PROC_LOCK(p); SESS_LOCK(session); vp = session->s_ttyvp; session->s_ttyp = NULL; session->s_ttyvp = NULL; session->s_ttydp = NULL; SESS_UNLOCK(session); tp->t_sessioncnt--; p->p_flag &= ~P_CONTROLT; PROC_UNLOCK(p); sx_xunlock(&proctree_lock); /* * If we did have a vnode, release our reference. Ordinarily we manage * these at the devfs layer, but we can't necessarily know that we were * invoked on the vnode referenced in the session (i.e. the vnode we * hold a reference to). We explicitly don't check VBAD/VIRF_DOOMED here * to avoid a vnode leak -- in circumstances elsewhere where we'd hit a * VIRF_DOOMED vnode, release has been deferred until the controlling TTY * is either changed or released. */ if (vp != NULL) devfs_ctty_unref(vp); return (0); } /* * Exposing information about current TTY's through sysctl */ static void tty_to_xtty(struct tty *tp, struct xtty *xt) { tty_assert_locked(tp); xt->xt_size = sizeof(struct xtty); xt->xt_insize = ttyinq_getsize(&tp->t_inq); xt->xt_incc = ttyinq_bytescanonicalized(&tp->t_inq); xt->xt_inlc = ttyinq_bytesline(&tp->t_inq); xt->xt_inlow = tp->t_inlow; xt->xt_outsize = ttyoutq_getsize(&tp->t_outq); xt->xt_outcc = ttyoutq_bytesused(&tp->t_outq); xt->xt_outlow = tp->t_outlow; xt->xt_column = tp->t_column; xt->xt_pgid = tp->t_pgrp ? tp->t_pgrp->pg_id : 0; xt->xt_sid = tp->t_session ? tp->t_session->s_sid : 0; xt->xt_flags = tp->t_flags; xt->xt_dev = tp->t_dev ? dev2udev(tp->t_dev) : (uint32_t)NODEV; } static int sysctl_kern_ttys(SYSCTL_HANDLER_ARGS) { unsigned long lsize; struct xtty *xtlist, *xt; struct tty *tp; int error; sx_slock(&tty_list_sx); lsize = tty_list_count * sizeof(struct xtty); if (lsize == 0) { sx_sunlock(&tty_list_sx); return (0); } xtlist = xt = malloc(lsize, M_TTY, M_WAITOK); TAILQ_FOREACH(tp, &tty_list, t_list) { tty_lock(tp); tty_to_xtty(tp, xt); tty_unlock(tp); xt++; } sx_sunlock(&tty_list_sx); error = SYSCTL_OUT(req, xtlist, lsize); free(xtlist, M_TTY); return (error); } SYSCTL_PROC(_kern, OID_AUTO, ttys, CTLTYPE_OPAQUE|CTLFLAG_RD|CTLFLAG_MPSAFE, 0, 0, sysctl_kern_ttys, "S,xtty", "List of TTYs"); /* * Device node creation. Device has been set up, now we can expose it to * the user. */ int tty_makedevf(struct tty *tp, struct ucred *cred, int flags, const char *fmt, ...) { va_list ap; struct make_dev_args args; struct cdev *dev, *init, *lock, *cua, *cinit, *clock; const char *prefix = "tty"; char name[SPECNAMELEN - 3]; /* for "tty" and "cua". */ uid_t uid; gid_t gid; mode_t mode; int error; /* Remove "tty" prefix from devices like PTY's. */ if (tp->t_flags & TF_NOPREFIX) prefix = ""; va_start(ap, fmt); vsnrprintf(name, sizeof name, 32, fmt, ap); va_end(ap); if (cred == NULL) { /* System device. */ uid = UID_ROOT; gid = GID_WHEEL; mode = S_IRUSR|S_IWUSR; } else { /* User device. */ uid = cred->cr_ruid; gid = GID_TTY; mode = S_IRUSR|S_IWUSR|S_IWGRP; } flags = flags & TTYMK_CLONING ? MAKEDEV_REF : 0; flags |= MAKEDEV_CHECKNAME; /* Master call-in device. */ make_dev_args_init(&args); args.mda_flags = flags; args.mda_devsw = &ttydev_cdevsw; args.mda_cr = cred; args.mda_uid = uid; args.mda_gid = gid; args.mda_mode = mode; args.mda_si_drv1 = tp; error = make_dev_s(&args, &dev, "%s%s", prefix, name); if (error != 0) return (error); tp->t_dev = dev; init = lock = cua = cinit = clock = NULL; /* Slave call-in devices. */ if (tp->t_flags & TF_INITLOCK) { args.mda_devsw = &ttyil_cdevsw; args.mda_unit = TTYUNIT_INIT; args.mda_si_drv1 = tp; args.mda_si_drv2 = &tp->t_termios_init_in; error = make_dev_s(&args, &init, "%s%s.init", prefix, name); if (error != 0) goto fail; dev_depends(dev, init); args.mda_unit = TTYUNIT_LOCK; args.mda_si_drv2 = &tp->t_termios_lock_in; error = make_dev_s(&args, &lock, "%s%s.lock", prefix, name); if (error != 0) goto fail; dev_depends(dev, lock); } /* Call-out devices. */ if (tp->t_flags & TF_CALLOUT) { make_dev_args_init(&args); args.mda_flags = flags; args.mda_devsw = &ttydev_cdevsw; args.mda_cr = cred; args.mda_uid = UID_UUCP; args.mda_gid = GID_DIALER; args.mda_mode = 0660; args.mda_unit = TTYUNIT_CALLOUT; args.mda_si_drv1 = tp; error = make_dev_s(&args, &cua, "cua%s", name); if (error != 0) goto fail; dev_depends(dev, cua); /* Slave call-out devices. */ if (tp->t_flags & TF_INITLOCK) { args.mda_devsw = &ttyil_cdevsw; args.mda_unit = TTYUNIT_CALLOUT | TTYUNIT_INIT; args.mda_si_drv2 = &tp->t_termios_init_out; error = make_dev_s(&args, &cinit, "cua%s.init", name); if (error != 0) goto fail; dev_depends(dev, cinit); args.mda_unit = TTYUNIT_CALLOUT | TTYUNIT_LOCK; args.mda_si_drv2 = &tp->t_termios_lock_out; error = make_dev_s(&args, &clock, "cua%s.lock", name); if (error != 0) goto fail; dev_depends(dev, clock); } } sx_xlock(&tty_list_sx); TAILQ_INSERT_TAIL(&tty_list, tp, t_list); tty_list_count++; sx_xunlock(&tty_list_sx); return (0); fail: destroy_dev(dev); if (init) destroy_dev(init); if (lock) destroy_dev(lock); if (cinit) destroy_dev(cinit); if (clock) destroy_dev(clock); return (error); } /* * Signalling processes. */ void tty_signal_sessleader(struct tty *tp, int sig) { struct proc *p; struct session *s; tty_assert_locked(tp); MPASS(sig >= 1 && sig < NSIG); /* Make signals start output again. */ tp->t_flags &= ~TF_STOPPED; tp->t_termios.c_lflag &= ~FLUSHO; /* * Load s_leader exactly once to avoid race where s_leader is * set to NULL by a concurrent invocation of killjobc() by the * session leader. Note that we are not holding t_session's * lock for the read. */ if ((s = tp->t_session) != NULL && (p = atomic_load_ptr(&s->s_leader)) != NULL) { PROC_LOCK(p); kern_psignal(p, sig); PROC_UNLOCK(p); } } void tty_signal_pgrp(struct tty *tp, int sig) { ksiginfo_t ksi; tty_assert_locked(tp); MPASS(sig >= 1 && sig < NSIG); /* Make signals start output again. */ tp->t_flags &= ~TF_STOPPED; tp->t_termios.c_lflag &= ~FLUSHO; if (sig == SIGINFO && !(tp->t_termios.c_lflag & NOKERNINFO)) tty_info(tp); if (tp->t_pgrp != NULL) { ksiginfo_init(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = SI_KERNEL; PGRP_LOCK(tp->t_pgrp); pgsignal(tp->t_pgrp, sig, 1, &ksi); PGRP_UNLOCK(tp->t_pgrp); } } void tty_wakeup(struct tty *tp, int flags) { if (tp->t_flags & TF_ASYNC && tp->t_sigio != NULL) pgsigio(&tp->t_sigio, SIGIO, (tp->t_session != NULL)); if (flags & FWRITE) { cv_broadcast(&tp->t_outwait); selwakeup(&tp->t_outpoll); KNOTE_LOCKED(&tp->t_outpoll.si_note, 0); } if (flags & FREAD) { cv_broadcast(&tp->t_inwait); selwakeup(&tp->t_inpoll); KNOTE_LOCKED(&tp->t_inpoll.si_note, 0); } } int tty_wait(struct tty *tp, struct cv *cv) { int error; int revokecnt = tp->t_revokecnt; tty_lock_assert(tp, MA_OWNED|MA_NOTRECURSED); MPASS(!tty_gone(tp)); error = cv_wait_sig(cv, tp->t_mtx); /* Bail out when the device slipped away. */ if (tty_gone(tp)) return (ENXIO); /* Restart the system call when we may have been revoked. */ if (tp->t_revokecnt != revokecnt) return (ERESTART); return (error); } int tty_timedwait(struct tty *tp, struct cv *cv, int hz) { int error; int revokecnt = tp->t_revokecnt; tty_lock_assert(tp, MA_OWNED|MA_NOTRECURSED); MPASS(!tty_gone(tp)); error = cv_timedwait_sig(cv, tp->t_mtx, hz); /* Bail out when the device slipped away. */ if (tty_gone(tp)) return (ENXIO); /* Restart the system call when we may have been revoked. */ if (tp->t_revokecnt != revokecnt) return (ERESTART); return (error); } void tty_flush(struct tty *tp, int flags) { if (flags & FWRITE) { tp->t_flags &= ~TF_HIWAT_OUT; ttyoutq_flush(&tp->t_outq); tty_wakeup(tp, FWRITE); if (!tty_gone(tp)) { ttydevsw_outwakeup(tp); ttydevsw_pktnotify(tp, TIOCPKT_FLUSHWRITE); } } if (flags & FREAD) { tty_hiwat_in_unblock(tp); ttyinq_flush(&tp->t_inq); tty_wakeup(tp, FREAD); if (!tty_gone(tp)) { ttydevsw_inwakeup(tp); ttydevsw_pktnotify(tp, TIOCPKT_FLUSHREAD); } } } void tty_set_winsize(struct tty *tp, const struct winsize *wsz) { if (memcmp(&tp->t_winsize, wsz, sizeof(*wsz)) == 0) return; tp->t_winsize = *wsz; tty_signal_pgrp(tp, SIGWINCH); } static int tty_generic_ioctl(struct tty *tp, u_long cmd, void *data, int fflag, struct thread *td) { int error; switch (cmd) { /* * Modem commands. * The SER_* and TIOCM_* flags are the same, but one bit * shifted. I don't know why. */ case TIOCSDTR: ttydevsw_modem(tp, SER_DTR, 0); return (0); case TIOCCDTR: ttydevsw_modem(tp, 0, SER_DTR); return (0); case TIOCMSET: { int bits = *(int *)data; ttydevsw_modem(tp, (bits & (TIOCM_DTR | TIOCM_RTS)) >> 1, ((~bits) & (TIOCM_DTR | TIOCM_RTS)) >> 1); return (0); } case TIOCMBIS: { int bits = *(int *)data; ttydevsw_modem(tp, (bits & (TIOCM_DTR | TIOCM_RTS)) >> 1, 0); return (0); } case TIOCMBIC: { int bits = *(int *)data; ttydevsw_modem(tp, 0, (bits & (TIOCM_DTR | TIOCM_RTS)) >> 1); return (0); } case TIOCMGET: *(int *)data = TIOCM_LE + (ttydevsw_modem(tp, 0, 0) << 1); return (0); case FIOASYNC: if (*(int *)data) tp->t_flags |= TF_ASYNC; else tp->t_flags &= ~TF_ASYNC; return (0); case FIONBIO: /* This device supports non-blocking operation. */ return (0); case FIONREAD: *(int *)data = ttyinq_bytescanonicalized(&tp->t_inq); return (0); case FIONWRITE: case TIOCOUTQ: *(int *)data = ttyoutq_bytesused(&tp->t_outq); return (0); case FIOSETOWN: if (tp->t_session != NULL && !tty_is_ctty(tp, td->td_proc)) /* Not allowed to set ownership. */ return (ENOTTY); /* Temporarily unlock the TTY to set ownership. */ tty_unlock(tp); error = fsetown(*(int *)data, &tp->t_sigio); tty_lock(tp); return (error); case FIOGETOWN: if (tp->t_session != NULL && !tty_is_ctty(tp, td->td_proc)) /* Not allowed to set ownership. */ return (ENOTTY); /* Get ownership. */ *(int *)data = fgetown(&tp->t_sigio); return (0); case TIOCGETA: /* Obtain terminal flags through tcgetattr(). */ *(struct termios*)data = tp->t_termios; return (0); case TIOCSETA: case TIOCSETAW: case TIOCSETAF: { struct termios *t = data; /* * Who makes up these funny rules? According to POSIX, * input baud rate is set equal to the output baud rate * when zero. */ if (t->c_ispeed == 0) t->c_ispeed = t->c_ospeed; /* Discard any unsupported bits. */ t->c_iflag &= TTYSUP_IFLAG; t->c_oflag &= TTYSUP_OFLAG; t->c_lflag &= TTYSUP_LFLAG; t->c_cflag &= TTYSUP_CFLAG; /* Set terminal flags through tcsetattr(). */ if (cmd == TIOCSETAW || cmd == TIOCSETAF) { error = tty_drain(tp, 0); if (error) return (error); if (cmd == TIOCSETAF) tty_flush(tp, FREAD); } /* * Only call param() when the flags really change. */ if ((t->c_cflag & CIGNORE) == 0 && (tp->t_termios.c_cflag != t->c_cflag || ((tp->t_termios.c_iflag ^ t->c_iflag) & (IXON|IXOFF|IXANY)) || tp->t_termios.c_ispeed != t->c_ispeed || tp->t_termios.c_ospeed != t->c_ospeed)) { error = ttydevsw_param(tp, t); if (error) return (error); /* XXX: CLOCAL? */ tp->t_termios.c_cflag = t->c_cflag & ~CIGNORE; tp->t_termios.c_ispeed = t->c_ispeed; tp->t_termios.c_ospeed = t->c_ospeed; /* Baud rate has changed - update watermarks. */ error = tty_watermarks(tp); if (error) return (error); } /* Copy new non-device driver parameters. */ tp->t_termios.c_iflag = t->c_iflag; tp->t_termios.c_oflag = t->c_oflag; tp->t_termios.c_lflag = t->c_lflag; memcpy(&tp->t_termios.c_cc, t->c_cc, sizeof t->c_cc); ttydisc_optimize(tp); if ((t->c_lflag & ICANON) == 0) { /* * When in non-canonical mode, wake up all * readers. Canonicalize any partial input. VMIN * and VTIME could also be adjusted. */ ttyinq_canonicalize(&tp->t_inq); tty_wakeup(tp, FREAD); } /* * For packet mode: notify the PTY consumer that VSTOP * and VSTART may have been changed. */ if (tp->t_termios.c_iflag & IXON && tp->t_termios.c_cc[VSTOP] == CTRL('S') && tp->t_termios.c_cc[VSTART] == CTRL('Q')) ttydevsw_pktnotify(tp, TIOCPKT_DOSTOP); else ttydevsw_pktnotify(tp, TIOCPKT_NOSTOP); return (0); } case TIOCGETD: /* For compatibility - we only support TTYDISC. */ *(int *)data = TTYDISC; return (0); case TIOCGPGRP: if (!tty_is_ctty(tp, td->td_proc)) return (ENOTTY); if (tp->t_pgrp != NULL) *(int *)data = tp->t_pgrp->pg_id; else *(int *)data = NO_PID; return (0); case TIOCGSID: if (!tty_is_ctty(tp, td->td_proc)) return (ENOTTY); MPASS(tp->t_session); *(int *)data = tp->t_session->s_sid; return (0); case TIOCNOTTY: return (tty_drop_ctty(tp, td->td_proc)); case TIOCSCTTY: { struct proc *p = td->td_proc; /* XXX: This looks awful. */ tty_unlock(tp); sx_xlock(&proctree_lock); tty_lock(tp); if (!SESS_LEADER(p)) { /* Only the session leader may do this. */ sx_xunlock(&proctree_lock); return (EPERM); } if (tp->t_session != NULL && tp->t_session == p->p_session) { /* This is already our controlling TTY. */ sx_xunlock(&proctree_lock); return (0); } if (p->p_session->s_ttyp != NULL || (tp->t_session != NULL && tp->t_session->s_ttyvp != NULL && tp->t_session->s_ttyvp->v_type != VBAD)) { /* * There is already a relation between a TTY and * a session, or the caller is not the session * leader. * * Allow the TTY to be stolen when the vnode is * invalid, but the reference to the TTY is * still active. This allows immediate reuse of * TTYs of which the session leader has been * killed or the TTY revoked. */ sx_xunlock(&proctree_lock); return (EPERM); } /* Connect the session to the TTY. */ tp->t_session = p->p_session; tp->t_session->s_ttyp = tp; tp->t_sessioncnt++; /* Assign foreground process group. */ tp->t_pgrp = p->p_pgrp; PROC_LOCK(p); p->p_flag |= P_CONTROLT; PROC_UNLOCK(p); sx_xunlock(&proctree_lock); return (0); } case TIOCSPGRP: { struct pgrp *pg; /* * XXX: Temporarily unlock the TTY to locate the process * group. This code would be lot nicer if we would ever * decompose proctree_lock. */ tty_unlock(tp); sx_slock(&proctree_lock); pg = pgfind(*(int *)data); if (pg != NULL) PGRP_UNLOCK(pg); if (pg == NULL || pg->pg_session != td->td_proc->p_session) { sx_sunlock(&proctree_lock); tty_lock(tp); return (EPERM); } tty_lock(tp); /* * Determine if this TTY is the controlling TTY after * relocking the TTY. */ if (!tty_is_ctty(tp, td->td_proc)) { sx_sunlock(&proctree_lock); return (ENOTTY); } tp->t_pgrp = pg; sx_sunlock(&proctree_lock); /* Wake up the background process groups. */ cv_broadcast(&tp->t_bgwait); return (0); } case TIOCFLUSH: { int flags = *(int *)data; if (flags == 0) flags = (FREAD|FWRITE); else flags &= (FREAD|FWRITE); tty_flush(tp, flags); return (0); } case TIOCDRAIN: /* Drain TTY output. */ return tty_drain(tp, 0); case TIOCGDRAINWAIT: *(int *)data = tp->t_drainwait; return (0); case TIOCSDRAINWAIT: error = priv_check(td, PRIV_TTY_DRAINWAIT); if (error == 0) tp->t_drainwait = *(int *)data; return (error); case TIOCCONS: /* Set terminal as console TTY. */ if (*(int *)data) { error = priv_check(td, PRIV_TTY_CONSOLE); if (error) return (error); /* * XXX: constty should really need to be locked! * XXX: allow disconnected constty's to be stolen! */ if (constty == tp) return (0); if (constty != NULL) return (EBUSY); tty_unlock(tp); constty_set(tp); tty_lock(tp); } else if (constty == tp) { constty_clear(); } return (0); case TIOCGWINSZ: /* Obtain window size. */ *(struct winsize*)data = tp->t_winsize; return (0); case TIOCSWINSZ: /* Set window size. */ tty_set_winsize(tp, data); return (0); case TIOCEXCL: tp->t_flags |= TF_EXCLUDE; return (0); case TIOCNXCL: tp->t_flags &= ~TF_EXCLUDE; return (0); case TIOCSTOP: tp->t_flags |= TF_STOPPED; ttydevsw_pktnotify(tp, TIOCPKT_STOP); return (0); case TIOCSTART: tp->t_flags &= ~TF_STOPPED; tp->t_termios.c_lflag &= ~FLUSHO; ttydevsw_outwakeup(tp); ttydevsw_pktnotify(tp, TIOCPKT_START); return (0); case TIOCSTAT: tty_info(tp); return (0); case TIOCSTI: if ((fflag & FREAD) == 0 && priv_check(td, PRIV_TTY_STI)) return (EPERM); if (!tty_is_ctty(tp, td->td_proc) && priv_check(td, PRIV_TTY_STI)) return (EACCES); ttydisc_rint(tp, *(char *)data, 0); ttydisc_rint_done(tp); return (0); } #ifdef COMPAT_43TTY return tty_ioctl_compat(tp, cmd, data, fflag, td); #else /* !COMPAT_43TTY */ return (ENOIOCTL); #endif /* COMPAT_43TTY */ } int tty_ioctl(struct tty *tp, u_long cmd, void *data, int fflag, struct thread *td) { int error; tty_assert_locked(tp); if (tty_gone(tp)) return (ENXIO); error = ttydevsw_ioctl(tp, cmd, data, td); if (error == ENOIOCTL) error = tty_generic_ioctl(tp, cmd, data, fflag, td); return (error); } dev_t tty_udev(struct tty *tp) { if (tp->t_dev) return (dev2udev(tp->t_dev)); else return (NODEV); } int tty_checkoutq(struct tty *tp) { /* 256 bytes should be enough to print a log message. */ return (ttyoutq_bytesleft(&tp->t_outq) >= 256); } void tty_hiwat_in_block(struct tty *tp) { if ((tp->t_flags & TF_HIWAT_IN) == 0 && tp->t_termios.c_iflag & IXOFF && tp->t_termios.c_cc[VSTOP] != _POSIX_VDISABLE) { /* * Input flow control. Only enter the high watermark when we * can successfully store the VSTOP character. */ if (ttyoutq_write_nofrag(&tp->t_outq, &tp->t_termios.c_cc[VSTOP], 1) == 0) tp->t_flags |= TF_HIWAT_IN; } else { /* No input flow control. */ tp->t_flags |= TF_HIWAT_IN; } } void tty_hiwat_in_unblock(struct tty *tp) { if (tp->t_flags & TF_HIWAT_IN && tp->t_termios.c_iflag & IXOFF && tp->t_termios.c_cc[VSTART] != _POSIX_VDISABLE) { /* * Input flow control. Only leave the high watermark when we * can successfully store the VSTART character. */ if (ttyoutq_write_nofrag(&tp->t_outq, &tp->t_termios.c_cc[VSTART], 1) == 0) tp->t_flags &= ~TF_HIWAT_IN; } else { /* No input flow control. */ tp->t_flags &= ~TF_HIWAT_IN; } if (!tty_gone(tp)) ttydevsw_inwakeup(tp); } /* * TTY hooks interface. */ static int ttyhook_defrint(struct tty *tp, char c, int flags) { if (ttyhook_rint_bypass(tp, &c, 1) != 1) return (-1); return (0); } int ttyhook_register(struct tty **rtp, struct proc *p, int fd, struct ttyhook *th, void *softc) { struct tty *tp; struct file *fp; struct cdev *dev; struct cdevsw *cdp; struct filedesc *fdp; cap_rights_t rights; int error, ref; /* Validate the file descriptor. */ fdp = p->p_fd; error = fget_unlocked(fdp, fd, cap_rights_init(&rights, CAP_TTYHOOK), &fp); if (error != 0) return (error); if (fp->f_ops == &badfileops) { error = EBADF; goto done1; } /* * Make sure the vnode is bound to a character device. * Unlocked check for the vnode type is ok there, because we * only shall prevent calling devvn_refthread on the file that * never has been opened over a character device. */ if (fp->f_type != DTYPE_VNODE || fp->f_vnode->v_type != VCHR) { error = EINVAL; goto done1; } /* Make sure it is a TTY. */ cdp = devvn_refthread(fp->f_vnode, &dev, &ref); if (cdp == NULL) { error = ENXIO; goto done1; } if (dev != fp->f_data) { error = ENXIO; goto done2; } if (cdp != &ttydev_cdevsw) { error = ENOTTY; goto done2; } tp = dev->si_drv1; /* Try to attach the hook to the TTY. */ error = EBUSY; tty_lock(tp); MPASS((tp->t_hook == NULL) == ((tp->t_flags & TF_HOOK) == 0)); if (tp->t_flags & TF_HOOK) goto done3; tp->t_flags |= TF_HOOK; tp->t_hook = th; tp->t_hooksoftc = softc; *rtp = tp; error = 0; /* Maybe we can switch into bypass mode now. */ ttydisc_optimize(tp); /* Silently convert rint() calls to rint_bypass() when possible. */ if (!ttyhook_hashook(tp, rint) && ttyhook_hashook(tp, rint_bypass)) th->th_rint = ttyhook_defrint; done3: tty_unlock(tp); done2: dev_relthread(dev, ref); done1: fdrop(fp, curthread); return (error); } void ttyhook_unregister(struct tty *tp) { tty_assert_locked(tp); MPASS(tp->t_flags & TF_HOOK); /* Disconnect the hook. */ tp->t_flags &= ~TF_HOOK; tp->t_hook = NULL; /* Maybe we need to leave bypass mode. */ ttydisc_optimize(tp); /* Maybe deallocate the TTY as well. */ tty_rel_free(tp); } /* * /dev/console handling. */ static int ttyconsdev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) { struct tty *tp; /* System has no console device. */ if (dev_console_filename == NULL) return (ENXIO); /* Look up corresponding TTY by device name. */ sx_slock(&tty_list_sx); TAILQ_FOREACH(tp, &tty_list, t_list) { if (strcmp(dev_console_filename, tty_devname(tp)) == 0) { dev_console->si_drv1 = tp; break; } } sx_sunlock(&tty_list_sx); /* System console has no TTY associated. */ if (dev_console->si_drv1 == NULL) return (ENXIO); return (ttydev_open(dev, oflags, devtype, td)); } static int ttyconsdev_write(struct cdev *dev, struct uio *uio, int ioflag) { log_console(uio); return (ttydev_write(dev, uio, ioflag)); } /* * /dev/console is a little different than normal TTY's. When opened, * it determines which TTY to use. When data gets written to it, it * will be logged in the kernel message buffer. */ static struct cdevsw ttyconsdev_cdevsw = { .d_version = D_VERSION, .d_open = ttyconsdev_open, .d_close = ttydev_close, .d_read = ttydev_read, .d_write = ttyconsdev_write, .d_ioctl = ttydev_ioctl, .d_kqfilter = ttydev_kqfilter, .d_poll = ttydev_poll, .d_mmap = ttydev_mmap, .d_name = "ttyconsdev", .d_flags = D_TTY, }; static void ttyconsdev_init(void *unused __unused) { dev_console = make_dev_credf(MAKEDEV_ETERNAL, &ttyconsdev_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0600, "console"); } SYSINIT(tty, SI_SUB_DRIVERS, SI_ORDER_FIRST, ttyconsdev_init, NULL); void ttyconsdev_select(const char *name) { dev_console_filename = name; } /* * Debugging routines. */ #include "opt_ddb.h" #ifdef DDB #include #include static const struct { int flag; char val; } ttystates[] = { #if 0 { TF_NOPREFIX, 'N' }, #endif { TF_INITLOCK, 'I' }, { TF_CALLOUT, 'C' }, /* Keep these together -> 'Oi' and 'Oo'. */ { TF_OPENED, 'O' }, { TF_OPENED_IN, 'i' }, { TF_OPENED_OUT, 'o' }, { TF_OPENED_CONS, 'c' }, { TF_GONE, 'G' }, { TF_OPENCLOSE, 'B' }, { TF_ASYNC, 'Y' }, { TF_LITERAL, 'L' }, /* Keep these together -> 'Hi' and 'Ho'. */ { TF_HIWAT, 'H' }, { TF_HIWAT_IN, 'i' }, { TF_HIWAT_OUT, 'o' }, { TF_STOPPED, 'S' }, { TF_EXCLUDE, 'X' }, { TF_BYPASS, 'l' }, { TF_ZOMBIE, 'Z' }, { TF_HOOK, 's' }, /* Keep these together -> 'bi' and 'bo'. */ { TF_BUSY, 'b' }, { TF_BUSY_IN, 'i' }, { TF_BUSY_OUT, 'o' }, { 0, '\0'}, }; #define TTY_FLAG_BITS \ "\20\1NOPREFIX\2INITLOCK\3CALLOUT\4OPENED_IN" \ "\5OPENED_OUT\6OPENED_CONS\7GONE\10OPENCLOSE" \ "\11ASYNC\12LITERAL\13HIWAT_IN\14HIWAT_OUT" \ "\15STOPPED\16EXCLUDE\17BYPASS\20ZOMBIE" \ "\21HOOK\22BUSY_IN\23BUSY_OUT" #define DB_PRINTSYM(name, addr) \ db_printf("%s " #name ": ", sep); \ db_printsym((db_addr_t) addr, DB_STGY_ANY); \ db_printf("\n"); static void _db_show_devsw(const char *sep, const struct ttydevsw *tsw) { db_printf("%sdevsw: ", sep); db_printsym((db_addr_t)tsw, DB_STGY_ANY); db_printf(" (%p)\n", tsw); DB_PRINTSYM(open, tsw->tsw_open); DB_PRINTSYM(close, tsw->tsw_close); DB_PRINTSYM(outwakeup, tsw->tsw_outwakeup); DB_PRINTSYM(inwakeup, tsw->tsw_inwakeup); DB_PRINTSYM(ioctl, tsw->tsw_ioctl); DB_PRINTSYM(param, tsw->tsw_param); DB_PRINTSYM(modem, tsw->tsw_modem); DB_PRINTSYM(mmap, tsw->tsw_mmap); DB_PRINTSYM(pktnotify, tsw->tsw_pktnotify); DB_PRINTSYM(free, tsw->tsw_free); } static void _db_show_hooks(const char *sep, const struct ttyhook *th) { db_printf("%shook: ", sep); db_printsym((db_addr_t)th, DB_STGY_ANY); db_printf(" (%p)\n", th); if (th == NULL) return; DB_PRINTSYM(rint, th->th_rint); DB_PRINTSYM(rint_bypass, th->th_rint_bypass); DB_PRINTSYM(rint_done, th->th_rint_done); DB_PRINTSYM(rint_poll, th->th_rint_poll); DB_PRINTSYM(getc_inject, th->th_getc_inject); DB_PRINTSYM(getc_capture, th->th_getc_capture); DB_PRINTSYM(getc_poll, th->th_getc_poll); DB_PRINTSYM(close, th->th_close); } static void _db_show_termios(const char *name, const struct termios *t) { db_printf("%s: iflag 0x%x oflag 0x%x cflag 0x%x " "lflag 0x%x ispeed %u ospeed %u\n", name, t->c_iflag, t->c_oflag, t->c_cflag, t->c_lflag, t->c_ispeed, t->c_ospeed); } /* DDB command to show TTY statistics. */ DB_SHOW_COMMAND(tty, db_show_tty) { struct tty *tp; if (!have_addr) { db_printf("usage: show tty \n"); return; } tp = (struct tty *)addr; db_printf("%p: %s\n", tp, tty_devname(tp)); db_printf("\tmtx: %p\n", tp->t_mtx); db_printf("\tflags: 0x%b\n", tp->t_flags, TTY_FLAG_BITS); db_printf("\trevokecnt: %u\n", tp->t_revokecnt); /* Buffering mechanisms. */ db_printf("\tinq: %p begin %u linestart %u reprint %u end %u " "nblocks %u quota %u\n", &tp->t_inq, tp->t_inq.ti_begin, tp->t_inq.ti_linestart, tp->t_inq.ti_reprint, tp->t_inq.ti_end, tp->t_inq.ti_nblocks, tp->t_inq.ti_quota); db_printf("\toutq: %p begin %u end %u nblocks %u quota %u\n", &tp->t_outq, tp->t_outq.to_begin, tp->t_outq.to_end, tp->t_outq.to_nblocks, tp->t_outq.to_quota); db_printf("\tinlow: %zu\n", tp->t_inlow); db_printf("\toutlow: %zu\n", tp->t_outlow); _db_show_termios("\ttermios", &tp->t_termios); db_printf("\twinsize: row %u col %u xpixel %u ypixel %u\n", tp->t_winsize.ws_row, tp->t_winsize.ws_col, tp->t_winsize.ws_xpixel, tp->t_winsize.ws_ypixel); db_printf("\tcolumn: %u\n", tp->t_column); db_printf("\twritepos: %u\n", tp->t_writepos); db_printf("\tcompatflags: 0x%x\n", tp->t_compatflags); /* Init/lock-state devices. */ _db_show_termios("\ttermios_init_in", &tp->t_termios_init_in); _db_show_termios("\ttermios_init_out", &tp->t_termios_init_out); _db_show_termios("\ttermios_lock_in", &tp->t_termios_lock_in); _db_show_termios("\ttermios_lock_out", &tp->t_termios_lock_out); /* Hooks */ _db_show_devsw("\t", tp->t_devsw); _db_show_hooks("\t", tp->t_hook); /* Process info. */ db_printf("\tpgrp: %p gid %d jobc %d\n", tp->t_pgrp, tp->t_pgrp ? tp->t_pgrp->pg_id : 0, tp->t_pgrp ? tp->t_pgrp->pg_jobc : 0); db_printf("\tsession: %p", tp->t_session); if (tp->t_session != NULL) db_printf(" count %u leader %p tty %p sid %d login %s", tp->t_session->s_count, tp->t_session->s_leader, tp->t_session->s_ttyp, tp->t_session->s_sid, tp->t_session->s_login); db_printf("\n"); db_printf("\tsessioncnt: %u\n", tp->t_sessioncnt); db_printf("\tdevswsoftc: %p\n", tp->t_devswsoftc); db_printf("\thooksoftc: %p\n", tp->t_hooksoftc); db_printf("\tdev: %p\n", tp->t_dev); } /* DDB command to list TTYs. */ DB_SHOW_ALL_COMMAND(ttys, db_show_all_ttys) { struct tty *tp; size_t isiz, osiz; int i, j; /* Make the output look like `pstat -t'. */ db_printf("PTR "); #if defined(__LP64__) db_printf(" "); #endif db_printf(" LINE INQ CAN LIN LOW OUTQ USE LOW " "COL SESS PGID STATE\n"); TAILQ_FOREACH(tp, &tty_list, t_list) { isiz = tp->t_inq.ti_nblocks * TTYINQ_DATASIZE; osiz = tp->t_outq.to_nblocks * TTYOUTQ_DATASIZE; db_printf("%p %10s %5zu %4u %4u %4zu %5zu %4u %4zu %5u %5d " "%5d ", tp, tty_devname(tp), isiz, tp->t_inq.ti_linestart - tp->t_inq.ti_begin, tp->t_inq.ti_end - tp->t_inq.ti_linestart, isiz - tp->t_inlow, osiz, tp->t_outq.to_end - tp->t_outq.to_begin, osiz - tp->t_outlow, MIN(tp->t_column, 99999), tp->t_session ? tp->t_session->s_sid : 0, tp->t_pgrp ? tp->t_pgrp->pg_id : 0); /* Flag bits. */ for (i = j = 0; ttystates[i].flag; i++) if (tp->t_flags & ttystates[i].flag) { db_printf("%c", ttystates[i].val); j++; } if (j == 0) db_printf("-"); db_printf("\n"); } } #endif /* DDB */