diff --git a/share/man/man5/core.5 b/share/man/man5/core.5 --- a/share/man/man5/core.5 +++ b/share/man/man5/core.5 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd November 12, 2023 +.Dd July 17, 2025 .Dt CORE 5 .Os .Sh NAME @@ -116,11 +116,13 @@ .Va kern.sugid_coredump to 1. .Pp -Corefiles can be compressed by the kernel if the following item -is included in the kernel configuration file: +Corefiles can be compressed by the kernel if one of the following items +is included in the kernel configuration file: .Bl -tag -width "1234567890" -compact -offset "12345" .It options GZIO +.It options +ZSTDIO .El .Pp The following sysctl control core file compression: diff --git a/sys/conf/files b/sys/conf/files --- a/sys/conf/files +++ b/sys/conf/files @@ -3768,6 +3768,7 @@ kern/bus_if.m standard kern/clock_if.m standard +kern/coredump_vnode.c standard kern/cpufreq_if.m standard kern/device_if.m standard kern/imgact_binmisc.c optional imgact_binmisc @@ -3856,6 +3857,7 @@ kern/kern_timeout.c standard kern/kern_tslog.c optional tslog kern/kern_ubsan.c optional kubsan +kern/kern_ucoredump.c standard kern/kern_umtx.c standard kern/kern_uuid.c standard kern/kern_vnodedumper.c standard diff --git a/sys/kern/coredump_vnode.c b/sys/kern/coredump_vnode.c new file mode 100644 --- /dev/null +++ b/sys/kern/coredump_vnode.c @@ -0,0 +1,553 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause AND BSD-2-Clause + * + * Copyright (c) 1982, 1986, 1989, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * - kern_sig.c + */ +/* + * Copyright (c) 1993, David Greenman + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * -kern_exec.c + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define GZIP_SUFFIX ".gz" +#define ZSTD_SUFFIX ".zst" + +#define MAX_NUM_CORE_FILES 100000 +#ifndef NUM_CORE_FILES +#define NUM_CORE_FILES 5 +#endif + +_Static_assert(NUM_CORE_FILES >= 0 && NUM_CORE_FILES <= MAX_NUM_CORE_FILES, + "NUM_CORE_FILES is out of range (0 to " __STRING(MAX_NUM_CORE_FILES) ")"); +static int num_cores = NUM_CORE_FILES; + +static int capmode_coredump; +SYSCTL_INT(_kern, OID_AUTO, capmode_coredump, CTLFLAG_RWTUN, + &capmode_coredump, 0, "Allow processes in capability mode to dump core"); + +static int set_core_nodump_flag = 0; +SYSCTL_INT(_kern, OID_AUTO, nodump_coredump, CTLFLAG_RW, &set_core_nodump_flag, + 0, "Enable setting the NODUMP flag on coredump files"); + +static int coredump_devctl = 0; +SYSCTL_INT(_kern, OID_AUTO, coredump_devctl, CTLFLAG_RW, 
&coredump_devctl,
+    0, "Generate a devctl notification when processes coredump");
+
+/*
+ * Format string used to build corefile names.  Every reader and writer of
+ * corefilename[] in this file takes allproc_lock around the access.
+ */
+static char corefilename[MAXPATHLEN] = { "%N.core" };
+TUNABLE_STR("kern.corefile", corefilename, sizeof(corefilename));
+
+/*
+ * kern.corefile handler: sysctl_handle_string() reads or replaces the
+ * format string while allproc_lock is held exclusively.
+ */
+static int
+sysctl_kern_corefile(SYSCTL_HANDLER_ARGS)
+{
+	int rv;
+
+	sx_xlock(&allproc_lock);
+	rv = sysctl_handle_string(oidp, corefilename, sizeof(corefilename),
+	    req);
+	sx_xunlock(&allproc_lock);
+
+	return (rv);
+}
+SYSCTL_PROC(_kern, OID_AUTO, corefile, CTLTYPE_STRING | CTLFLAG_RW |
+    CTLFLAG_MPSAFE, 0, 0, sysctl_kern_corefile, "A",
+    "Process corefile name format string");
+
+/*
+ * debug.ncores handler.  A request without a new value only reports
+ * num_cores; a new value is clamped to 0..MAX_NUM_CORE_FILES before it
+ * is stored, so out-of-range input is corrected rather than rejected.
+ */
+static int
+sysctl_debug_num_cores_check(SYSCTL_HANDLER_ARGS)
+{
+	int rv, val;
+
+	val = num_cores;
+	rv = sysctl_handle_int(oidp, &val, 0, req);
+	if (rv != 0 || req->newptr == NULL)
+		return (rv);
+
+	if (val > MAX_NUM_CORE_FILES)
+		val = MAX_NUM_CORE_FILES;
+	else if (val < 0)
+		val = 0;
+	num_cores = val;
+	return (0);
+}
+SYSCTL_PROC(_debug, OID_AUTO, ncores,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, sizeof(int),
+    sysctl_debug_num_cores_check, "I",
+    "Maximum number of generated process corefiles while using index format");
+
+/*
+ * Unlock vp, then close it as a file that was opened for writing, using
+ * td's credentials.
+ */
+static void
+vnode_close_locked(struct thread *td, struct vnode *vp)
+{
+	struct ucred *cred = td->td_ucred;
+
+	VOP_UNLOCK(vp);
+	vn_close(vp, FWRITE, cred, td);
+}
+
+/*
+ * Writer callback for vnode-backed coredumps: write len bytes from base at
+ * offset into the vnode stored in the writer's context and return whatever
+ * vn_rdwr_inchunks() returns.  IO_RANGELOCKED is passed; coredump_vnode()
+ * takes the vnode's range lock before it invokes the dump routine.
+ */
+int
+core_vn_write(const struct coredump_writer *cdw, const void *base, size_t len,
+    off_t offset, enum uio_seg seg, struct ucred *cred, size_t *resid,
+    struct thread *td)
+{
+	const int ioflags = IO_UNIT | IO_DIRECT | IO_RANGELOCKED;
+	struct coredump_vnode_ctx *wctx;
+
+	wctx = cdw->ctx;
+	return (vn_rdwr_inchunks(UIO_WRITE, wctx->vp, __DECONST(void *, base),
+	    len, offset, seg, ioflags, cred, wctx->fcred, resid, td));
+}
+
+int
+core_vn_extend(const struct coredump_writer *cdw, off_t newsz,
+    struct ucred *cred)
+{
+	struct coredump_vnode_ctx *ctx = cdw->ctx;
+	struct mount *mp;
+	int error;
+
+	error = vn_start_write(ctx->vp, 
&mp, V_WAIT);
+	if (error != 0)
+		return (error);
+	vn_lock(ctx->vp, LK_EXCLUSIVE | LK_RETRY);
+	error = vn_truncate_locked(ctx->vp, newsz, false, cred);
+	VOP_UNLOCK(ctx->vp);
+	vn_finished_write(mp);
+	return (error);
+}
+
+/*
+ * If the core format has a %I in it, then we need to check
+ * for existing corefiles before defining a name.
+ * To do this we iterate over 0..ncores to find a
+ * non-existing core file name to use. If all core files are
+ * already used we choose the oldest one.
+ *
+ * name is edited in place: the indexlen characters at indexpos are
+ * overwritten with each candidate index, zero-padded to indexlen digits.
+ *
+ * Returns 0 with *vpp set to the chosen vnode, which is handed back locked
+ * (still locked from vn_open_cred(), or re-locked here when an existing
+ * file is reused).  A failed open or getattr stops the scan early; the
+ * oldest file seen up to that point, if any, is still reused.  EFAULT is
+ * returned when no new file could be created and the process is P_SUGID,
+ * so that such a process never overwrites an existing file.  EINVAL is
+ * returned when ncores leaves no slot to try.  *vpp is not written on
+ * failure.
+ */
+static int
+corefile_open_last(struct thread *td, char *name, int indexpos,
+    int indexlen, int ncores, struct vnode **vpp)
+{
+	struct vnode *oldvp, *nextvp, *vp;
+	struct vattr vattr;
+	struct nameidata nd;
+	int error, i, flags, oflags, cmode;
+	char ch;
+	struct timespec lasttime;
+
+	nextvp = oldvp = NULL;
+	/* The loop body never runs when ncores is 0; don't return garbage. */
+	error = 0;
+	cmode = S_IRUSR | S_IWUSR;
+	oflags = VN_OPEN_NOAUDIT | VN_OPEN_NAMECACHE |
+	    (capmode_coredump ? VN_OPEN_NOCAPCHECK : 0);
+
+	for (i = 0; i < ncores; i++) {
+		flags = O_CREAT | FWRITE | O_NOFOLLOW;
+
+		/*
+		 * snprintf() NUL-terminates, which would cut off whatever
+		 * follows the index; save and restore that character.
+		 */
+		ch = name[indexpos + indexlen];
+		(void)snprintf(name + indexpos, indexlen + 1, "%.*u", indexlen,
+		    i);
+		name[indexpos + indexlen] = ch;
+
+		NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name);
+		error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred,
+		    NULL);
+		if (error != 0)
+			break;
+
+		vp = nd.ni_vp;
+		NDFREE_PNBUF(&nd);
+		/*
+		 * flags was passed by pointer; O_CREAT still being set is
+		 * taken to mean that this slot was free and a new file was
+		 * created for it.
+		 */
+		if ((flags & O_CREAT) == O_CREAT) {
+			nextvp = vp;
+			break;
+		}
+
+		error = VOP_GETATTR(vp, &vattr, td->td_ucred);
+		if (error != 0) {
+			vnode_close_locked(td, vp);
+			break;
+		}
+
+		/*
+		 * Remember the file with the oldest mtime; on a tie the
+		 * later index wins.  The candidate is kept open but
+		 * unlocked while the scan continues.
+		 */
+		if (oldvp == NULL ||
+		    lasttime.tv_sec > vattr.va_mtime.tv_sec ||
+		    (lasttime.tv_sec == vattr.va_mtime.tv_sec &&
+		    lasttime.tv_nsec >= vattr.va_mtime.tv_nsec)) {
+			if (oldvp != NULL)
+				vn_close(oldvp, FWRITE, td->td_ucred, td);
+			oldvp = vp;
+			VOP_UNLOCK(oldvp);
+			lasttime = vattr.va_mtime;
+		} else {
+			vnode_close_locked(td, vp);
+		}
+	}
+
+	if (oldvp != NULL) {
+		if (nextvp == NULL) {
+			if ((td->td_proc->p_flag & P_SUGID) != 0) {
+				error = EFAULT;
+				vn_close(oldvp, FWRITE, td->td_ucred, td);
+			} else {
+				nextvp = oldvp;
+				error = vn_lock(nextvp, LK_EXCLUSIVE);
+				if (error != 0) {
+					vn_close(nextvp, FWRITE, td->td_ucred,
+					    td);
+					nextvp = NULL;
+				}
+			}
+		} else {
+			vn_close(oldvp, FWRITE, td->td_ucred, td);
+		}
+	}
+	if (error != 0) {
+		/*
+		 * Release the vnode that was going to be returned.  oldvp
+		 * has already been closed or handed over to nextvp above.
+		 */
+		if (nextvp != NULL)
+			vnode_close_locked(td, nextvp);
+	} else if (nextvp == NULL) {
+		/* No slot was tried at all (ncores == 0). */
+		error = EINVAL;
+	} else {
+		*vpp = nextvp;
+	}
+
+	return (error);
+}
+
+/*
+ * corefile_open(comm, uid, pid, td, compress, signum, vpp, namep)
+ * Expand the name described in corefilename, using name, uid, and pid
+ * and open/create core file.
+ * corefilename is a printf-like string, with these format specifiers:
+ *	%%	a literal '%'
+ *	%H	hostname, from getcredhostname() for td's credentials
+ *	%I	index of the corefile slot picked by corefile_open_last();
+ *		only the first %I is expanded, later ones are copied as "%I"
+ *	%N	name of process ("name")
+ *	%P	process id (pid)
+ *	%S	signal number (signum)
+ *	%U	user id (uid)
+ * Any other character after '%' is logged and dropped.
+ * For example, "%N.core" is the default; they can be disabled completely
+ * by using "/dev/null", or all core files can be stored in "/cores/%U/%N-%P".
+ * This is controlled by the sysctl variable kern.corefile (see above).
+ * GZIP_SUFFIX or ZSTD_SUFFIX is appended when compress is COMPRESS_GZIP or
+ * COMPRESS_ZSTD.
+ *
+ * On success 0 is returned, *vpp is the opened vnode and *namep is the
+ * expanded name in an M_TEMP buffer that the caller must free.  On failure
+ * the buffer is freed here and neither *vpp nor *namep is written; ENOMEM
+ * means the expanded name did not fit in MAXPATHLEN.  Without %I, a P_SUGID
+ * process opens the file with O_EXCL and so cannot reuse an existing file.
+ */
+static int
+corefile_open(const char *comm, uid_t uid, pid_t pid, struct thread *td,
+    int compress, int signum, struct vnode **vpp, char **namep)
+{
+	struct sbuf sb;
+	struct nameidata nd;
+	const char *format;
+	char *hostname, *name;
+	int cmode, error, flags, i, indexpos, indexlen, oflags, ncores;
+
+	hostname = NULL;
+	format = corefilename;
+	name = malloc(MAXPATHLEN, M_TEMP, M_WAITOK | M_ZERO);
+	indexlen = 0;
+	indexpos = -1;
+	ncores = num_cores;
+	(void)sbuf_new(&sb, name, MAXPATHLEN, SBUF_FIXEDLEN);
+	sx_slock(&allproc_lock);
+	for (i = 0; format[i] != '\0'; i++) {
+		switch (format[i]) {
+		case '%':	/* Format character */
+			i++;
+			switch (format[i]) {
+			case '\0':
+				/*
+				 * A lone '%' ends the format.  Step back so
+				 * that the loop increment lands on the
+				 * terminator instead of scanning whatever
+				 * bytes follow it in corefilename[].
+				 */
+				log(LOG_ERR,
+				    "Trailing %% in corename `%s'\n", format);
+				i--;
+				break;
+			case '%':
+				sbuf_putc(&sb, '%');
+				break;
+			case 'H':	/* hostname */
+				if (hostname == NULL) {
+					hostname = malloc(MAXHOSTNAMELEN,
+					    M_TEMP, M_WAITOK);
+				}
+				getcredhostname(td->td_ucred, hostname,
+				    MAXHOSTNAMELEN);
+				sbuf_cat(&sb, hostname);
+				break;
+			case 'I':	/* autoincrementing index */
+				if (indexpos != -1) {
+					sbuf_printf(&sb, "%%I");
+					break;
+				}
+
+				/*
+				 * Reserve as many digits as the largest
+				 * index needs; corefile_open_last() fills
+				 * in the real value.
+				 */
+				indexpos = sbuf_len(&sb);
+				sbuf_printf(&sb, "%u", ncores - 1);
+				indexlen = sbuf_len(&sb) - indexpos;
+				break;
+			case 'N':	/* process name */
+				sbuf_printf(&sb, "%s", comm);
+				break;
+			case 'P':	/* process id */
+				sbuf_printf(&sb, "%u", pid);
+				break;
+			case 'S':	/* signal number */
+				sbuf_printf(&sb, "%i", signum);
+				break;
+			case 'U':	/* user id */
+				sbuf_printf(&sb, "%u", uid);
+				break;
+			default:
+				log(LOG_ERR,
+				    "Unknown format character %c in "
+				    "corename `%s'\n", format[i], format);
+				break;
+			}
+			break;
+		default:
+			sbuf_putc(&sb, format[i]);
+			break;
+		}
+	}
+	sx_sunlock(&allproc_lock);
+	free(hostname, M_TEMP);
+	if (compress == COMPRESS_GZIP)
+		sbuf_cat(&sb, GZIP_SUFFIX);
+	else if (compress == COMPRESS_ZSTD)
+		sbuf_cat(&sb, ZSTD_SUFFIX);
+	if (sbuf_error(&sb) != 0) {
+		log(LOG_ERR, "pid %ld (%s), uid (%lu): corename is too "
+		    "long\n", (long)pid, comm, (u_long)uid);
+		sbuf_delete(&sb);
+		free(name, M_TEMP);
+		return (ENOMEM);
+	}
+	sbuf_finish(&sb);
+	sbuf_delete(&sb);
+
+	if (indexpos != -1) {
+		error = corefile_open_last(td, name, indexpos, indexlen, ncores,
+		    vpp);
+		if (error != 0) {
+			log(LOG_ERR,
+			    "pid %d (%s), uid (%u): Path `%s' failed "
+			    "on initial open test, error = %d\n",
+			    pid, comm, uid, name, error);
+		}
+	} else {
+		cmode = S_IRUSR | S_IWUSR;
+		oflags = VN_OPEN_NOAUDIT | VN_OPEN_NAMECACHE |
+		    (capmode_coredump ? VN_OPEN_NOCAPCHECK : 0);
+		flags = O_CREAT | FWRITE | O_NOFOLLOW;
+		if ((td->td_proc->p_flag & P_SUGID) != 0)
+			flags |= O_EXCL;
+
+		NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name);
+		error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred,
+		    NULL);
+		if (error == 0) {
+			*vpp = nd.ni_vp;
+			NDFREE_PNBUF(&nd);
+		}
+	}
+
+	if (error != 0) {
+#ifdef AUDIT
+		audit_proc_coredump(td, name, error);
+#endif
+		free(name, M_TEMP);
+		return (error);
+	}
+	*namep = name;
+	return (0);
+}
+
+/*
+ * The vnode dumper is the traditional coredump handler. 
Our policy and limits
+ * are generally checked already, so it creates the coredump name and passes on
+ * a vnode and a size limit to the process-specific coredump routine if there is
+ * one. If there _is not_ one, it returns ENOSYS; otherwise it returns the
+ * error from the process-specific routine.
+ *
+ * Must be entered with the proc lock held.  The lock is dropped before the
+ * corefile is opened and is not held on return.  Other return values are
+ * the error from corefile_open(), EFAULT when the opened file fails the
+ * sanity checks below, and the vn_close() error if the dump itself
+ * succeeded.
+ */
+int
+coredump_vnode(struct thread *td, off_t limit)
+{
+	struct proc *p = td->td_proc;
+	struct ucred *cred = td->td_ucred;
+	struct vnode *vp;
+	struct coredump_vnode_ctx wctx;
+	struct coredump_writer cdw = { };
+	struct flock lf;
+	struct vattr vattr;
+	size_t fullpathsize;
+	int error, error1, jid, locked, ppid, sig;
+	char *name;	/* name of corefile */
+	void *rl_cookie;
+	char *fullpath, *freepath = NULL;
+	struct sbuf *sb;
+
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+
+	/* Snapshot what the notification needs while the proc lock is held. */
+	ppid = p->p_oppid;
+	sig = p->p_sig;
+	jid = p->p_ucred->cr_prison->pr_id;
+	PROC_UNLOCK(p);
+
+	/* On failure corefile_open() has already freed the name buffer. */
+	error = corefile_open(p->p_comm, cred->cr_uid, p->p_pid, td,
+	    compress_user_cores, sig, &vp, &name);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Don't dump to non-regular files or files with links.
+	 * Do not dump into system files. Effective user must own the corefile.
+	 */
+	if (vp->v_type != VREG || VOP_GETATTR(vp, &vattr, cred) != 0 ||
+	    vattr.va_nlink != 1 || (vp->v_vflag & VV_SYSTEM) != 0 ||
+	    vattr.va_uid != cred->cr_uid) {
+		VOP_UNLOCK(vp);
+		error = EFAULT;
+		goto out;
+	}
+
+	VOP_UNLOCK(vp);
+
+	/* Postpone other writers, including core dumps of other processes. */
+	rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
+
+	/*
+	 * Try for a whole-file advisory write lock as well.  Failing to get
+	 * it does not stop the dump; "locked" only records whether it has to
+	 * be released afterwards.
+	 */
+	lf.l_whence = SEEK_SET;
+	lf.l_start = 0;
+	lf.l_len = 0;
+	lf.l_type = F_WRLCK;
+	locked = (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &lf, F_FLOCK) == 0);
+
+	/*
+	 * Truncate the file and, if the sysctl asks for it, set UF_NODUMP.
+	 * The VOP_SETATTR() result is not checked.
+	 */
+	VATTR_NULL(&vattr);
+	vattr.va_size = 0;
+	if (set_core_nodump_flag)
+		vattr.va_flags = UF_NODUMP;
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+	VOP_SETATTR(vp, &vattr, cred);
+	VOP_UNLOCK(vp);
+	PROC_LOCK(p);
+	p->p_acflag |= ACORE;
+	PROC_UNLOCK(p);
+
+	/* Describe the vnode-backed writer for the ABI's dump routine. */
+	wctx.vp = vp;
+	wctx.fcred = NOCRED;
+
+	cdw.ctx = &wctx;
+	cdw.write_fn = core_vn_write;
+	cdw.extend_fn = core_vn_extend;
+
+	if (p->p_sysent->sv_coredump != NULL) {
+		error = p->p_sysent->sv_coredump(td, &cdw, limit, 0);
+	} else {
+		error = ENOSYS;
+	}
+
+	if (locked) {
+		lf.l_type = F_UNLCK;
+		VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_FLOCK);
+	}
+	vn_rangelock_unlock(vp, rl_cookie);
+
+	/*
+	 * Notify the userland helper that a process triggered a core dump.
+	 * This allows the helper to run an automated debugging session.
+	 * Nothing is sent when the dump failed or the sysctl is off.
+	 */
+	if (error != 0 || coredump_devctl == 0)
+		goto out;
+	sb = sbuf_new_auto();
+	if (vn_fullpath_global(p->p_textvp, &fullpath, &freepath) != 0)
+		goto out2;
+	sbuf_cat(sb, "comm=\"");
+	devctl_safe_quote_sb(sb, fullpath);
+	free(freepath, M_TEMP);
+	sbuf_cat(sb, "\" core=\"");
+
+	/*
+	 * We can't lookup core file vp directly. When we're replacing a core, and
+	 * other random times, we flush the name cache, so it will fail. Instead,
+	 * if the path of the core is relative, add the current dir in front of it.
+	 */
+	if (name[0] != '/') {
+		fullpathsize = MAXPATHLEN;
+		freepath = malloc(fullpathsize, M_TEMP, M_WAITOK);
+		if (vn_getcwd(freepath, &fullpath, &fullpathsize) != 0) {
+			free(freepath, M_TEMP);
+			goto out2;
+		}
+		devctl_safe_quote_sb(sb, fullpath);
+		free(freepath, M_TEMP);
+		sbuf_putc(sb, '/');
+	}
+	devctl_safe_quote_sb(sb, name);
+	sbuf_putc(sb, '"');
+
+	sbuf_printf(sb, " jid=%d pid=%d ppid=%d signo=%d",
+	    jid, p->p_pid, ppid, sig);
+	if (sbuf_finish(sb) == 0)
+		devctl_notify("kernel", "signal", "coredump", sbuf_data(sb));
+out2:
+	sbuf_delete(sb);
+out:
+	/* An earlier error takes precedence over a failure to close. */
+	error1 = vn_close(vp, FWRITE, cred, td);
+	if (error == 0)
+		error = error1;
+#ifdef AUDIT
+	audit_proc_coredump(td, name, error);
+#endif
+	free(name, M_TEMP);
+	return (error);
+}
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -64,6 +64,7 @@ #include #include #include +#include #include #include #include @@ -1562,9 +1563,6 @@ TAILQ_HEAD(note_info_list, note_info); -extern int compress_user_cores; -extern int compress_user_cores_level; - static void cb_put_phdr(vm_map_entry_t, void *); static void cb_size_segment(vm_map_entry_t, void *); static void each_dumpable_segment(struct thread *, segment_callback, void *, diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include #include @@ -1998,18 +1999,6 @@ return (error); } -int -core_vn_write(const struct coredump_writer *cdw, const void *base, size_t len, - off_t offset, enum uio_seg seg, struct ucred *cred, size_t *resid, - struct thread *td) -{ - struct coredump_vnode_ctx *ctx = cdw->ctx; - - return (vn_rdwr_inchunks(UIO_WRITE, ctx->vp, __DECONST(void *, base), - len, offset, seg, IO_UNIT | IO_DIRECT | IO_RANGELOCKED, - cred, ctx->fcred, resid, td)); -} - int core_write(struct coredump_params *cp, const void *base, size_t len, off_t offset, enum uio_seg seg, 
size_t *resid) @@ -2018,24 +2007,6 @@ cp->active_cred, resid, cp->td)); } -int -core_vn_extend(const struct coredump_writer *cdw, off_t newsz, - struct ucred *cred) -{ - struct coredump_vnode_ctx *ctx = cdw->ctx; - struct mount *mp; - int error; - - error = vn_start_write(ctx->vp, &mp, V_WAIT); - if (error != 0) - return (error); - vn_lock(ctx->vp, LK_EXCLUSIVE | LK_RETRY); - error = vn_truncate_locked(ctx->vp, newsz, false, cred); - VOP_UNLOCK(ctx->vp); - vn_finished_write(mp); - return (error); -} - static int core_extend(struct coredump_params *cp, off_t newsz) { diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -45,7 +45,6 @@ #include #include #include -#include #include #include #include @@ -81,6 +80,7 @@ #include #include #include +#include #include #include #include @@ -102,7 +102,6 @@ SDT_PROBE_DEFINE3(proc, , , signal__discard, "struct thread *", "struct proc *", "int"); -static int coredump(struct thread *); static int killpg1(struct thread *td, int sig, int pgid, int all, ksiginfo_t *ksi); static int issignal(struct thread *td); @@ -127,11 +126,6 @@ .f_event = filt_signal, }; -static int kern_logsigexit = 1; -SYSCTL_INT(_kern, KERN_LOGSIGEXIT, logsigexit, CTLFLAG_RW, - &kern_logsigexit, 0, - "Log processes quitting on abnormal signals to syslog(3)"); - static int kern_forcesigexit = 1; SYSCTL_INT(_kern, OID_AUTO, forcesigexit, CTLFLAG_RW, &kern_forcesigexit, 0, "Force trap signal to be handled"); @@ -194,26 +188,6 @@ (cr1)->cr_ruid == (cr2)->cr_uid || \ (cr1)->cr_uid == (cr2)->cr_uid) -static int sugid_coredump; -SYSCTL_INT(_kern, OID_AUTO, sugid_coredump, CTLFLAG_RWTUN, - &sugid_coredump, 0, "Allow setuid and setgid processes to dump core"); - -static int capmode_coredump; -SYSCTL_INT(_kern, OID_AUTO, capmode_coredump, CTLFLAG_RWTUN, - &capmode_coredump, 0, "Allow processes in capability mode to dump core"); - -static int do_coredump = 1; -SYSCTL_INT(_kern, OID_AUTO, coredump, CTLFLAG_RW, - 
&do_coredump, 0, "Enable/Disable coredumps"); - -static int set_core_nodump_flag = 0; -SYSCTL_INT(_kern, OID_AUTO, nodump_coredump, CTLFLAG_RW, &set_core_nodump_flag, - 0, "Enable setting the NODUMP flag on coredump files"); - -static int coredump_devctl = 0; -SYSCTL_INT(_kern, OID_AUTO, coredump_devctl, CTLFLAG_RW, &coredump_devctl, - 0, "Generate a devctl notification when processes coredump"); - /* * Signal properties and actions. * The array below categorizes the signals and their default actions @@ -785,6 +759,13 @@ return (0); } +bool +sig_do_core(int sig) +{ + + return ((sigprop(sig) & SIGPROP_CORE) != 0); +} + static bool sigact_flag_test(const struct sigaction *act, int flag) { @@ -3644,82 +3625,6 @@ kern_psignal(p, SIGKILL); } -/* - * Force the current process to exit with the specified signal, dumping core - * if appropriate. We bypass the normal tests for masked and caught signals, - * allowing unrecoverable failures to terminate the process without changing - * signal state. Mark the accounting record with the signal termination. - * If dumping core, save the signal number for the debugger. Calls exit and - * does not return. - */ -void -sigexit(struct thread *td, int sig) -{ - struct proc *p = td->td_proc; - const char *coreinfo; - int rv; - bool logexit; - - PROC_LOCK_ASSERT(p, MA_OWNED); - proc_set_p2_wexit(p); - - p->p_acflag |= AXSIG; - if ((p->p_flag2 & P2_LOGSIGEXIT_CTL) == 0) - logexit = kern_logsigexit != 0; - else - logexit = (p->p_flag2 & P2_LOGSIGEXIT_ENABLE) != 0; - - /* - * We must be single-threading to generate a core dump. This - * ensures that the registers in the core file are up-to-date. - * Also, the ELF dump handler assumes that the thread list doesn't - * change out from under it. - * - * XXX If another thread attempts to single-thread before us - * (e.g. via fork()), we won't get a dump at all. 
- */ - if ((sigprop(sig) & SIGPROP_CORE) && - thread_single(p, SINGLE_NO_EXIT) == 0) { - p->p_sig = sig; - /* - * Log signals which would cause core dumps - * (Log as LOG_INFO to appease those who don't want - * these messages.) - * XXX : Todo, as well as euid, write out ruid too - * Note that coredump() drops proc lock. - */ - rv = coredump(td); - switch (rv) { - case 0: - sig |= WCOREFLAG; - coreinfo = " (core dumped)"; - break; - case EFAULT: - coreinfo = " (no core dump - bad address)"; - break; - case EINVAL: - coreinfo = " (no core dump - invalid argument)"; - break; - case EFBIG: - coreinfo = " (no core dump - too large)"; - break; - default: - coreinfo = " (no core dump - other error)"; - break; - } - if (logexit) - log(LOG_INFO, - "pid %d (%s), jid %d, uid %d: exited on " - "signal %d%s\n", p->p_pid, p->p_comm, - p->p_ucred->cr_prison->pr_id, - td->td_ucred->cr_uid, - sig &~ WCOREFLAG, coreinfo); - } else - PROC_UNLOCK(p); - exit1(td, 0, sig); - /* NOTREACHED */ -} - /* * Send queued SIGCHLD to parent when child process's state * is changed. 
@@ -3813,486 +3718,6 @@ sigparent(p, reason, status); } -#define MAX_NUM_CORE_FILES 100000 -#ifndef NUM_CORE_FILES -#define NUM_CORE_FILES 5 -#endif -CTASSERT(NUM_CORE_FILES >= 0 && NUM_CORE_FILES <= MAX_NUM_CORE_FILES); -static int num_cores = NUM_CORE_FILES; - -static int -sysctl_debug_num_cores_check (SYSCTL_HANDLER_ARGS) -{ - int error; - int new_val; - - new_val = num_cores; - error = sysctl_handle_int(oidp, &new_val, 0, req); - if (error != 0 || req->newptr == NULL) - return (error); - if (new_val > MAX_NUM_CORE_FILES) - new_val = MAX_NUM_CORE_FILES; - if (new_val < 0) - new_val = 0; - num_cores = new_val; - return (0); -} -SYSCTL_PROC(_debug, OID_AUTO, ncores, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, sizeof(int), - sysctl_debug_num_cores_check, "I", - "Maximum number of generated process corefiles while using index format"); - -#define GZIP_SUFFIX ".gz" -#define ZSTD_SUFFIX ".zst" - -int compress_user_cores = 0; - -static int -sysctl_compress_user_cores(SYSCTL_HANDLER_ARGS) -{ - int error, val; - - val = compress_user_cores; - error = sysctl_handle_int(oidp, &val, 0, req); - if (error != 0 || req->newptr == NULL) - return (error); - if (val != 0 && !compressor_avail(val)) - return (EINVAL); - compress_user_cores = val; - return (error); -} -SYSCTL_PROC(_kern, OID_AUTO, compress_user_cores, - CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, 0, sizeof(int), - sysctl_compress_user_cores, "I", - "Enable compression of user corefiles (" - __XSTRING(COMPRESS_GZIP) " = gzip, " - __XSTRING(COMPRESS_ZSTD) " = zstd)"); - -int compress_user_cores_level = 6; -SYSCTL_INT(_kern, OID_AUTO, compress_user_cores_level, CTLFLAG_RWTUN, - &compress_user_cores_level, 0, - "Corefile compression level"); - -/* - * Protect the access to corefilename[] by allproc_lock. 
- */ -#define corefilename_lock allproc_lock - -static char corefilename[MAXPATHLEN] = {"%N.core"}; -TUNABLE_STR("kern.corefile", corefilename, sizeof(corefilename)); - -static int -sysctl_kern_corefile(SYSCTL_HANDLER_ARGS) -{ - int error; - - sx_xlock(&corefilename_lock); - error = sysctl_handle_string(oidp, corefilename, sizeof(corefilename), - req); - sx_xunlock(&corefilename_lock); - - return (error); -} -SYSCTL_PROC(_kern, OID_AUTO, corefile, CTLTYPE_STRING | CTLFLAG_RW | - CTLFLAG_MPSAFE, 0, 0, sysctl_kern_corefile, "A", - "Process corefile name format string"); - -static void -vnode_close_locked(struct thread *td, struct vnode *vp) -{ - - VOP_UNLOCK(vp); - vn_close(vp, FWRITE, td->td_ucred, td); -} - -/* - * If the core format has a %I in it, then we need to check - * for existing corefiles before defining a name. - * To do this we iterate over 0..ncores to find a - * non-existing core file name to use. If all core files are - * already used we choose the oldest one. - */ -static int -corefile_open_last(struct thread *td, char *name, int indexpos, - int indexlen, int ncores, struct vnode **vpp) -{ - struct vnode *oldvp, *nextvp, *vp; - struct vattr vattr; - struct nameidata nd; - int error, i, flags, oflags, cmode; - char ch; - struct timespec lasttime; - - nextvp = oldvp = NULL; - cmode = S_IRUSR | S_IWUSR; - oflags = VN_OPEN_NOAUDIT | VN_OPEN_NAMECACHE | - (capmode_coredump ? 
VN_OPEN_NOCAPCHECK : 0); - - for (i = 0; i < ncores; i++) { - flags = O_CREAT | FWRITE | O_NOFOLLOW; - - ch = name[indexpos + indexlen]; - (void)snprintf(name + indexpos, indexlen + 1, "%.*u", indexlen, - i); - name[indexpos + indexlen] = ch; - - NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name); - error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred, - NULL); - if (error != 0) - break; - - vp = nd.ni_vp; - NDFREE_PNBUF(&nd); - if ((flags & O_CREAT) == O_CREAT) { - nextvp = vp; - break; - } - - error = VOP_GETATTR(vp, &vattr, td->td_ucred); - if (error != 0) { - vnode_close_locked(td, vp); - break; - } - - if (oldvp == NULL || - lasttime.tv_sec > vattr.va_mtime.tv_sec || - (lasttime.tv_sec == vattr.va_mtime.tv_sec && - lasttime.tv_nsec >= vattr.va_mtime.tv_nsec)) { - if (oldvp != NULL) - vn_close(oldvp, FWRITE, td->td_ucred, td); - oldvp = vp; - VOP_UNLOCK(oldvp); - lasttime = vattr.va_mtime; - } else { - vnode_close_locked(td, vp); - } - } - - if (oldvp != NULL) { - if (nextvp == NULL) { - if ((td->td_proc->p_flag & P_SUGID) != 0) { - error = EFAULT; - vn_close(oldvp, FWRITE, td->td_ucred, td); - } else { - nextvp = oldvp; - error = vn_lock(nextvp, LK_EXCLUSIVE); - if (error != 0) { - vn_close(nextvp, FWRITE, td->td_ucred, - td); - nextvp = NULL; - } - } - } else { - vn_close(oldvp, FWRITE, td->td_ucred, td); - } - } - if (error != 0) { - if (nextvp != NULL) - vnode_close_locked(td, oldvp); - } else { - *vpp = nextvp; - } - - return (error); -} - -/* - * corefile_open(comm, uid, pid, td, compress, vpp, namep) - * Expand the name described in corefilename, using name, uid, and pid - * and open/create core file. - * corefilename is a printf-like string, with three format specifiers: - * %N name of process ("name") - * %P process id (pid) - * %U user id (uid) - * For example, "%N.core" is the default; they can be disabled completely - * by using "/dev/null", or all core files can be stored in "/cores/%U/%N-%P". 
- * This is controlled by the sysctl variable kern.corefile (see above). - */ -static int -corefile_open(const char *comm, uid_t uid, pid_t pid, struct thread *td, - int compress, int signum, struct vnode **vpp, char **namep) -{ - struct sbuf sb; - struct nameidata nd; - const char *format; - char *hostname, *name; - int cmode, error, flags, i, indexpos, indexlen, oflags, ncores; - - hostname = NULL; - format = corefilename; - name = malloc(MAXPATHLEN, M_TEMP, M_WAITOK | M_ZERO); - indexlen = 0; - indexpos = -1; - ncores = num_cores; - (void)sbuf_new(&sb, name, MAXPATHLEN, SBUF_FIXEDLEN); - sx_slock(&corefilename_lock); - for (i = 0; format[i] != '\0'; i++) { - switch (format[i]) { - case '%': /* Format character */ - i++; - switch (format[i]) { - case '%': - sbuf_putc(&sb, '%'); - break; - case 'H': /* hostname */ - if (hostname == NULL) { - hostname = malloc(MAXHOSTNAMELEN, - M_TEMP, M_WAITOK); - } - getcredhostname(td->td_ucred, hostname, - MAXHOSTNAMELEN); - sbuf_cat(&sb, hostname); - break; - case 'I': /* autoincrementing index */ - if (indexpos != -1) { - sbuf_printf(&sb, "%%I"); - break; - } - - indexpos = sbuf_len(&sb); - sbuf_printf(&sb, "%u", ncores - 1); - indexlen = sbuf_len(&sb) - indexpos; - break; - case 'N': /* process name */ - sbuf_printf(&sb, "%s", comm); - break; - case 'P': /* process id */ - sbuf_printf(&sb, "%u", pid); - break; - case 'S': /* signal number */ - sbuf_printf(&sb, "%i", signum); - break; - case 'U': /* user id */ - sbuf_printf(&sb, "%u", uid); - break; - default: - log(LOG_ERR, - "Unknown format character %c in " - "corename `%s'\n", format[i], format); - break; - } - break; - default: - sbuf_putc(&sb, format[i]); - break; - } - } - sx_sunlock(&corefilename_lock); - free(hostname, M_TEMP); - if (compress == COMPRESS_GZIP) - sbuf_cat(&sb, GZIP_SUFFIX); - else if (compress == COMPRESS_ZSTD) - sbuf_cat(&sb, ZSTD_SUFFIX); - if (sbuf_error(&sb) != 0) { - log(LOG_ERR, "pid %ld (%s), uid (%lu): corename is too " - "long\n", (long)pid, 
comm, (u_long)uid); - sbuf_delete(&sb); - free(name, M_TEMP); - return (ENOMEM); - } - sbuf_finish(&sb); - sbuf_delete(&sb); - - if (indexpos != -1) { - error = corefile_open_last(td, name, indexpos, indexlen, ncores, - vpp); - if (error != 0) { - log(LOG_ERR, - "pid %d (%s), uid (%u): Path `%s' failed " - "on initial open test, error = %d\n", - pid, comm, uid, name, error); - } - } else { - cmode = S_IRUSR | S_IWUSR; - oflags = VN_OPEN_NOAUDIT | VN_OPEN_NAMECACHE | - (capmode_coredump ? VN_OPEN_NOCAPCHECK : 0); - flags = O_CREAT | FWRITE | O_NOFOLLOW; - if ((td->td_proc->p_flag & P_SUGID) != 0) - flags |= O_EXCL; - - NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name); - error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred, - NULL); - if (error == 0) { - *vpp = nd.ni_vp; - NDFREE_PNBUF(&nd); - } - } - - if (error != 0) { -#ifdef AUDIT - audit_proc_coredump(td, name, error); -#endif - free(name, M_TEMP); - return (error); - } - *namep = name; - return (0); -} - -/* - * Dump a process' core. The main routine does some - * policy checking, and creates the name of the coredump; - * then it passes on a vnode and a size limit to the process-specific - * coredump routine if there is one; if there _is not_ one, it returns - * ENOSYS; otherwise it returns the error from the process-specific routine. 
- */ - -static int -coredump(struct thread *td) -{ - struct proc *p = td->td_proc; - struct ucred *cred = td->td_ucred; - struct vnode *vp; - struct coredump_vnode_ctx wctx; - struct coredump_writer cdw; - struct flock lf; - struct vattr vattr; - size_t fullpathsize; - int error, error1, jid, locked, ppid, sig; - char *name; /* name of corefile */ - void *rl_cookie; - off_t limit; - char *fullpath, *freepath = NULL; - struct sbuf *sb; - - PROC_LOCK_ASSERT(p, MA_OWNED); - MPASS((p->p_flag & P_HADTHREADS) == 0 || p->p_singlethread == td); - - if (!do_coredump || (!sugid_coredump && (p->p_flag & P_SUGID) != 0) || - (p->p_flag2 & P2_NOTRACE) != 0) { - PROC_UNLOCK(p); - return (EFAULT); - } - - /* - * Note that the bulk of limit checking is done after - * the corefile is created. The exception is if the limit - * for corefiles is 0, in which case we don't bother - * creating the corefile at all. This layout means that - * a corefile is truncated instead of not being created, - * if it is larger than the limit. - */ - limit = (off_t)lim_cur(td, RLIMIT_CORE); - if (limit == 0 || racct_get_available(p, RACCT_CORE) == 0) { - PROC_UNLOCK(p); - return (EFBIG); - } - - ppid = p->p_oppid; - sig = p->p_sig; - jid = p->p_ucred->cr_prison->pr_id; - PROC_UNLOCK(p); - - error = corefile_open(p->p_comm, cred->cr_uid, p->p_pid, td, - compress_user_cores, p->p_sig, &vp, &name); - if (error != 0) - return (error); - - /* - * Don't dump to non-regular files or files with links. - * Do not dump into system files. Effective user must own the corefile. - */ - if (vp->v_type != VREG || VOP_GETATTR(vp, &vattr, cred) != 0 || - vattr.va_nlink != 1 || (vp->v_vflag & VV_SYSTEM) != 0 || - vattr.va_uid != cred->cr_uid) { - VOP_UNLOCK(vp); - error = EFAULT; - goto out; - } - - VOP_UNLOCK(vp); - - /* Postpone other writers, including core dumps of other processes. 
*/ - rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); - - lf.l_whence = SEEK_SET; - lf.l_start = 0; - lf.l_len = 0; - lf.l_type = F_WRLCK; - locked = (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &lf, F_FLOCK) == 0); - - VATTR_NULL(&vattr); - vattr.va_size = 0; - if (set_core_nodump_flag) - vattr.va_flags = UF_NODUMP; - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); - VOP_SETATTR(vp, &vattr, cred); - VOP_UNLOCK(vp); - PROC_LOCK(p); - p->p_acflag |= ACORE; - PROC_UNLOCK(p); - - wctx.vp = vp; - wctx.fcred = NOCRED; - - cdw.ctx = &wctx; - cdw.write_fn = core_vn_write; - cdw.extend_fn = core_vn_extend; - - if (p->p_sysent->sv_coredump != NULL) { - error = p->p_sysent->sv_coredump(td, &cdw, limit, 0); - } else { - error = ENOSYS; - } - - if (locked) { - lf.l_type = F_UNLCK; - VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_FLOCK); - } - vn_rangelock_unlock(vp, rl_cookie); - - /* - * Notify the userland helper that a process triggered a core dump. - * This allows the helper to run an automated debugging session. - */ - if (error != 0 || coredump_devctl == 0) - goto out; - sb = sbuf_new_auto(); - if (vn_fullpath_global(p->p_textvp, &fullpath, &freepath) != 0) - goto out2; - sbuf_cat(sb, "comm=\""); - devctl_safe_quote_sb(sb, fullpath); - free(freepath, M_TEMP); - sbuf_cat(sb, "\" core=\""); - - /* - * We can't lookup core file vp directly. When we're replacing a core, and - * other random times, we flush the name cache, so it will fail. Instead, - * if the path of the core is relative, add the current dir in front if it. 
- */ - if (name[0] != '/') { - fullpathsize = MAXPATHLEN; - freepath = malloc(fullpathsize, M_TEMP, M_WAITOK); - if (vn_getcwd(freepath, &fullpath, &fullpathsize) != 0) { - free(freepath, M_TEMP); - goto out2; - } - devctl_safe_quote_sb(sb, fullpath); - free(freepath, M_TEMP); - sbuf_putc(sb, '/'); - } - devctl_safe_quote_sb(sb, name); - sbuf_putc(sb, '"'); - - sbuf_printf(sb, " jid=%d pid=%d ppid=%d signo=%d", - jid, p->p_pid, ppid, sig); - if (sbuf_finish(sb) == 0) - devctl_notify("kernel", "signal", "coredump", sbuf_data(sb)); -out2: - sbuf_delete(sb); -out: - error1 = vn_close(vp, FWRITE, cred, td); - if (error == 0) - error = error1; -#ifdef AUDIT - audit_proc_coredump(td, name, error); -#endif - free(name, M_TEMP); - return (error); -} - /* * Nonexistent system call-- signal process (may want to handle it). Flag * error in case process won't see signal immediately (blocked or ignored). diff --git a/sys/kern/kern_ucoredump.c b/sys/kern/kern_ucoredump.c new file mode 100644 --- /dev/null +++ b/sys/kern/kern_ucoredump.c @@ -0,0 +1,212 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1982, 1986, 1989, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int coredump(struct thread *td); + +int compress_user_cores = 0; + +static int kern_logsigexit = 1; +SYSCTL_INT(_kern, KERN_LOGSIGEXIT, logsigexit, CTLFLAG_RW, + &kern_logsigexit, 0, + "Log processes quitting on abnormal signals to syslog(3)"); + +static int sugid_coredump; +SYSCTL_INT(_kern, OID_AUTO, sugid_coredump, CTLFLAG_RWTUN, + &sugid_coredump, 0, "Allow setuid and setgid processes to dump core"); + +static int do_coredump = 1; +SYSCTL_INT(_kern, OID_AUTO, coredump, CTLFLAG_RW, + &do_coredump, 0, "Enable/Disable coredumps"); + +static int +sysctl_compress_user_cores(SYSCTL_HANDLER_ARGS) +{ + int error, val; + + val = compress_user_cores; + error = sysctl_handle_int(oidp, &val, 0, req); + if (error != 0 || req->newptr == NULL) + return (error); + if (val != 0 && !compressor_avail(val)) + return (EINVAL); + compress_user_cores = val; + return (error); +} +SYSCTL_PROC(_kern, OID_AUTO, compress_user_cores, + CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, 0, sizeof(int), + sysctl_compress_user_cores, "I", + "Enable compression of user corefiles (" + __XSTRING(COMPRESS_GZIP) " = gzip, " + __XSTRING(COMPRESS_ZSTD) " = zstd)"); + +int compress_user_cores_level = 6; +SYSCTL_INT(_kern, OID_AUTO, compress_user_cores_level, CTLFLAG_RWTUN, + &compress_user_cores_level, 0, + "Corefile compression level"); + +/* + * Force the current process to exit with the specified signal, dumping core + * if appropriate. We bypass the normal tests for masked and caught signals, + * allowing unrecoverable failures to terminate the process without changing + * signal state. Mark the accounting record with the signal termination. + * If dumping core, save the signal number for the debugger. Calls exit and + * does not return. 
+ */ +void +sigexit(struct thread *td, int sig) +{ + struct proc *p = td->td_proc; + const char *coreinfo; + int rv; + bool logexit; + + PROC_LOCK_ASSERT(p, MA_OWNED); + proc_set_p2_wexit(p); + + p->p_acflag |= AXSIG; + if ((p->p_flag2 & P2_LOGSIGEXIT_CTL) == 0) + logexit = kern_logsigexit != 0; + else + logexit = (p->p_flag2 & P2_LOGSIGEXIT_ENABLE) != 0; + + /* + * We must be single-threading to generate a core dump. This + * ensures that the registers in the core file are up-to-date. + * Also, the ELF dump handler assumes that the thread list doesn't + * change out from under it. + * + * XXX If another thread attempts to single-thread before us + * (e.g. via fork()), we won't get a dump at all. + */ + if (sig_do_core(sig) && thread_single(p, SINGLE_NO_EXIT) == 0) { + p->p_sig = sig; + /* + * Log signals which would cause core dumps + * (Log as LOG_INFO to appease those who don't want + * these messages.) + * XXX : Todo, as well as euid, write out ruid too + * Note that coredump() drops proc lock. + */ + rv = coredump(td); + switch (rv) { + case 0: + sig |= WCOREFLAG; + coreinfo = " (core dumped)"; + break; + case EFAULT: + coreinfo = " (no core dump - bad address)"; + break; + case EINVAL: + coreinfo = " (no core dump - invalid argument)"; + break; + case EFBIG: + coreinfo = " (no core dump - too large)"; + break; + default: + coreinfo = " (no core dump - other error)"; + break; + } + if (logexit) + log(LOG_INFO, + "pid %d (%s), jid %d, uid %d: exited on " + "signal %d%s\n", p->p_pid, p->p_comm, + p->p_ucred->cr_prison->pr_id, + td->td_ucred->cr_uid, + sig &~ WCOREFLAG, coreinfo); + } else + PROC_UNLOCK(p); + exit1(td, 0, sig); + /* NOTREACHED */ +} + + +/* + * Dump a process' core. 
The main routine does some + * policy checking, and creates the name of the coredump; + * then it passes on a vnode and a size limit to the process-specific + * coredump routine if there is one; if there _is not_ one, it returns + * ENOSYS; otherwise it returns the error from the process-specific routine. + */ +static int +coredump(struct thread *td) +{ + struct proc *p = td->td_proc; + off_t limit; + int error; + + PROC_LOCK_ASSERT(p, MA_OWNED); + MPASS((p->p_flag & P_HADTHREADS) == 0 || p->p_singlethread == td); + + if (!do_coredump || (!sugid_coredump && (p->p_flag & P_SUGID) != 0) || + (p->p_flag2 & P2_NOTRACE) != 0) { + PROC_UNLOCK(p); + return (EFAULT); + } + + /* + * Note that the bulk of limit checking is done after + * the corefile is created. The exception is if the limit + * for corefiles is 0, in which case we don't bother + * creating the corefile at all. This layout means that + * a corefile is truncated instead of not being created, + * if it is larger than the limit. + */ + limit = (off_t)lim_cur(td, RLIMIT_CORE); + if (limit == 0 || racct_get_available(p, RACCT_CORE) == 0) { + PROC_UNLOCK(p); + return (EFBIG); + } + + error = coredump_vnode(td, limit); + PROC_LOCK_ASSERT(p, MA_NOTOWNED); + + return (error); +} diff --git a/sys/sys/exec.h b/sys/sys/exec.h --- a/sys/sys/exec.h +++ b/sys/sys/exec.h @@ -37,8 +37,6 @@ #ifndef _SYS_EXEC_H_ #define _SYS_EXEC_H_ -#include - /* * Before ps_args existed, the following structure, found at the top of * the user stack of each user process, was used by ps(1) to locate @@ -59,42 +57,6 @@ unsigned int ps_nenvstr; /* the number of environment strings */ }; -/* Coredump output parameters. 
*/ -struct coredump_params; -struct coredump_writer; -struct thread; -struct ucred; - -typedef int coredump_init_fn(const struct coredump_writer *, - const struct coredump_params *, int); -typedef int coredump_write_fn(const struct coredump_writer *, const void *, size_t, - off_t, enum uio_seg, struct ucred *, size_t *, struct thread *); -typedef int coredump_extend_fn(const struct coredump_writer *, off_t, - struct ucred *); - -struct coredump_vnode_ctx { - struct vnode *vp; - struct ucred *fcred; -}; - -coredump_write_fn core_vn_write; -coredump_extend_fn core_vn_extend; - -struct coredump_writer { - void *ctx; - coredump_init_fn *init_fn; - coredump_write_fn *write_fn; - coredump_extend_fn *extend_fn; -}; - -struct coredump_params { - off_t offset; - struct ucred *active_cred; - struct thread *td; - const struct coredump_writer *cdw; - struct compressor *comp; -}; - struct image_params; struct execsw { @@ -133,16 +95,6 @@ enum uio_seg; -#define CORE_BUF_SIZE (16 * 1024) - -int core_write(struct coredump_params *, const void *, size_t, off_t, - enum uio_seg, size_t *); -int core_output(char *, size_t, off_t, struct coredump_params *, void *); -int sbuf_drain_core_output(void *, const char *, int); - -extern int coredump_pack_fileinfo; -extern int coredump_pack_vmmapinfo; - /* * note: name##_mod cannot be const storage because the * linker_file_sysinit() function modifies _file in the diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h --- a/sys/sys/signalvar.h +++ b/sys/sys/signalvar.h @@ -403,6 +403,7 @@ void sigfastblock_clear(struct thread *td); void sigfastblock_fetch(struct thread *td); int sig_intr(void); +bool sig_do_core(int); void siginit(struct proc *p); void signotify(struct thread *td); void sigqueue_delete(struct sigqueue *queue, int sig); diff --git a/sys/sys/ucoredump.h b/sys/sys/ucoredump.h new file mode 100644 --- /dev/null +++ b/sys/sys/ucoredump.h @@ -0,0 +1,68 @@ +/*- + * + * Copyright (c) 2015 Mark Johnston + * Copyright (c) 2025 Kyle 
Evans + * + * SPDX-License-Identifier: BSD-2-Clause + * + */ + +#ifndef _SYS_UCOREDUMP_H_ +#define _SYS_UCOREDUMP_H_ + +#ifdef _KERNEL + +#include + +/* Coredump output parameters. */ +struct coredump_params; +struct coredump_writer; +struct thread; +struct ucred; + +typedef int coredump_init_fn(const struct coredump_writer *, + const struct coredump_params *, int); +typedef int coredump_write_fn(const struct coredump_writer *, const void *, size_t, + off_t, enum uio_seg, struct ucred *, size_t *, struct thread *); +typedef int coredump_extend_fn(const struct coredump_writer *, off_t, + struct ucred *); + +struct coredump_vnode_ctx { + struct vnode *vp; + struct ucred *fcred; +}; + +coredump_write_fn core_vn_write; +coredump_extend_fn core_vn_extend; +int coredump_vnode(struct thread *, off_t); + +struct coredump_writer { + void *ctx; + coredump_init_fn *init_fn; + coredump_write_fn *write_fn; + coredump_extend_fn *extend_fn; +}; + +struct coredump_params { + off_t offset; + struct ucred *active_cred; + struct thread *td; + const struct coredump_writer *cdw; + struct compressor *comp; +}; + +#define CORE_BUF_SIZE (16 * 1024) + +int core_write(struct coredump_params *, const void *, size_t, off_t, + enum uio_seg, size_t *); +int core_output(char *, size_t, off_t, struct coredump_params *, void *); +int sbuf_drain_core_output(void *, const char *, int); + +extern int coredump_pack_fileinfo; +extern int coredump_pack_vmmapinfo; + +extern int compress_user_cores; +extern int compress_user_cores_level; + +#endif /* _KERNEL */ +#endif /* _SYS_UCOREDUMP_H_ */