diff --git a/share/man/man5/core.5 b/share/man/man5/core.5 --- a/share/man/man5/core.5 +++ b/share/man/man5/core.5 @@ -48,26 +48,6 @@ (In this event, the decision to save the core file is arbitrary, see .Xr savecore 8 . ) .Pp -The maximum size of a core file is limited by the -.Dv RLIMIT_CORE -.Xr setrlimit 2 -limit. -Files which would be larger than the limit are not created. -.Pp -With a large limit, a process that had mapped a very large, -and perhaps sparsely populated, virtual memory region, could take -a very long time to create core dumps. -The system ignores all signals sent to a process writing a core file, except -.Dv SIGKILL -which terminates the writing and causes immediate exit of the process. -The behavior of -.Dv SIGKILL -can be disabled by setting tunable -.Xr sysctl 8 -variable -.Va kern.core_dump_can_intr -to zero. -.Pp The name of the file is controlled via the .Xr sysctl 8 variable @@ -107,6 +87,26 @@ .Fx behaviour. .Pp +The maximum size of a core file is limited by the +.Dv RLIMIT_CORE +.Xr setrlimit 2 +limit. +Files which would be larger than the limit are not created. +.Pp +With a large limit, a process that had mapped a very large, +and perhaps sparsely populated, virtual memory region, could take +a very long time to create core dumps. +The system ignores all signals sent to a process writing a core file, except +.Dv SIGKILL +which terminates the writing and causes immediate exit of the process. +The behavior of +.Dv SIGKILL +can be disabled by setting tunable +.Xr sysctl 8 +variable +.Va kern.core_dump_can_intr +to zero. +.Pp By default, a process that changes user or group credentials whether real or effective will not create a corefile. 
This behaviour can be diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile --- a/share/man/man9/Makefile +++ b/share/man/man9/Makefile @@ -67,6 +67,7 @@ config_intrhook.9 \ contigmalloc.9 \ copy.9 \ + coredumper_register.9 \ counter.9 \ cpu_machdep.9 \ cpuset.9 \ @@ -903,6 +904,7 @@ copy.9 copyout.9 \ copy.9 copyout_nofault.9 \ copy.9 copystr.9 +MLINKS+=coredumper_register.9 coredumper_unregister.9 MLINKS+=counter.9 counter_u64_alloc.9 \ counter.9 counter_u64_free.9 \ counter.9 counter_u64_add.9 \ diff --git a/share/man/man9/coredumper_register.9 b/share/man/man9/coredumper_register.9 new file mode 100644 --- /dev/null +++ b/share/man/man9/coredumper_register.9 @@ -0,0 +1,156 @@ +.\" +.\" SPDX-License-Identifier: BSD-2-Clause +.\" +.\" Copyright (c) 2025 Kyle Evans +.\" +.Dd July 18, 2025 +.Dt COREDUMPER_REGISTER 9 +.Os +.Sh NAME +.Nm coredumper_register , +.Nm coredumper_unregister +.Nd loadable user coredumper support +.Sh SYNOPSIS +.In sys/ucoredump.h +.Ft void +.Fn coredumper_register "struct coredumper *cd" +.Ft void +.Fn coredumper_unregister "struct coredumper *cd" +.Pp +.Ft int +.Fn coredumper_probe_fn "struct thread *td" +.Ft int +.Fn coredumper_handle_fn "struct thread *td" "off_t limit" +.Bd -literal +/* Incomplete, but the useful members are depicted here. */ +struct coredumper { + const char *cd_name; + coredumper_probe_fn *cd_probe; + coredumper_handle_fn *cd_handle; +}; +.Ed +.Pp +.Ft int +.Fn coredump_write_fn "const struct coredump_writer *" "const void *" "size_t" \ +"off_t" "enum uio_seg" "struct ucred *" "size_t *" "struct thread *" +.Ft int +.Fn coredump_extend_fn "const struct coredump_writer *" "off_t" "struct ucred *" +.Bd -literal +struct coredump_writer { + void *ctx; + coredump_write_fn *write_fn; + coredump_extend_fn *extend_fn; +}; +.Ed +.Sh DESCRIPTION +The +.Nm +mechanism provides a path for kernel modules to register a new user process core +dumper. 
+The expected use of +.Nm +is for a module to define the fields of the struct coredumper listed above, then +call +.Fn coredumper_register +at +.Dv MOD_LOAD +time. +A corresponding +.Fn coredumper_unregister +should be called at +.Dv MOD_UNLOAD +time. +Note that +.Fn coredumper_unregister +will block until the specified coredumper is no longer processing coredumps. +.Pp +When a user process is preparing to start dumping core, the kernel will execute +the +.Fn cd_probe +function for each coredumper currently registered. +The +.Fn cd_probe +function is expected to return either -1 if it would decline to dump the +process, or a priority level greater than or equal to 0. +The coredumper with the highest priority will handle the coredump. +The following default priorities are defined: +.Bl -tag -width indent +.It Dv COREDUMPER_NOMATCH +This dumper declines dumping the process. +.It Dv COREDUMPER_GENERIC +This dumper will dump the process at the lowest priority. +This priority is not recommended, as the default vnode dumper will bid at +.Dv COREDUMPER_GENERIC +as well. +.It Dv COREDUMPER_SPECIAL +This dumper provides special behavior, and will dump the process at a higher +priority. +.It Dv COREDUMPER_HIGH_PRIORITY +This dumper would prefer to handle this coredump. +This may be used by, for instance, a custom or vendor-specific coredump +mechanism that wishes to preempt others. +.El +.Pp +Note that this system has been designed such that the +.Fn cd_probe +function can examine the process in question and make an informed decision. +Different processes being dumped could probe at different priorities in the +same coredumper. +.Pp +Once the highest priority coredumper has been selected, the +.Fn cd_handle +function will be invoked. +The +.Fn cd_handle +will receive both the thread and the +.Dv RLIMIT_CORE +.Xr setrlimit 2 +.Fa limit . +The proc lock will be held on entry, and should be unlocked before the handler +returns. 
+The +.Fa limit +is typically passed to the +.Fn sv_coredump +that belongs to the process's +.Va p_sysent . +.Pp +The +.Fn cd_handle +function should return either 0 if the dump was successful, or an appropriate +.Xr errno 2 +otherwise. +.Ss Customized Coredump Writers +Custom coredumpers can define their own +.Dv coredump_writer +to pass to +.Fn sv_coredump . +.Pp +The +.Va ctx +member is opaque and only to be used by the coredumper itself. +.Pp +The +.Va write_fn +function will be called by the +.Fn sv_coredump +implementation to write out data. +The +.Va extend_fn +function will be called to enlarge the coredump, in the sense that a hole is +created in any difference between the current size and the new size. +For convenience, the +.Fn core_vn_write +and +.Fn core_vn_extend +functions used by the vnode coredumper are exposed in +.In sys/ucoredump.h , +and the +.Dv coredump_vnode_ctx +defined there should be populated with the vnode to write to. +.Sh SEE ALSO +.Xr setrlimit 2 , +.Xr core 5 +.Sh AUTHORS +This manual page was written by +.An Kyle Evans Aq Mt kevans@FreeBSD.org . diff --git a/sys/kern/coredump_vnode.c b/sys/kern/coredump_vnode.c --- a/sys/kern/coredump_vnode.c +++ b/sys/kern/coredump_vnode.c @@ -54,6 +54,15 @@ #define NUM_CORE_FILES 5 #endif +static coredumper_handle_fn coredump_vnode; +static struct coredumper vnode_coredumper = { + .cd_name = "vnode_coredumper", + .cd_handle = coredump_vnode, +}; + +SYSINIT(vnode_coredumper_register, SI_SUB_EXEC, SI_ORDER_ANY, + coredumper_register, &vnode_coredumper); + _Static_assert(NUM_CORE_FILES >= 0 && NUM_CORE_FILES <= MAX_NUM_CORE_FILES, "NUM_CORE_FILES is out of range (0 to " __STRING(MAX_NUM_CORE_FILES) ")"); static int num_cores = NUM_CORE_FILES; @@ -385,7 +394,7 @@ * one. If there _is not_ one, it returns ENOSYS; otherwise it returns the * error from the process-specific routine. 
*/ -int +static int coredump_vnode(struct thread *td, off_t limit) { struct proc *p = td->td_proc; diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -2010,7 +2010,7 @@ static int core_extend(struct coredump_params *cp, off_t newsz) { - return ((*cp->cdw->extend_fn)(cp->cdw, newsz, cp->td->td_ucred)); + return ((*cp->cdw->extend_fn)(cp->cdw, newsz, cp->active_cred)); } int diff --git a/sys/kern/kern_ucoredump.c b/sys/kern/kern_ucoredump.c --- a/sys/kern/kern_ucoredump.c +++ b/sys/kern/kern_ucoredump.c @@ -38,12 +38,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include #include #include @@ -53,6 +55,11 @@ int compress_user_cores = 0; +static SLIST_HEAD(, coredumper) coredumpers = + SLIST_HEAD_INITIALIZER(coredumpers); +static struct rmlock coredump_rmlock; /* Protects coredumpers */ +RM_SYSINIT(coredump_lock, &coredump_rmlock, "coredump_lock"); + static int kern_logsigexit = 1; SYSCTL_INT(_kern, KERN_LOGSIGEXIT, logsigexit, CTLFLAG_RW, &kern_logsigexit, 0, @@ -92,6 +99,30 @@ &compress_user_cores_level, 0, "Corefile compression level"); +void +coredumper_register(struct coredumper *cd) +{ + + blockcount_init(&cd->cd_refcount); + rm_wlock(&coredump_rmlock); + SLIST_INSERT_HEAD(&coredumpers, cd, cd_entry); + rm_wunlock(&coredump_rmlock); +} + +void +coredumper_unregister(struct coredumper *cd) +{ + + rm_wlock(&coredump_rmlock); + SLIST_REMOVE(&coredumpers, cd, coredumper, cd_entry); + rm_wunlock(&coredump_rmlock); + + /* + * Wait for in-progress coredumps using this dumper before returning. + */ + blockcount_wait(&cd->cd_refcount, NULL, "dumpwait", 0); +} + /* * Force the current process to exit with the specified signal, dumping core * if appropriate. 
We bypass the normal tests for masked and caught signals, @@ -178,9 +209,11 @@ static int coredump(struct thread *td) { + struct coredumper *iter, *chosen; struct proc *p = td->td_proc; + struct rm_priotracker tracker; off_t limit; - int error; + int error, priority; PROC_LOCK_ASSERT(p, MA_OWNED); MPASS((p->p_flag & P_HADTHREADS) == 0 || p->p_singlethread == td); @@ -205,8 +238,51 @@ return (EFBIG); } - error = coredump_vnode(td, limit); + rm_rlock(&coredump_rmlock, &tracker); + priority = -1; + chosen = NULL; + SLIST_FOREACH(iter, &coredumpers, cd_entry) { + if (iter->cd_probe == NULL) { + /* + * If we haven't found anything of a higher priority + * yet, we'll call this a GENERIC. Ideally, we want + * coredumper modules to include a probe function. + */ + if (priority < 0) { + priority = COREDUMPER_GENERIC; + chosen = iter; + } + + continue; + } + + error = (*iter->cd_probe)(td); + if (error < 0) + continue; + + /* + * Higher priority than previous options. + */ + if (error > priority) { + priority = error; + chosen = iter; + } + } + + /* + * Acquire our refcount before we drop the lock so that + * coredumper_unregister() can safely assume that the refcount will only + * go down once it's dropped the rmlock. + */ + blockcount_acquire(&chosen->cd_refcount, 1); + rm_runlock(&coredump_rmlock, &tracker); + + /* Currently, we always have the vnode dumper built in. */ + MPASS(chosen != NULL); + error = ((*chosen->cd_handle)(td, limit)); PROC_LOCK_ASSERT(p, MA_NOTOWNED); + blockcount_release(&chosen->cd_refcount, 1); + return (error); } diff --git a/sys/sys/ucoredump.h b/sys/sys/ucoredump.h --- a/sys/sys/ucoredump.h +++ b/sys/sys/ucoredump.h @@ -38,6 +38,8 @@ #define _SYS_UCOREDUMP_H_ #include +#include +#include /* Coredump output parameters. 
*/ struct coredump_writer; @@ -55,7 +57,6 @@ coredump_write_fn core_vn_write; coredump_extend_fn core_vn_extend; -int coredump_vnode(struct thread *, off_t); struct coredump_writer { void *ctx; @@ -84,4 +85,34 @@ extern int compress_user_cores; extern int compress_user_cores_level; +typedef int coredumper_probe_fn(struct thread *); + +/* + * Some arbitrary values for coredumper probes to return. The highest priority + * we can find wins. It's somewhat expected that a coredumper may want to bid + * differently based on the process in question. Note that probe functions will + * be called with the proc lock held, so they must not sleep. + */ +#define COREDUMPER_NOMATCH (-1) /* Decline to touch it */ +#define COREDUMPER_GENERIC (0) /* Lowest bid; used when cd_probe is NULL */ +#define COREDUMPER_SPECIAL (50) /* Special handler */ +#define COREDUMPER_HIGH_PRIORITY (100) /* High-priority handler */ + +/* + * The handle functions will be called with the proc lock held, and must + * return with the proc lock dropped; coredump() asserts this on return. + */ +typedef int coredumper_handle_fn(struct thread *, off_t); + +struct coredumper { + SLIST_ENTRY(coredumper) cd_entry; /* Linkage on the registered list */ + const char *cd_name; + coredumper_probe_fn *cd_probe; /* Optional; NULL bids GENERIC */ + coredumper_handle_fn *cd_handle; /* Performs the dump */ + blockcount_t cd_refcount; /* Held across cd_handle calls */ +}; + +void coredumper_register(struct coredumper *); +void coredumper_unregister(struct coredumper *); + #endif /* _SYS_UCOREDUMP_H_ */