diff --git a/lib/libc/sys/ptrace.2 b/lib/libc/sys/ptrace.2 --- a/lib/libc/sys/ptrace.2 +++ b/lib/libc/sys/ptrace.2 @@ -2,7 +2,7 @@ .\" $NetBSD: ptrace.2,v 1.2 1995/02/27 12:35:37 cgd Exp $ .\" .\" This file is in the public domain. -.Dd July 15, 2019 +.Dd April 10, 2021 .Dt PTRACE 2 .Os .Sh NAME @@ -807,6 +807,70 @@ The .Fa data argument is ignored. +.It Dv PT_COREDUMP +This request creates a coredump for the stopped program. +The +.Fa addr +argument specifies a pointer to a +.Vt "struct ptrace_coredump" , +which is defined as follows: +.Bd -literal +struct ptrace_coredump { + int pc_fd; + uint32_t pc_flags; + off_t pc_limit; +}; +.Ed +The fields of the structure are: +.Bl -tag -width pc_flags +.It Dv pc_fd +File descriptor to write the dump to. +It must refer to a regular file, opened for writing. +.It Dv pc_flags +Flags. +The following flags are defined: +.Bl -tag -width PC_COMPRESS +.It Dv PC_COMPRESS +Request compression of the dump. +.It Dv PC_ALL +Include non-dumpable entries into the dump. +The dumper ignores +.Dv MAP_NOCORE +flag of the process map entry, but device mappings are not dumped even with +.Dv PC_ALL +set. +.El +.It Dv pc_limit +Maximum size of the coredump. +Specify zero for no limit. +.El +.Pp +The size of +.Vt "struct ptrace_coredump" +must be passed in +.Fa data . +.Pp +The process must be stopped before dumping core. +A single thread in the target process is temporarily unsuspended +in kernel to write the dump. +If the +.Nm +call fails before a thread is unsuspended, there is no event to +.Xr waitpid 2 +for. +If a thread was unsuspended, it will stop again before the +.Nm +call returns, and the process must be waited upon using +.Xr waitpid 2 +to consume the new stop event. +Since it is hard to deduce whether a thread was unsuspended before +an error occurred, it is recommended to unconditionally perform +.Xr waitpid 2 +with +.Dv WNOHANG +flag after +.Dv PT_COREDUMP , +and silently accept zero result from it. .El .Sh ARM MACHINE-SPECIFIC REQUESTS .Bl -tag -width "Dv PT_SETVFPREGS" diff --git a/sys/compat/freebsd32/freebsd32.h b/sys/compat/freebsd32/freebsd32.h --- a/sys/compat/freebsd32/freebsd32.h +++ b/sys/compat/freebsd32/freebsd32.h @@ -429,4 +429,11 @@ int32_t stbcnt; }; +struct ptrace_coredump32 { + int pc_fd; + uint32_t pc_flags; + uint32_t pc_limit1, pc_limit2; +}; + + #endif /* !_COMPAT_FREEBSD32_FREEBSD32_H_ */ diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c --- a/sys/compat/freebsd32/freebsd32_misc.c +++ b/sys/compat/freebsd32/freebsd32_misc.c @@ -932,6 +932,7 @@ struct ptrace_io_desc piod; struct ptrace_lwpinfo pl; struct ptrace_vm_entry pve; + struct ptrace_coredump pc; struct dbreg32 dbreg; struct fpreg32 fpreg; struct reg32 reg; @@ -943,6 +944,7 @@ struct ptrace_io_desc32 piod; struct ptrace_lwpinfo32 pl; struct ptrace_vm_entry32 pve; + struct ptrace_coredump32 pc; uint32_t args[nitems(td->td_sa.args)]; struct ptrace_sc_ret32 psr; } r32; @@ -1021,6 +1023,16 @@ CP(r32.pve, r.pve, pve_fsid); PTRIN_CP(r32.pve, r.pve, pve_path); break; + case PT_COREDUMP: + if (uap->data != sizeof(r32.pc)) + error = EINVAL; + else + error = copyin(uap->addr, &r32.pc, uap->data); + CP(r32.pc, r.pc, pc_fd); + CP(r32.pc, r.pc, pc_flags); + r.pc.pc_limit = PAIR32TO64(off_t, r32.pc.pc_limit); + data = sizeof(r.pc); + break; default: addr = uap->addr; break; diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -1471,12 +1471,13 @@ static void cb_size_segment(vm_map_entry_t, void *); static int core_write(struct coredump_params *, const void *, size_t, off_t, enum uio_seg, size_t *); -static void each_dumpable_segment(struct thread *, segment_callback, void *); +static void each_dumpable_segment(struct thread *, segment_callback, void *, + int); static int __elfN(corehdr)(struct coredump_params *, int, void *, size_t, - struct note_info_list *, size_t); + struct note_info_list *, size_t, int); static void __elfN(prepare_notes)(struct thread *, struct note_info_list *, size_t *); -static void __elfN(puthdr)(struct thread *, void *, size_t, int, size_t); +static void __elfN(puthdr)(struct thread *, void *, size_t, int, size_t, int); static void __elfN(putnote)(struct note_info *, struct sbuf *); static size_t register_note(struct note_info_list *, int, outfunc_t, void *); static int sbuf_drain_core_output(void *, const char *, int); @@ -1654,7 +1655,7 @@ __elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags) { struct ucred *cred = td->td_ucred; - int error = 0; + int compm, error = 0; struct sseg_closure seginfo; struct note_info_list notelst; struct coredump_params params; @@ -1669,7 +1670,7 @@ /* Size the program segments. */ seginfo.count = 0; seginfo.size = 0; - each_dumpable_segment(td, cb_size_segment, &seginfo); + each_dumpable_segment(td, cb_size_segment, &seginfo, flags); /* * Collect info about the core file header area. @@ -1705,9 +1706,13 @@ } /* Create a compression stream if necessary. */ - if (compress_user_cores != 0) { + compm = compress_user_cores; + if ((flags & (SVC_PT_COREDUMP | SVC_NOCOMPRESS)) == SVC_PT_COREDUMP && + compm == 0) + compm = COMPRESS_GZIP; + if (compm != 0) { params.comp = compressor_init(core_compressed_write, - compress_user_cores, CORE_BUF_SIZE, + compm, CORE_BUF_SIZE, compress_user_cores_level, ¶ms); if (params.comp == NULL) { error = EFAULT; @@ -1722,7 +1727,7 @@ */ hdr = malloc(hdrsize, M_TEMP, M_WAITOK); error = __elfN(corehdr)(¶ms, seginfo.count, hdr, hdrsize, ¬elst, - notesz); + notesz, flags); /* Write the contents of all of the writable segments. */ if (error == 0) { @@ -1806,7 +1811,8 @@ * caller-supplied data. */ static void -each_dumpable_segment(struct thread *td, segment_callback func, void *closure) +each_dumpable_segment(struct thread *td, segment_callback func, void *closure, + int flags) { struct proc *p = td->td_proc; vm_map_t map = &p->p_vmspace->vm_map; @@ -1824,12 +1830,15 @@ * are marked MAP_ENTRY_NOCOREDUMP now so we no longer * need to arbitrarily ignore such segments. */ - if (elf_legacy_coredump) { - if ((entry->protection & VM_PROT_RW) != VM_PROT_RW) - continue; - } else { - if ((entry->protection & VM_PROT_ALL) == 0) - continue; + if ((flags & SVC_ALL) == 0) { + if (elf_legacy_coredump) { + if ((entry->protection & VM_PROT_RW) != + VM_PROT_RW) + continue; + } else { + if ((entry->protection & VM_PROT_ALL) == 0) + continue; + } } /* @@ -1838,9 +1847,11 @@ * madvise(2). Do not dump submaps (i.e. parts of the * kernel map). */ - if (entry->eflags & (MAP_ENTRY_NOCOREDUMP|MAP_ENTRY_IS_SUB_MAP)) + if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) + continue; + if ((entry->eflags & MAP_ENTRY_NOCOREDUMP) != 0 && + (flags & SVC_ALL) == 0) continue; - if ((object = entry->object.vm_object) == NULL) continue; @@ -1867,7 +1878,8 @@ */ static int __elfN(corehdr)(struct coredump_params *p, int numsegs, void *hdr, - size_t hdrsize, struct note_info_list *notelst, size_t notesz) + size_t hdrsize, struct note_info_list *notelst, size_t notesz, + int flags) { struct note_info *ninfo; struct sbuf *sb; @@ -1875,7 +1887,7 @@ /* Fill in the header. */ bzero(hdr, hdrsize); - __elfN(puthdr)(p->td, hdr, hdrsize, numsegs, notesz); + __elfN(puthdr)(p->td, hdr, hdrsize, numsegs, notesz, flags); sb = sbuf_new(NULL, NULL, CORE_BUF_SIZE, SBUF_FIXEDLEN); sbuf_set_drain(sb, sbuf_drain_core_output, p); @@ -1953,7 +1965,7 @@ static void __elfN(puthdr)(struct thread *td, void *hdr, size_t hdrsize, int numsegs, - size_t notesz) + size_t notesz, int flags) { Elf_Ehdr *ehdr; Elf_Phdr *phdr; @@ -2032,7 +2044,7 @@ /* All the writable segments from the program. */ phc.phdr = phdr; phc.offset = round_page(hdrsize + notesz); - each_dumpable_segment(td, cb_put_phdr, &phc); + each_dumpable_segment(td, cb_put_phdr, &phc, flags); } static size_t diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -2521,6 +2521,42 @@ thread_unlock(td); } +static void +ptrace_coredump(struct thread *td) +{ + struct proc *p; + struct thr_coredump_req *tcq; + void *rl_cookie; + + MPASS(td == curthread); + p = td->td_proc; + PROC_LOCK_ASSERT(p, MA_OWNED); + if ((td->td_dbgflags & TDB_COREDUMPRQ) == 0) + return; + KASSERT((p->p_flag & P_STOPPED_TRACE) != 0, ("not stopped")); + + tcq = td->td_coredump; + KASSERT(tcq != NULL, ("td_coredump is NULL")); + + if (p->p_sysent->sv_coredump == NULL) { + tcq->tc_error = ENOSYS; + goto wake; + } + + PROC_UNLOCK(p); + rl_cookie = vn_rangelock_wlock(tcq->tc_vp, 0, OFF_MAX); + + tcq->tc_error = p->p_sysent->sv_coredump(td, tcq->tc_vp, + tcq->tc_limit, tcq->tc_flags); + + vn_rangelock_unlock(tcq->tc_vp, rl_cookie); + PROC_LOCK(p); +wake: + td->td_dbgflags &= ~TDB_COREDUMPRQ; + td->td_coredump = NULL; + wakeup(p); +} + static int sig_suspend_threads(struct thread *td, struct proc *p, int sending) { @@ -2648,7 +2684,15 @@ td->td_dbgflags &= ~TDB_STOPATFORK; } stopme: + td->td_dbgflags |= TDB_SSWITCH; thread_suspend_switch(td, p); + td->td_dbgflags &= ~TDB_SSWITCH; + if ((td->td_dbgflags & TDB_COREDUMPRQ) != 0) { + PROC_SUNLOCK(p); + ptrace_coredump(td); + PROC_SLOCK(p); + goto stopme; + } if (p->p_xthread == td) p->p_xthread = NULL; if (!(p->p_flag & P_TRACED)) diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c --- a/sys/kern/kern_thread.c +++ b/sys/kern/kern_thread.c @@ -1514,6 +1514,31 @@ return (setrunnable(td, 0)); } +void +thread_run_flash(struct thread *td) +{ + struct proc *p; + + p = td->td_proc; + PROC_LOCK_ASSERT(p, MA_OWNED); + + if (TD_ON_SLEEPQ(td)) + sleepq_remove_nested(td); + else + thread_lock(td); + + THREAD_LOCK_ASSERT(td, MA_OWNED); + KASSERT(TD_IS_SUSPENDED(td), ("Thread not suspended")); + + TD_CLR_SUSPENDED(td); + PROC_SLOCK(p); + MPASS(p->p_suspcount > 0); + p->p_suspcount--; + PROC_SUNLOCK(p); + if (setrunnable(td, 0)) + kick_proc0(); +} + /* * Allow all threads blocked by single threading to continue running. */ diff --git a/sys/kern/subr_sleepqueue.c b/sys/kern/subr_sleepqueue.c --- a/sys/kern/subr_sleepqueue.c +++ b/sys/kern/subr_sleepqueue.c @@ -854,6 +854,26 @@ (void *)td, (long)td->td_proc->p_pid, td->td_name); } +void +sleepq_remove_nested(struct thread *td) +{ + struct sleepqueue_chain *sc; + struct sleepqueue *sq; + const void *wchan; + + MPASS(TD_ON_SLEEPQ(td)); + + wchan = td->td_wchan; + sc = SC_LOOKUP(wchan); + mtx_lock_spin(&sc->sc_lock); + sq = sleepq_lookup(wchan); + MPASS(sq != NULL); + thread_lock(td); + sleepq_remove_thread(sq, td); + mtx_unlock_spin(&sc->sc_lock); + /* Returns with the thread lock owned. */ +} + #ifdef INVARIANTS /* * UMA zone item deallocator. diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c --- a/sys/kern/sys_process.c +++ b/sys/kern/sys_process.c @@ -51,6 +51,8 @@ #include #include #include +#include +#include #include @@ -469,6 +471,7 @@ struct ptrace_io_desc piod; struct ptrace_lwpinfo pl; struct ptrace_vm_entry pve; + struct ptrace_coredump pc; struct dbreg dbreg; struct fpreg fpreg; struct reg reg; @@ -519,6 +522,12 @@ case PT_VM_ENTRY: error = copyin(uap->addr, &r.pve, sizeof(r.pve)); break; + case PT_COREDUMP: + if (uap->data != sizeof(r.pc)) + error = EINVAL; + else + error = copyin(uap->addr, &r.pc, uap->data); + break; default: addr = uap->addr; break; @@ -601,6 +610,53 @@ p->p_ptevents = PTRACE_DEFAULT; } +static int +proc_can_ptrace(struct thread *td, struct proc *p) +{ + int error; + + PROC_LOCK_ASSERT(p, MA_OWNED); + + if ((p->p_flag & P_WEXIT) != 0) + return (ESRCH); + + if ((error = p_cansee(td, p)) != 0) + return (error); + if ((error = p_candebug(td, p)) != 0) + return (error); + + /* not being traced... */ + if ((p->p_flag & P_TRACED) == 0) + return (EPERM); + + /* not being traced by YOU */ + if (p->p_pptr != td->td_proc) + return (EBUSY); + + /* not currently stopped */ + if ((p->p_flag & P_STOPPED_TRACE) == 0 || + p->p_suspcount != p->p_numthreads || + (p->p_flag & P_WAITED) == 0) + return (EBUSY); + + return (0); +} + +static struct thread * +ptrace_sel_coredump_thread(struct proc *p) +{ + struct thread *td2; + + PROC_LOCK_ASSERT(p, MA_OWNED); + MPASS((p->p_flag & P_STOPPED_TRACE) != 0); + + FOREACH_THREAD_IN_PROC(p, td2) { + if ((td2->td_dbgflags & TDB_SSWITCH) != 0) + return (td2); + } + return (NULL); +} + int kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data) { @@ -611,14 +667,19 @@ struct ptrace_io_desc *piod = NULL; struct ptrace_lwpinfo *pl; struct ptrace_sc_ret *psr; + struct file *fp; + struct ptrace_coredump *pc; + struct thr_coredump_req *tcq; int error, num, tmp; - int proctree_locked = 0; lwpid_t tid = 0, *buf; #ifdef COMPAT_FREEBSD32 int wrap32 = 0, safe = 0; #endif + bool proctree_locked, p2_req_set; curp = td->td_proc; + proctree_locked = false; + p2_req_set = false; /* Lock proctree before locking the process. */ switch (req) { @@ -636,7 +697,7 @@ case PT_DETACH: case PT_GET_SC_ARGS: sx_xlock(&proctree_lock); - proctree_locked = 1; + proctree_locked = true; break; default: break; @@ -757,31 +818,47 @@ /* FALLTHROUGH */ default: - /* not being traced... */ - if ((p->p_flag & P_TRACED) == 0) { - error = EPERM; - goto fail; - } - - /* not being traced by YOU */ - if (p->p_pptr != td->td_proc) { - error = EBUSY; + /* + * Check for ptrace eligibility before waiting for + * holds to drain. + */ + error = proc_can_ptrace(td, p); + if (error != 0) goto fail; - } - /* not currently stopped */ - if ((p->p_flag & P_STOPPED_TRACE) == 0 || - p->p_suspcount != p->p_numthreads || - (p->p_flag & P_WAITED) == 0) { - error = EBUSY; - goto fail; + /* + * Block parallel ptrace requests. Most important, do + * not allow other thread in debugger to continue the + * debuggee until coredump finished. + */ + while ((p->p_flag2 & P2_PTRACEREQ) != 0) { + if (proctree_locked) + sx_xunlock(&proctree_lock); + error = msleep(&p->p_flag2, &p->p_mtx, PPAUSE | PCATCH | + (proctree_locked ? PDROP : 0), "pptrace", 0); + if (proctree_locked) { + sx_xlock(&proctree_lock); + PROC_LOCK(p); + } + if (error == 0 && td2->td_proc != p) + error = ESRCH; + if (error == 0) + error = proc_can_ptrace(td, p); + if (error != 0) + goto fail; } - /* OK */ + /* Ok */ break; } - /* Keep this process around until we finish this request. */ + /* + * Keep this process around and request parallel ptrace() + * request to wait until we finish this request. + */ + MPASS((p->p_flag2 & P2_PTRACEREQ) == 0); + p->p_flag2 |= P2_PTRACEREQ; + p2_req_set = true; _PHOLD(p); /* @@ -816,7 +893,7 @@ p->p_oppid); sx_xunlock(&proctree_lock); - proctree_locked = 0; + proctree_locked = false; MPASS(p->p_xthread == NULL); MPASS((p->p_flag & P_STOPPED_TRACE) == 0); @@ -1053,10 +1130,10 @@ } sx_xunlock(&proctree_lock); - proctree_locked = 0; + proctree_locked = false; sendsig: - MPASS(proctree_locked == 0); + MPASS(!proctree_locked); /* * Clear the pending event for the thread that just @@ -1299,6 +1376,62 @@ PROC_LOCK(p); break; + case PT_COREDUMP: + pc = addr; + CTR2(KTR_PTRACE, "PT_COREDUMP: pid %d, fd %d", + p->p_pid, pc->pc_fd); + + if ((pc->pc_flags & ~(PC_COMPRESS | PC_ALL)) != 0) { + error = EINVAL; + break; + } + PROC_UNLOCK(p); + + tcq = malloc(sizeof(*tcq), M_TEMP, M_WAITOK | M_ZERO); + fp = NULL; + error = fget_write(td, pc->pc_fd, &cap_write_rights, &fp); + if (error != 0) + goto coredump_cleanup_nofp; + if (fp->f_type != DTYPE_VNODE || fp->f_vnode->v_type != VREG) { + error = EPIPE; + goto coredump_cleanup; + } + + PROC_LOCK(p); + error = proc_can_ptrace(td, p); + if (error != 0) + goto coredump_cleanup_locked; + + td2 = ptrace_sel_coredump_thread(p); + if (td2 == NULL) { + error = EBUSY; + goto coredump_cleanup_locked; + } + KASSERT((td2->td_dbgflags & TDB_COREDUMPRQ) == 0, + ("proc %d tid %d req coredump", p->p_pid, td2->td_tid)); + + tcq->tc_vp = fp->f_vnode; + tcq->tc_limit = pc->pc_limit == 0 ? OFF_MAX : pc->pc_limit; + tcq->tc_flags = SVC_PT_COREDUMP; + if ((pc->pc_flags & PC_COMPRESS) == 0) + tcq->tc_flags |= SVC_NOCOMPRESS; + if ((pc->pc_flags & PC_ALL) != 0) + tcq->tc_flags |= SVC_ALL; + td2->td_coredump = tcq; + td2->td_dbgflags |= TDB_COREDUMPRQ; + thread_run_flash(td2); + while ((td2->td_dbgflags & TDB_COREDUMPRQ) != 0) + msleep(p, &p->p_mtx, PPAUSE, "crdmp", 0); + error = tcq->tc_error; +coredump_cleanup_locked: + PROC_UNLOCK(p); +coredump_cleanup: + fdrop(fp, td); +coredump_cleanup_nofp: + free(tcq, M_TEMP); + PROC_LOCK(p); + break; + default: #ifdef __HAVE_PTRACE_MACHDEP if (req >= PT_FIRSTMACH) { @@ -1311,11 +1444,15 @@ error = EINVAL; break; } - out: /* Drop our hold on this process now that the request has completed. */ _PRELE(p); fail: + if (p2_req_set) { + if ((p->p_flag2 & P2_PTRACEREQ) != 0) + wakeup(&p->p_flag2); + p->p_flag2 &= ~P2_PTRACEREQ; + } PROC_UNLOCK(p); if (proctree_locked) sx_xunlock(&proctree_lock); diff --git a/sys/sys/proc.h b/sys/sys/proc.h --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -377,6 +377,7 @@ int td_oncpu; /* (t) Which cpu we are on. */ void *td_lkpi_task; /* LinuxKPI task struct pointer */ int td_pmcpend; + void *td_coredump; /* (c) coredump request. */ #ifdef EPOCH_TRACE SLIST_HEAD(, epoch_tracker) td_epochs; #endif @@ -485,6 +486,8 @@ #define TDB_VFORK 0x00000800 /* vfork indicator for ptrace() */ #define TDB_FSTP 0x00001000 /* The thread is PT_ATTACH leader */ #define TDB_STEP 0x00002000 /* (x86) PSL_T set for PT_STEP */ +#define TDB_SSWITCH 0x00004000 /* Suspended in ptracestop */ +#define TDB_COREDUMPRQ 0x00008000 /* Coredump request */ /* * "Private" flags kept in td_pflags: @@ -829,6 +832,7 @@ #define P2_STKGAP_DISABLE_EXEC 0x00001000 /* Stack gap disabled after exec */ #define P2_ITSTOPPED 0x00002000 +#define P2_PTRACEREQ 0x00004000 /* Active ptrace req */ /* Flags protected by proctree_lock, kept in p_treeflags. */ #define P_TREE_ORPHANED 0x00000001 /* Reparented, on orphan list */ @@ -1191,6 +1195,7 @@ void childproc_stopped(struct proc *child, int reason); void childproc_continued(struct proc *child); void childproc_exited(struct proc *child); +void thread_run_flash(struct thread *td); int thread_suspend_check(int how); bool thread_suspend_check_needed(void); void thread_suspend_switch(struct thread *, struct proc *p); diff --git a/sys/sys/ptrace.h b/sys/sys/ptrace.h --- a/sys/sys/ptrace.h +++ b/sys/sys/ptrace.h @@ -74,6 +74,8 @@ #define PT_GET_SC_ARGS 27 /* fetch syscall args */ #define PT_GET_SC_RET 28 /* fetch syscall results */ +#define PT_COREDUMP 29 /* create a coredump */ + #define PT_GETREGS 33 /* get general-purpose registers */ #define PT_SETREGS 34 /* set general-purpose registers */ #define PT_GETFPREGS 35 /* get floating-point registers */ @@ -176,8 +178,27 @@ char *pve_path; /* Path name of object. */ }; +/* Argument structure for PT_COREDUMP */ +struct ptrace_coredump { + int pc_fd; /* File descriptor to write dump to. */ + uint32_t pc_flags; /* Flags PC_* */ + off_t pc_limit; /* Maximum size of the coredump, + 0 for no limit. */ +}; + +/* Flags for PT_COREDUMP pc_flags */ +#define PC_COMPRESS 0x00000001 /* Allow compression */ +#define PC_ALL 0x00000002 /* Include non-dumpable entries */ + #ifdef _KERNEL +struct thr_coredump_req { + struct vnode *tc_vp; /* vnode to write coredump to. */ + off_t tc_limit; /* max coredump file size. */ + int tc_flags; /* user flags */ + int tc_error; /* request result */ +}; + int ptrace_set_pc(struct thread *_td, unsigned long _addr); int ptrace_single_step(struct thread *_td); int ptrace_clear_single_step(struct thread *_td); diff --git a/sys/sys/sleepqueue.h b/sys/sys/sleepqueue.h --- a/sys/sys/sleepqueue.h +++ b/sys/sys/sleepqueue.h @@ -100,6 +100,7 @@ void sleepq_remove(struct thread *td, const void *wchan); int sleepq_remove_matching(struct sleepqueue *sq, int queue, bool (*matches)(struct thread *), int pri); +void sleepq_remove_nested(struct thread *td); int sleepq_signal(const void *wchan, int flags, int pri, int queue); void sleepq_set_timeout_sbt(const void *wchan, sbintime_t sbt, sbintime_t pr, int flags); diff --git a/sys/sys/sysent.h b/sys/sys/sysent.h --- a/sys/sys/sysent.h +++ b/sys/sys/sysent.h @@ -171,6 +171,11 @@ #define SV_ABI_CLOUDABI 17 #define SV_ABI_UNDEF 255 +/* sv_coredump flags */ +#define SVC_PT_COREDUMP 0x00000001 /* dump requested by ptrace(2) */ +#define SVC_NOCOMPRESS 0x00000002 /* disable compression. */ +#define SVC_ALL 0x00000004 /* dump everything */ + #ifdef _KERNEL extern struct sysentvec aout_sysvec; extern struct sysent sysent[]; diff --git a/usr.bin/gcore/gcore.1 b/usr.bin/gcore/gcore.1 --- a/usr.bin/gcore/gcore.1 +++ b/usr.bin/gcore/gcore.1 @@ -28,7 +28,7 @@ .\" @(#)gcore.1 8.2 (Berkeley) 4/18/94 .\" $FreeBSD$ .\" -.Dd July 13, 2016 +.Dd April 24, 2021 .Dt GCORE 1 .Os .Sh NAME @@ -37,6 +37,7 @@ .Sh SYNOPSIS .Nm .Op Fl f +.Op Fl k .Op Fl c Ar core .Op Ar executable .Ar pid @@ -58,13 +59,24 @@ Write the core file to the specified file instead of .Dq Pa core. . .It Fl f -Dumps all available segments, excluding only malformed and undumpable segments. +Dumps all available segments, excluding only malformed and undumpable +segments. Unlike the default invocation, this flag dumps mappings of devices which may invalidate the state of device transactions or trigger other unexpected behavior. As a result, this flag should only be used when the behavior of the application and any devices it has mapped is fully understood and any side effects can be controlled or tolerated. +.It Fl k +Use the +.Xr ptrace 2 +.Dv PT_COREDUMP +kernel facility to write the core dump, instead of reading the process' +memory and constructing the dump file in +.Nm +itself. +This is faster, and the dump is written by the +same kernel code that writes core dumps upon fatal signals. .El .Sh FILES .Bl -tag -width /var/log/messages -compact diff --git a/usr.bin/gcore/gcore.c b/usr.bin/gcore/gcore.c --- a/usr.bin/gcore/gcore.c +++ b/usr.bin/gcore/gcore.c @@ -56,13 +56,16 @@ */ #include +#include #include #include #include #include +#include #include #include +#include #include #include #include @@ -75,21 +78,77 @@ static void usage(void) __dead2; static pid_t pid; +static bool kflag = false; SET_DECLARE(dumpset, struct dumpers); +static int +open_corefile(char *corefile) +{ + char fname[MAXPATHLEN]; + int fd; + + if (corefile == NULL) { + (void)snprintf(fname, sizeof(fname), "core.%d", pid); + corefile = fname; + } + fd = open(corefile, O_RDWR | O_CREAT | O_TRUNC, DEFFILEMODE); + if (fd < 0) + err(1, "%s", corefile); + return (fd); +} + +static void +kcoredump(int fd, pid_t pid) +{ + struct ptrace_coredump pc; + int error, res, ret, waited; + + error = ptrace(PT_ATTACH, pid, NULL, 0); + if (error != 0) + err(1, "attach"); + + waited = waitpid(pid, &res, 0); + if (waited == -1) + err(1, "wait for STOP"); + + ret = 0; + memset(&pc, 0, sizeof(pc)); + pc.pc_fd = fd; + pc.pc_flags = (pflags & PFLAGS_FULL) != 0 ? PC_ALL : 0; + error = ptrace(PT_COREDUMP, pid, (void *)&pc, sizeof(pc)); + if (error == -1) { + warn("coredump"); + ret = 1; + } + + waited = waitpid(pid, &res, WNOHANG); + if (waited == -1) { + warn("wait after coredump"); + ret = 1; + } + + error = ptrace(PT_DETACH, pid, NULL, 0); + if (error == -1) { + warn("detach failed, check process status"); + ret = 1; + } + + exit(ret); +} + int main(int argc, char *argv[]) { int ch, efd, fd, name[4]; char *binfile, *corefile; - char passpath[MAXPATHLEN], fname[MAXPATHLEN]; + char passpath[MAXPATHLEN]; struct dumpers **d, *dumper; size_t len; pflags = 0; corefile = NULL; - while ((ch = getopt(argc, argv, "c:f")) != -1) { + while ((ch = getopt(argc, argv, "c:fk")) != -1) { switch (ch) { case 'c': corefile = optarg; @@ -97,6 +156,9 @@ case 'f': pflags |= PFLAGS_FULL; break; + case 'k': + kflag = true; + break; default: usage(); break; @@ -104,10 +166,26 @@ } argv += optind; argc -= optind; + /* XXX we should check that the pid argument is really a number */ switch (argc) { case 1: pid = atoi(argv[0]); + break; + case 2: + binfile = argv[0]; + pid = atoi(argv[1]); + break; + default: + usage(); + } + + if (kflag) { + fd = open_corefile(corefile); + kcoredump(fd, pid); + } + + if (argc == 1) { name[0] = CTL_KERN; name[1] = KERN_PROC; name[2] = KERN_PROC_PATHNAME; @@ -116,13 +194,6 @@ if (sysctl(name, 4, passpath, &len, NULL, 0) == -1) errx(1, "kern.proc.pathname failure"); binfile = passpath; - break; - case 2: - pid = atoi(argv[1]); - binfile = argv[0]; - break; - default: - usage(); } efd = open(binfile, O_RDONLY, 0); if (efd < 0) @@ -138,13 +209,7 @@ } if (dumper == NULL) errx(1, "Invalid executable file"); - if (corefile == NULL) { - (void)snprintf(fname, sizeof(fname), "core.%d", pid); - corefile = fname; - } - fd = open(corefile, O_RDWR|O_CREAT|O_TRUNC, DEFFILEMODE); - if (fd < 0) - err(1, "%s", corefile); + fd = open_corefile(corefile); dumper->dump(efd, fd, pid); (void)close(fd); @@ -156,6 +221,7 @@ usage(void) { - (void)fprintf(stderr, "usage: gcore [-c core] [executable] pid\n"); + (void)fprintf(stderr, + "usage: gcore [-kf] [-c core] [executable] pid\n"); exit(1); }