diff --git a/lib/libc/sys/ptrace.2 b/lib/libc/sys/ptrace.2 --- a/lib/libc/sys/ptrace.2 +++ b/lib/libc/sys/ptrace.2 @@ -2,7 +2,7 @@ .\" $NetBSD: ptrace.2,v 1.2 1995/02/27 12:35:37 cgd Exp $ .\" .\" This file is in the public domain. -.Dd January 27, 2022 +.Dd December 2, 2022 .Dt PTRACE 2 .Os .Sh NAME @@ -957,10 +957,44 @@ .Vt "struct ptrace_coredump" must be passed in .Fa data . -.Pp -The process must be stopped before dumping core. +.It Dv PT_SC_REMOTE +Request to execute a syscall in the context of the traced process, +in the specified thread. +The +.Fa addr +argument must point to the +.Vt "struct ptrace_sc_remote" , +which describes the requested syscall and its arguments, and receives +the result. +The size of +.Vt "struct ptrace_sc_remote" +must be passed in +.Fa data . +.Bd -literal +struct ptrace_sc_remote { + struct ptrace_sc_ret pscr_ret; + u_int pscr_syscall; + u_int pscr_nargs; + u_long *pscr_args; +}; +.Ed +The +.Dv pscr_syscall +contains the syscall number to execute, the +.Dv pscr_nargs +is the number of supplied arguments, which are filled in the +.Dv pscr_args +array. +The result of the execution is returned in the +.Dv pscr_ret +member. +Note that the request and its result do not affect the returned value from +the currently executed syscall, if any. +.El +.Sh PT_COREDUMP and PT_SC_REMOTE usage +The process must be stopped before dumping core or initiating a remote syscall. A single thread in the target process is temporarily unsuspended -in kernel to write the dump. +in kernel to perform the action. If the .Nm call fails before a thread is unsuspended, there is no event to @@ -977,9 +1011,23 @@ with .Dv WNOHANG flag after -.Dv PT_COREDUMP , +.Dv PT_COREDUMP +and +.Dv PT_SC_REMOTE , and silently accept zero result from it. -.El +.Pp +For +.Dv PT_SC_REMOTE , +the selected thread must be stopped in a safe place, which is +currently defined as a syscall exit, or a return from kernel to +user mode (basically, a signal handler call place).
+Kernel returns +.Er EBUSY +status if attempt is made to execute remote syscall at unsafe stop. +.Pp +Note that due to the mode of execution for the remote syscall, in +particular, the setting where only one thread is allowed to run, +the syscall might block on resources owned by suspended threads. .Sh ARM MACHINE-SPECIFIC REQUESTS .Bl -tag -width "Dv PT_SETVFPREGS" .It Dv PT_GETVFPREGS diff --git a/sys/compat/freebsd32/freebsd32.h b/sys/compat/freebsd32/freebsd32.h --- a/sys/compat/freebsd32/freebsd32.h +++ b/sys/compat/freebsd32/freebsd32.h @@ -492,10 +492,22 @@ int32_t stbcnt; }; +struct ptrace_sc_ret32 { + uint32_t sr_retval[2]; + int sr_error; +}; + struct ptrace_coredump32 { int pc_fd; uint32_t pc_flags; uint32_t pc_limit1, pc_limit2; }; +struct ptrace_sc_remote32 { + struct ptrace_sc_ret32 pscr_ret; + u_int pscr_syscall; + u_int pscr_nargs; + uint32_t pscr_args; +}; + #endif /* !_COMPAT_FREEBSD32_FREEBSD32_H_ */ diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c --- a/sys/compat/freebsd32/freebsd32_misc.c +++ b/sys/compat/freebsd32/freebsd32_misc.c @@ -133,11 +133,6 @@ uint32_t piod_len; }; -struct ptrace_sc_ret32 { - uint32_t sr_retval[2]; - int sr_error; -}; - struct ptrace_vm_entry32 { int pve_entry; int pve_timestamp; @@ -971,6 +966,7 @@ struct ptrace_lwpinfo pl; struct ptrace_vm_entry pve; struct ptrace_coredump pc; + struct ptrace_sc_remote sr; struct dbreg32 dbreg; struct fpreg32 fpreg; struct reg32 reg; @@ -984,10 +980,13 @@ struct ptrace_lwpinfo32 pl; struct ptrace_vm_entry32 pve; struct ptrace_coredump32 pc; + struct ptrace_sc_remote32 sr; uint32_t args[nitems(td->td_sa.args)]; struct ptrace_sc_ret32 psr; struct iovec32 vec; } r32; + u_long pscr_args[nitems(td->td_sa.args)]; + u_int pscr_args32[nitems(td->td_sa.args)]; void *addr; int data, error, i; @@ -1086,6 +1085,28 @@ r.pc.pc_limit = PAIR32TO64(off_t, r32.pc.pc_limit); data = sizeof(r.pc); break; + case PT_SC_REMOTE: + if (uap->data != sizeof(r32.sr)) { 
+ error = EINVAL; + break; + } + error = copyin(uap->addr, &r32.sr, uap->data); + if (error != 0) + break; + CP(r32.sr, r.sr, pscr_syscall); + CP(r32.sr, r.sr, pscr_nargs); + if (r.sr.pscr_nargs > nitems(td->td_sa.args)) { + error = EINVAL; + break; + } + error = copyin(PTRIN(r32.sr.pscr_args), pscr_args32, + sizeof(u_int) * r32.sr.pscr_nargs); + if (error != 0) + break; + for (i = 0; i < r32.sr.pscr_nargs; i++) + pscr_args[i] = pscr_args32[i]; + r.sr.pscr_args = pscr_args; + break; default: addr = uap->addr; break; @@ -1146,6 +1167,12 @@ error = copyout(&r32.psr, uap->addr, MIN(uap->data, sizeof(r32.psr))); break; + case PT_SC_REMOTE: + ptrace_sc_ret_to32(&r.sr.pscr_ret, &r32.sr.pscr_ret); + error = copyout(&r32.sr.pscr_ret, uap->addr + + offsetof(struct ptrace_sc_remote32, pscr_ret), + sizeof(r32.psr)); + break; } return (error); diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -39,9 +39,11 @@ #include __FBSDID("$FreeBSD$"); +#include "opt_capsicum.h" #include "opt_ktrace.h" #include +#include #include #include #include @@ -75,6 +77,7 @@ #include #include #include +#include #include #include #include @@ -279,7 +282,7 @@ ast_sig(struct thread *td, int tda) { struct proc *p; - int sig; + int old_boundary, sig; bool resched_sigs; p = td->td_proc; @@ -321,12 +324,15 @@ !SIGISEMPTY(p->p_siglist)) { sigfastblock_fetch(td); PROC_LOCK(p); + old_boundary = ~TDB_BOUNDARY | (td->td_dbgflags & TDB_BOUNDARY); + td->td_dbgflags |= TDB_BOUNDARY; mtx_lock(&p->p_sigacts->ps_mtx); while ((sig = cursig(td)) != 0) { KASSERT(sig >= 0, ("sig %d", sig)); postsig(sig); } mtx_unlock(&p->p_sigacts->ps_mtx); + td->td_dbgflags &= old_boundary; PROC_UNLOCK(p); resched_sigs = true; } else { @@ -2630,41 +2636,144 @@ } static void -ptrace_coredump(struct thread *td) +ptrace_coredump(struct thread *td, struct proc *p, struct thr_coredump_req *tcq) { - struct proc *p; - struct thr_coredump_req *tcq; void *rl_cookie; - MPASS(td ==
curthread); - p = td->td_proc; - PROC_LOCK_ASSERT(p, MA_OWNED); - if ((td->td_dbgflags & TDB_COREDUMPRQ) == 0) - return; - KASSERT((p->p_flag & P_STOPPED_TRACE) != 0, ("not stopped")); - - tcq = td->td_coredump; - KASSERT(tcq != NULL, ("td_coredump is NULL")); - if (p->p_sysent->sv_coredump == NULL) { tcq->tc_error = ENOSYS; - goto wake; + return; } - PROC_UNLOCK(p); rl_cookie = vn_rangelock_wlock(tcq->tc_vp, 0, OFF_MAX); - tcq->tc_error = p->p_sysent->sv_coredump(td, tcq->tc_vp, tcq->tc_limit, tcq->tc_flags); - vn_rangelock_unlock(tcq->tc_vp, rl_cookie); +} +
+/*
+ * Execute the system call described by tsr in the context of the current
+ * thread, on behalf of a PT_SC_REMOTE request.  The error code and return
+ * values are stored in tsr->ts_ret.  The thread's td_retval[] and td_errno
+ * are saved before and restored after the call.
+ */
+static void
+ptrace_syscallrq(struct thread *td, struct proc *p, struct thr_syscall_req *tsr)
+{
+	struct sysentvec *sv;
+	struct sysent *se;
+	register_t rv_saved[2];
+	int error, nerror;
+	int sc;
+	bool audited, sy_thr_static;
+
+	sv = p->p_sysent;
+	sc = tsr->ts_sa.code;
+	if (sc == SYS_syscall || sc == SYS___syscall) {
+		/*
+		 * Indirect syscall: the real syscall number is passed as
+		 * the first argument, followed by the actual arguments.
+		 */
+		if (tsr->ts_nargs == 0) {
+			tsr->ts_ret.sr_error = EINVAL;
+			return;
+		}
+		sc = tsr->ts_sa.args[0];
+		/* The regions overlap, so memcpy() cannot be used. */
+		memmove(&tsr->ts_sa.args[0], &tsr->ts_sa.args[1],
+		    sizeof(register_t) * (tsr->ts_nargs - 1));
+	}
+
+	/* sv_table has sv_size entries, so sc == sv_size is out of range. */
+	if (sv->sv_table == NULL || sc < 0 || sc >= sv->sv_size) {
+		tsr->ts_ret.sr_error = ENOSYS;
+		return;
+	}
+
+	tsr->ts_sa.code = sc;
+	tsr->ts_sa.callp = se = &sv->sv_table[sc];
+
+#ifdef CAPABILITY_MODE
+	if (IN_CAPABILITY_MODE(td) && (se->sy_flags & SYF_CAPENABLED) == 0) {
+		tsr->ts_ret.sr_error = ECAPMODE;
+		return;
+	}
+#endif
+
+	sy_thr_static = (se->sy_thrcnt & SY_THR_STATIC) != 0;
+	audited = AUDIT_SYSCALL_ENTER(sc, td) != 0;
+
+	if (!sy_thr_static) {
+		error = syscall_thread_enter(td, se);
+		if (error != 0) {
+			tsr->ts_ret.sr_error = error;
+			return;
+		}
+	}
+
+	/* Preserve the result state of the interrupted context. */
+	rv_saved[0] = td->td_retval[0];
+	rv_saved[1] = td->td_retval[1];
+	nerror = td->td_errno;
+	td->td_retval[0] = 0;
+	td->td_retval[1] = 0;
+
+#ifdef KDTRACE_HOOKS
+	if (se->sy_entry != 0)
+		(*systrace_probe_func)(&tsr->ts_sa, SYSTRACE_ENTRY, 0);
+#endif
+	tsr->ts_ret.sr_error = se->sy_call(td, tsr->ts_sa.args);
+#ifdef KDTRACE_HOOKS
+	if (se->sy_return != 0)
+		(*systrace_probe_func)(&tsr->ts_sa, SYSTRACE_RETURN,
+		    tsr->ts_ret.sr_error != 0 ? -1 : td->td_retval[0]);
+#endif
+
+	tsr->ts_ret.sr_retval[0] = td->td_retval[0];
+	tsr->ts_ret.sr_retval[1] = td->td_retval[1];
+	td->td_retval[0] = rv_saved[0];
+	td->td_retval[1] = rv_saved[1];
+	td->td_errno = nerror;
+
+	/* Report the result of the remote syscall, not a stale "error". */
+	if (audited)
+		AUDIT_SYSCALL_EXIT(tsr->ts_ret.sr_error, td);
+	if (!sy_thr_static)
+		syscall_thread_exit(td, se);
+}
+
+static void +ptrace_remotereq(struct thread *td, int flag) +{ + struct proc *p; + + MPASS(td == curthread); + p = td->td_proc; + PROC_LOCK_ASSERT(p, MA_OWNED); + if ((td->td_dbgflags & flag) == 0) + return; + KASSERT((p->p_flag & P_STOPPED_TRACE) != 0, ("not stopped")); + KASSERT(td->td_remotereq != NULL, ("td_remotereq is NULL")); + + PROC_UNLOCK(p); + switch (flag) { + case TDB_COREDUMPRQ: + ptrace_coredump(td, p, td->td_remotereq); + break; + case TDB_SCREMOTERQ: + ptrace_syscallrq(td, p, td->td_remotereq); + break; + default: + __unreachable(); + } PROC_LOCK(p); -wake: - td->td_dbgflags &= ~TDB_COREDUMPRQ; - td->td_coredump = NULL; + + td->td_dbgflags &= ~flag; + td->td_remotereq = NULL; wakeup(p); } + static int sig_suspend_threads(struct thread *td, struct proc *p) { @@ -2792,9 +2901,14 @@ td->td_dbgflags |= TDB_SSWITCH; thread_suspend_switch(td, p); td->td_dbgflags &= ~TDB_SSWITCH; - if ((td->td_dbgflags & TDB_COREDUMPRQ) != 0) { + if ((td->td_dbgflags & (TDB_COREDUMPRQ | + TDB_SCREMOTERQ)) != 0) { + MPASS((td->td_dbgflags & (TDB_COREDUMPRQ | + TDB_SCREMOTERQ)) != + (TDB_COREDUMPRQ | TDB_SCREMOTERQ)); PROC_SUNLOCK(p); - ptrace_coredump(td); + ptrace_remotereq(td, td->td_dbgflags & + (TDB_COREDUMPRQ | TDB_SCREMOTERQ)); PROC_SLOCK(p); goto stopme; } diff --git a/sys/kern/subr_syscall.c b/sys/kern/subr_syscall.c --- a/sys/kern/subr_syscall.c +++ b/sys/kern/subr_syscall.c @@ -73,6 +73,7 @@ traced = (p->p_flag & P_TRACED) != 0; if (__predict_false(traced || td->td_dbgflags & TDB_USERWR)) { PROC_LOCK(p); + MPASS((td->td_dbgflags & TDB_BOUNDARY) == 0); td->td_dbgflags &= ~TDB_USERWR; if (traced) td->td_dbgflags
|= TDB_SCE; @@ -201,7 +202,7 @@ td->td_retval[1]); if (__predict_false(traced)) { PROC_LOCK(p); - td->td_dbgflags &= ~TDB_SCE; + td->td_dbgflags &= ~(TDB_SCE | TDB_BOUNDARY); PROC_UNLOCK(p); } (p->p_sysent->sv_set_syscall_retval)(td, error); @@ -280,9 +281,13 @@ */ if (traced && ((td->td_dbgflags & (TDB_FORK | TDB_EXEC)) != 0 || - (p->p_ptevents & PTRACE_SCX) != 0)) + (p->p_ptevents & PTRACE_SCX) != 0)) { + MPASS((td->td_dbgflags & TDB_BOUNDARY) == 0); + td->td_dbgflags |= TDB_BOUNDARY; ptracestop(td, SIGTRAP, NULL); - td->td_dbgflags &= ~(TDB_SCX | TDB_EXEC | TDB_FORK); + } + td->td_dbgflags &= ~(TDB_SCX | TDB_EXEC | TDB_FORK | + TDB_BOUNDARY); PROC_UNLOCK(p); } } diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c --- a/sys/kern/sys_process.c +++ b/sys/kern/sys_process.c @@ -592,6 +592,7 @@ struct ptrace_lwpinfo pl; struct ptrace_vm_entry pve; struct ptrace_coredump pc; + struct ptrace_sc_remote sr; struct dbreg dbreg; struct fpreg fpreg; struct reg reg; @@ -600,6 +601,7 @@ struct ptrace_sc_ret psr; int ptevents; } r; + u_long pscr_args[nitems(td->td_sa.args)]; void *addr; int error; @@ -657,6 +659,24 @@ else error = copyin(uap->addr, &r.pc, uap->data); break; + case PT_SC_REMOTE: + if (uap->data != sizeof(r.sr)) { + error = EINVAL; + break; + } + error = copyin(uap->addr, &r.sr, uap->data); + if (error != 0) + break; + if (r.sr.pscr_nargs > nitems(td->td_sa.args)) { + error = EINVAL; + break; + } + error = copyin(r.sr.pscr_args, pscr_args, + sizeof(u_long) * r.sr.pscr_nargs); + if (error != 0) + break; + r.sr.pscr_args = pscr_args; + break; default: addr = uap->addr; break; @@ -703,6 +723,11 @@ error = copyout(&r.psr, uap->addr, MIN(uap->data, sizeof(r.psr))); break; + case PT_SC_REMOTE: + error = copyout(&r.sr.pscr_ret, uap->addr + + offsetof(struct ptrace_sc_remote, pscr_ret), + sizeof(r.sr.pscr_ret)); + break; } return (error); @@ -812,9 +837,11 @@ struct ptrace_io_desc *piod = NULL; struct ptrace_lwpinfo *pl; struct ptrace_sc_ret *psr; + struct 
ptrace_sc_remote *pscr; struct file *fp; struct ptrace_coredump *pc; struct thr_coredump_req *tcq; + struct thr_syscall_req *tsr; int error, num, tmp; lwpid_t tid = 0, *buf; #ifdef COMPAT_FREEBSD32 @@ -1559,7 +1586,8 @@ error = EBUSY; goto coredump_cleanup_locked; } - KASSERT((td2->td_dbgflags & TDB_COREDUMPRQ) == 0, + KASSERT((td2->td_dbgflags & (TDB_COREDUMPRQ | + TDB_SCREMOTERQ)) == 0, ("proc %d tid %d req coredump", p->p_pid, td2->td_tid)); tcq->tc_vp = fp->f_vnode; @@ -1569,7 +1597,7 @@ tcq->tc_flags |= SVC_NOCOMPRESS; if ((pc->pc_flags & PC_ALL) != 0) tcq->tc_flags |= SVC_ALL; - td2->td_coredump = tcq; + td2->td_remotereq = tcq; td2->td_dbgflags |= TDB_COREDUMPRQ; thread_run_flash(td2); while ((td2->td_dbgflags & TDB_COREDUMPRQ) != 0) @@ -1584,6 +1612,50 @@ PROC_LOCK(p); break; + case PT_SC_REMOTE: + pscr = addr; + CTR2(KTR_PTRACE, "PT_SC_REMOTE: pid %d, syscall %d", + p->p_pid, pscr->pscr_syscall); + if ((td2->td_dbgflags & TDB_BOUNDARY) == 0) { + error = EBUSY; + break; + } + PROC_UNLOCK(p); + MPASS(pscr->pscr_nargs <= nitems(td->td_sa.args)); + + tsr = malloc(sizeof(struct thr_syscall_req), M_TEMP, + M_WAITOK | M_ZERO); + + tsr->ts_sa.code = pscr->pscr_syscall; + tsr->ts_nargs = pscr->pscr_nargs; + memcpy(&tsr->ts_sa.args, pscr->pscr_args, + sizeof(register_t) * tsr->ts_nargs); + + PROC_LOCK(p); + error = proc_can_ptrace(td, p); + if (error != 0) { + free(tsr, M_TEMP); + break; + } + if (td2->td_proc != p) { + free(tsr, M_TEMP); + error = ESRCH; + break; + } + KASSERT((td2->td_dbgflags & (TDB_COREDUMPRQ | + TDB_SCREMOTERQ)) == 0, + ("proc %d tid %d req coredump", p->p_pid, td2->td_tid)); + + td2->td_remotereq = tsr; + td2->td_dbgflags |= TDB_SCREMOTERQ; + thread_run_flash(td2); + while ((td2->td_dbgflags & TDB_SCREMOTERQ) != 0) + msleep(p, &p->p_mtx, PPAUSE, "pscrx", 0); + error = 0; + memcpy(&pscr->pscr_ret, &tsr->ts_ret, sizeof(tsr->ts_ret)); + free(tsr, M_TEMP); + break; + default: #ifdef __HAVE_PTRACE_MACHDEP if (req >= PT_FIRSTMACH) { diff --git 
a/sys/sys/proc.h b/sys/sys/proc.h --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -384,7 +384,7 @@ int td_oncpu; /* (t) Which cpu we are on. */ void *td_lkpi_task; /* LinuxKPI task struct pointer */ int td_pmcpend; - void *td_coredump; /* (c) coredump request. */ + void *td_remotereq; /* (c) dbg remote request. */ off_t td_ktr_io_lim; /* (k) limit for ktrace file size */ #ifdef EPOCH_TRACE SLIST_HEAD(, epoch_tracker) td_epochs; @@ -522,7 +522,9 @@ #define TDB_FSTP 0x00001000 /* The thread is PT_ATTACH leader */ #define TDB_STEP 0x00002000 /* (x86) PSL_T set for PT_STEP */ #define TDB_SSWITCH 0x00004000 /* Suspended in ptracestop */ -#define TDB_COREDUMPRQ 0x00008000 /* Coredump request */ +#define TDB_BOUNDARY 0x00008000 /* ptracestop() at boundary */ +#define TDB_COREDUMPRQ 0x00010000 /* Coredump request */ +#define TDB_SCREMOTERQ 0x00020000 /* Remote syscall request */ /* * "Private" flags kept in td_pflags: diff --git a/sys/sys/ptrace.h b/sys/sys/ptrace.h --- a/sys/sys/ptrace.h +++ b/sys/sys/ptrace.h @@ -87,6 +87,7 @@ #define PT_VM_ENTRY 41 /* Get VM map (entry) */ #define PT_GETREGSET 42 /* Get a target register set */ #define PT_SETREGSET 43 /* Set a target register set */ +#define PT_SC_REMOTE 44 /* Execute a syscall */ #define PT_FIRSTMACH 64 /* for machine-specific requests */ #include /* machine-specific requests, if any */ @@ -192,8 +193,17 @@ #define PC_COMPRESS 0x00000001 /* Allow compression */ #define PC_ALL 0x00000002 /* Include non-dumpable entries */ +struct ptrace_sc_remote { + struct ptrace_sc_ret pscr_ret; /* Result of the executed syscall */ + u_int pscr_syscall; /* Syscall number to execute */ + u_int pscr_nargs; /* Number of supplied arguments */ + u_long *pscr_args; /* Array of pscr_nargs arguments */ +}; + #ifdef _KERNEL +#include + struct thr_coredump_req { struct vnode *tc_vp; /* vnode to write coredump to. */ off_t tc_limit; /* max coredump file size.
*/ @@ -201,6 +211,12 @@ int tc_error; /* request result */ }; +struct thr_syscall_req { + struct ptrace_sc_ret ts_ret; /* syscall error and return values */ + u_int ts_nargs; /* number of arguments supplied in ts_sa.args */ + struct syscall_args ts_sa; /* syscall code and arguments */ +}; + int ptrace_set_pc(struct thread *_td, unsigned long _addr); int ptrace_single_step(struct thread *_td); int ptrace_clear_single_step(struct thread *_td); diff --git a/tools/test/ptrace/scescx.c b/tools/test/ptrace/scescx.c --- a/tools/test/ptrace/scescx.c +++ b/tools/test/ptrace/scescx.c @@ -28,6 +28,7 @@ #include #include +#include #include #include #include @@ -38,6 +39,16 @@ #include #include +#ifndef PT_SC_REMOTE +#define PT_SC_REMOTE 44 /* Execute a syscall */ +struct ptrace_sc_remote { + struct ptrace_sc_ret pscr_ret; + u_int pscr_syscall; + u_int pscr_nargs; + u_long *pscr_args; +}; +#endif + #define TRACE ">>>> " static const char * @@ -213,9 +224,13 @@ printf("\n"); } +static int trace_syscalls = 1; +static int remote_getpid = 0; + static int trace_sc(int pid) { + struct ptrace_sc_remote pscr; struct ptrace_lwpinfo lwpinfo; int status; @@ -269,6 +284,24 @@ wait_info(pid, status, &lwpinfo); assert(lwpinfo.pl_flags & PL_FLAG_SCX); + if (remote_getpid) { + memset(&pscr, 0, sizeof(pscr)); + pscr.pscr_syscall = SYS_getpid; + pscr.pscr_nargs = 0; + if (ptrace(PT_SC_REMOTE, pid, (caddr_t)&pscr, + sizeof(pscr)) < 0) { + perror("PT_SC_REMOTE"); + ptrace(PT_KILL, pid, NULL, 0); + return (-1); + } else { + printf(TRACE "remote getpid %ld errno %d\n", + pscr.pscr_ret.sr_retval[0], pscr.pscr_ret.sr_error); + if (waitpid(pid, &status, 0) == -1) { + perror("waitpid"); + return (-1); + } + } + } if (lwpinfo.pl_flags & PL_FLAG_EXEC) get_pathname(pid); @@ -322,8 +355,6 @@ return (0); } -static int trace_syscalls = 1; - static int trace(pid_t pid) { @@ -340,12 +371,16 @@ pid_t pid, pid1; trace_syscalls = 1; + remote_getpid = 0; use_vfork = 0; - while ((c = getopt(argc, argv, "csv")) != -1) { + while ((c = getopt(argc, argv, "crsv")) != -1) { switch (c) { case 'c': trace_syscalls = 0; break; + case 'r': +
remote_getpid = 1; + break; case 's': trace_syscalls = 1; break; @@ -354,7 +389,8 @@ break; default: case '?': - fprintf(stderr, "Usage: %s [-c] [-s] [-v]\n", argv[0]); + fprintf(stderr, "Usage: %s [-c] [-r] [-s] [-v]\n", + argv[0]); return (2); } }