diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c index 4c1936edc301..555762185411 100644 --- a/sys/kern/kern_ktrace.c +++ b/sys/kern/kern_ktrace.c @@ -1,1399 +1,1420 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. * Copyright (c) 2005 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_ktrace.c 8.2 (Berkeley) 9/23/93 */ #include #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * The ktrace facility allows the tracing of certain key events in user space * processes, such as system calls, signal delivery, context switches, and * user generated events using utrace(2). It works by streaming event * records and data to a vnode associated with the process using the * ktrace(2) system call. In general, records can be written directly from * the context that generates the event. One important exception to this is * during a context switch, where sleeping is not permitted. To handle this * case, trace events are generated using in-kernel ktr_request records, and * then delivered to disk at a convenient moment -- either immediately, the * next traceable event, at system call return, or at process exit. * * When dealing with multiple threads or processes writing to the same event * log, ordering guarantees are weak: specifically, if an event has multiple * records (i.e., system call enter and return), they may be interlaced with * records from another event. Process and thread ID information is provided * in the record, and user applications can de-interlace events if required. */ static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE"); #ifdef KTRACE FEATURE(ktrace, "Kernel support for system-call tracing"); #ifndef KTRACE_REQUEST_POOL #define KTRACE_REQUEST_POOL 100 #endif struct ktr_request { struct ktr_header ktr_header; void *ktr_buffer; union { struct ktr_proc_ctor ktr_proc_ctor; struct ktr_cap_fail ktr_cap_fail; struct ktr_syscall ktr_syscall; struct ktr_sysret ktr_sysret; struct ktr_genio ktr_genio; struct ktr_psig ktr_psig; struct ktr_csw ktr_csw; struct ktr_fault ktr_fault; struct ktr_faultend ktr_faultend; struct ktr_struct_array ktr_struct_array; } ktr_data; STAILQ_ENTRY(ktr_request) ktr_list; }; static const int data_lengths[] = { [KTR_SYSCALL] = offsetof(struct ktr_syscall, ktr_args), [KTR_SYSRET] = sizeof(struct ktr_sysret), [KTR_NAMEI] = 0, [KTR_GENIO] = sizeof(struct ktr_genio), [KTR_PSIG] = sizeof(struct ktr_psig), [KTR_CSW] = sizeof(struct ktr_csw), [KTR_USER] = 0, [KTR_STRUCT] = 0, [KTR_SYSCTL] = 0, [KTR_PROCCTOR] = sizeof(struct ktr_proc_ctor), [KTR_PROCDTOR] = 0, [KTR_CAPFAIL] = sizeof(struct ktr_cap_fail), [KTR_FAULT] = sizeof(struct ktr_fault), [KTR_FAULTEND] = sizeof(struct ktr_faultend), [KTR_STRUCT_ARRAY] = sizeof(struct ktr_struct_array), }; static STAILQ_HEAD(, ktr_request) ktr_free; static SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "KTRACE options"); static u_int ktr_requestpool = KTRACE_REQUEST_POOL; TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool); u_int ktr_geniosize = PAGE_SIZE; SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RWTUN, &ktr_geniosize, 0, "Maximum size of genio event payload"); /* * Allow to not to send signal to traced process, in which context the * ktr record is written. The limit is applied from the process that * set up ktrace, so killing the traced process is not completely fair. */ int ktr_filesize_limit_signal = 0; SYSCTL_INT(_kern_ktrace, OID_AUTO, filesize_limit_signal, CTLFLAG_RWTUN, &ktr_filesize_limit_signal, 0, "Send SIGXFSZ to the traced process when the log size limit is exceeded"); static int print_message = 1; static struct mtx ktrace_mtx; static struct sx ktrace_sx; struct ktr_io_params { struct vnode *vp; struct ucred *cr; off_t lim; u_int refs; }; static void ktrace_init(void *dummy); static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS); static u_int ktrace_resize_pool(u_int oldsize, u_int newsize); static struct ktr_request *ktr_getrequest_entered(struct thread *td, int type); static struct ktr_request *ktr_getrequest(int type); static void ktr_submitrequest(struct thread *td, struct ktr_request *req); static struct ktr_io_params *ktr_freeproc(struct proc *p); static void ktr_freerequest(struct ktr_request *req); static void ktr_freerequest_locked(struct ktr_request *req); static void ktr_writerequest(struct thread *td, struct ktr_request *req); static int ktrcanset(struct thread *,struct proc *); static int ktrsetchildren(struct thread *, struct proc *, int, int, struct ktr_io_params *); static int ktrops(struct thread *, struct proc *, int, int, struct ktr_io_params *); static void ktrprocctor_entered(struct thread *, struct proc *); /* * ktrace itself generates events, such as context switches, which we do not * wish to trace. Maintain a flag, TDP_INKTRACE, on each thread to determine * whether or not it is in a region where tracing of events should be * suppressed. */ static void ktrace_enter(struct thread *td) { KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set")); td->td_pflags |= TDP_INKTRACE; } static void ktrace_exit(struct thread *td) { KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set")); td->td_pflags &= ~TDP_INKTRACE; } static void ktrace_assert(struct thread *td) { KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set")); } static void ast_ktrace(struct thread *td, int tda __unused) { KTRUSERRET(td); } static void ktrace_init(void *dummy) { struct ktr_request *req; int i; mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET); sx_init(&ktrace_sx, "ktrace_sx"); STAILQ_INIT(&ktr_free); for (i = 0; i < ktr_requestpool; i++) { req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK | M_ZERO); STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); } ast_register(TDA_KTRACE, ASTR_ASTF_REQUIRED, 0, ast_ktrace); } SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL); static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS) { struct thread *td; u_int newsize, oldsize, wantsize; int error; /* Handle easy read-only case first to avoid warnings from GCC. */ if (!req->newptr) { oldsize = ktr_requestpool; return (SYSCTL_OUT(req, &oldsize, sizeof(u_int))); } error = SYSCTL_IN(req, &wantsize, sizeof(u_int)); if (error) return (error); td = curthread; ktrace_enter(td); oldsize = ktr_requestpool; newsize = ktrace_resize_pool(oldsize, wantsize); ktrace_exit(td); error = SYSCTL_OUT(req, &oldsize, sizeof(u_int)); if (error) return (error); if (wantsize > oldsize && newsize < wantsize) return (ENOSPC); return (0); } SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU", "Pool buffer size for ktrace(1)"); static u_int ktrace_resize_pool(u_int oldsize, u_int newsize) { STAILQ_HEAD(, ktr_request) ktr_new; struct ktr_request *req; int bound; print_message = 1; bound = newsize - oldsize; if (bound == 0) return (ktr_requestpool); if (bound < 0) { mtx_lock(&ktrace_mtx); /* Shrink pool down to newsize if possible. */ while (bound++ < 0) { req = STAILQ_FIRST(&ktr_free); if (req == NULL) break; STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); ktr_requestpool--; free(req, M_KTRACE); } } else { /* Grow pool up to newsize. */ STAILQ_INIT(&ktr_new); while (bound-- > 0) { req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK | M_ZERO); STAILQ_INSERT_HEAD(&ktr_new, req, ktr_list); } mtx_lock(&ktrace_mtx); STAILQ_CONCAT(&ktr_free, &ktr_new); ktr_requestpool += (newsize - oldsize); } mtx_unlock(&ktrace_mtx); return (ktr_requestpool); } /* ktr_getrequest() assumes that ktr_comm[] is the same size as td_name[]. */ CTASSERT(sizeof(((struct ktr_header *)NULL)->ktr_comm) == (sizeof((struct thread *)NULL)->td_name)); static struct ktr_request * ktr_getrequest_entered(struct thread *td, int type) { struct ktr_request *req; struct proc *p = td->td_proc; int pm; mtx_lock(&ktrace_mtx); if (!KTRCHECK(td, type)) { mtx_unlock(&ktrace_mtx); return (NULL); } req = STAILQ_FIRST(&ktr_free); if (req != NULL) { STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); req->ktr_header.ktr_type = type; if (p->p_traceflag & KTRFAC_DROP) { req->ktr_header.ktr_type |= KTR_DROP; p->p_traceflag &= ~KTRFAC_DROP; } mtx_unlock(&ktrace_mtx); nanotime(&req->ktr_header.ktr_time); req->ktr_header.ktr_type |= KTR_VERSIONED; req->ktr_header.ktr_pid = p->p_pid; req->ktr_header.ktr_tid = td->td_tid; req->ktr_header.ktr_cpu = PCPU_GET(cpuid); req->ktr_header.ktr_version = KTR_VERSION1; bcopy(td->td_name, req->ktr_header.ktr_comm, sizeof(req->ktr_header.ktr_comm)); req->ktr_buffer = NULL; req->ktr_header.ktr_len = 0; } else { p->p_traceflag |= KTRFAC_DROP; pm = print_message; print_message = 0; mtx_unlock(&ktrace_mtx); if (pm) printf("Out of ktrace request objects.\n"); } return (req); } static struct ktr_request * ktr_getrequest(int type) { struct thread *td = curthread; struct ktr_request *req; ktrace_enter(td); req = ktr_getrequest_entered(td, type); if (req == NULL) ktrace_exit(td); return (req); } /* * Some trace generation environments don't permit direct access to VFS, * such as during a context switch where sleeping is not allowed. Under these * circumstances, queue a request to the thread to be written asynchronously * later. */ static void ktr_enqueuerequest(struct thread *td, struct ktr_request *req) { mtx_lock(&ktrace_mtx); STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list); mtx_unlock(&ktrace_mtx); ast_sched(td, TDA_KTRACE); } /* * Drain any pending ktrace records from the per-thread queue to disk. This * is used both internally before committing other records, and also on * system call return. We drain all the ones we can find at the time when * drain is requested, but don't keep draining after that as those events * may be approximately "after" the current event. */ static void ktr_drain(struct thread *td) { struct ktr_request *queued_req; STAILQ_HEAD(, ktr_request) local_queue; ktrace_assert(td); sx_assert(&ktrace_sx, SX_XLOCKED); STAILQ_INIT(&local_queue); if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) { mtx_lock(&ktrace_mtx); STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr); mtx_unlock(&ktrace_mtx); while ((queued_req = STAILQ_FIRST(&local_queue))) { STAILQ_REMOVE_HEAD(&local_queue, ktr_list); ktr_writerequest(td, queued_req); ktr_freerequest(queued_req); } } } /* * Submit a trace record for immediate commit to disk -- to be used only * where entering VFS is OK. First drain any pending records that may have * been cached in the thread. */ static void ktr_submitrequest(struct thread *td, struct ktr_request *req) { ktrace_assert(td); sx_xlock(&ktrace_sx); ktr_drain(td); ktr_writerequest(td, req); ktr_freerequest(req); sx_xunlock(&ktrace_sx); ktrace_exit(td); } static void ktr_freerequest(struct ktr_request *req) { mtx_lock(&ktrace_mtx); ktr_freerequest_locked(req); mtx_unlock(&ktrace_mtx); } static void ktr_freerequest_locked(struct ktr_request *req) { mtx_assert(&ktrace_mtx, MA_OWNED); if (req->ktr_buffer != NULL) free(req->ktr_buffer, M_KTRACE); STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); } static void ktr_io_params_ref(struct ktr_io_params *kiop) { mtx_assert(&ktrace_mtx, MA_OWNED); kiop->refs++; } static struct ktr_io_params * ktr_io_params_rele(struct ktr_io_params *kiop) { mtx_assert(&ktrace_mtx, MA_OWNED); if (kiop == NULL) return (NULL); KASSERT(kiop->refs > 0, ("kiop ref == 0 %p", kiop)); return (--(kiop->refs) == 0 ? kiop : NULL); } void ktr_io_params_free(struct ktr_io_params *kiop) { if (kiop == NULL) return; MPASS(kiop->refs == 0); vn_close(kiop->vp, FWRITE, kiop->cr, curthread); crfree(kiop->cr); free(kiop, M_KTRACE); } static struct ktr_io_params * ktr_io_params_alloc(struct thread *td, struct vnode *vp) { struct ktr_io_params *res; res = malloc(sizeof(struct ktr_io_params), M_KTRACE, M_WAITOK); res->vp = vp; res->cr = crhold(td->td_ucred); res->lim = lim_cur(td, RLIMIT_FSIZE); res->refs = 1; return (res); } /* * Disable tracing for a process and release all associated resources. * The caller is responsible for releasing a reference on the returned * vnode and credentials. */ static struct ktr_io_params * ktr_freeproc(struct proc *p) { struct ktr_io_params *kiop; struct ktr_request *req; PROC_LOCK_ASSERT(p, MA_OWNED); mtx_assert(&ktrace_mtx, MA_OWNED); kiop = ktr_io_params_rele(p->p_ktrioparms); p->p_ktrioparms = NULL; p->p_traceflag = 0; while ((req = STAILQ_FIRST(&p->p_ktr)) != NULL) { STAILQ_REMOVE_HEAD(&p->p_ktr, ktr_list); ktr_freerequest_locked(req); } return (kiop); } struct vnode * ktr_get_tracevp(struct proc *p, bool ref) { struct vnode *vp; PROC_LOCK_ASSERT(p, MA_OWNED); if (p->p_ktrioparms != NULL) { vp = p->p_ktrioparms->vp; if (ref) vrefact(vp); } else { vp = NULL; } return (vp); } void ktrsyscall(int code, int narg, syscallarg_t args[]) { struct ktr_request *req; struct ktr_syscall *ktp; size_t buflen; char *buf = NULL; if (__predict_false(curthread->td_pflags & TDP_INKTRACE)) return; buflen = sizeof(register_t) * narg; if (buflen > 0) { buf = malloc(buflen, M_KTRACE, M_WAITOK); bcopy(args, buf, buflen); } req = ktr_getrequest(KTR_SYSCALL); if (req == NULL) { if (buf != NULL) free(buf, M_KTRACE); return; } ktp = &req->ktr_data.ktr_syscall; ktp->ktr_code = code; ktp->ktr_narg = narg; if (buflen > 0) { req->ktr_header.ktr_len = buflen; req->ktr_buffer = buf; } ktr_submitrequest(curthread, req); } void ktrsysret(int code, int error, register_t retval) { struct ktr_request *req; struct ktr_sysret *ktp; if (__predict_false(curthread->td_pflags & TDP_INKTRACE)) return; req = ktr_getrequest(KTR_SYSRET); if (req == NULL) return; ktp = &req->ktr_data.ktr_sysret; ktp->ktr_code = code; ktp->ktr_error = error; ktp->ktr_retval = ((error == 0) ? retval: 0); /* what about val2 ? */ ktr_submitrequest(curthread, req); } /* * When a setuid process execs, disable tracing. * * XXX: We toss any pending asynchronous records. */ struct ktr_io_params * ktrprocexec(struct proc *p) { struct ktr_io_params *kiop; PROC_LOCK_ASSERT(p, MA_OWNED); kiop = p->p_ktrioparms; if (kiop == NULL || priv_check_cred(kiop->cr, PRIV_DEBUG_DIFFCRED)) return (NULL); mtx_lock(&ktrace_mtx); kiop = ktr_freeproc(p); mtx_unlock(&ktrace_mtx); return (kiop); } /* * When a process exits, drain per-process asynchronous trace records * and disable tracing. */ void ktrprocexit(struct thread *td) { struct ktr_request *req; struct proc *p; struct ktr_io_params *kiop; p = td->td_proc; if (p->p_traceflag == 0) return; ktrace_enter(td); req = ktr_getrequest_entered(td, KTR_PROCDTOR); if (req != NULL) ktr_enqueuerequest(td, req); sx_xlock(&ktrace_sx); ktr_drain(td); sx_xunlock(&ktrace_sx); PROC_LOCK(p); mtx_lock(&ktrace_mtx); kiop = ktr_freeproc(p); mtx_unlock(&ktrace_mtx); PROC_UNLOCK(p); ktr_io_params_free(kiop); ktrace_exit(td); } static void ktrprocctor_entered(struct thread *td, struct proc *p) { struct ktr_proc_ctor *ktp; struct ktr_request *req; struct thread *td2; ktrace_assert(td); td2 = FIRST_THREAD_IN_PROC(p); req = ktr_getrequest_entered(td2, KTR_PROCCTOR); if (req == NULL) return; ktp = &req->ktr_data.ktr_proc_ctor; ktp->sv_flags = p->p_sysent->sv_flags; ktr_enqueuerequest(td2, req); } void ktrprocctor(struct proc *p) { struct thread *td = curthread; if ((p->p_traceflag & KTRFAC_MASK) == 0) return; ktrace_enter(td); ktrprocctor_entered(td, p); ktrace_exit(td); } /* * When a process forks, enable tracing in the new process if needed. */ void ktrprocfork(struct proc *p1, struct proc *p2) { MPASS(p2->p_ktrioparms == NULL); MPASS(p2->p_traceflag == 0); if (p1->p_traceflag == 0) return; PROC_LOCK(p1); mtx_lock(&ktrace_mtx); if (p1->p_traceflag & KTRFAC_INHERIT) { p2->p_traceflag = p1->p_traceflag; if ((p2->p_ktrioparms = p1->p_ktrioparms) != NULL) p1->p_ktrioparms->refs++; } mtx_unlock(&ktrace_mtx); PROC_UNLOCK(p1); ktrprocctor(p2); } /* * When a thread returns, drain any asynchronous records generated by the * system call. */ void ktruserret(struct thread *td) { ktrace_enter(td); sx_xlock(&ktrace_sx); ktr_drain(td); sx_xunlock(&ktrace_sx); ktrace_exit(td); } void ktrnamei(const char *path) { struct ktr_request *req; int namelen; char *buf = NULL; namelen = strlen(path); if (namelen > 0) { buf = malloc(namelen, M_KTRACE, M_WAITOK); bcopy(path, buf, namelen); } req = ktr_getrequest(KTR_NAMEI); if (req == NULL) { if (buf != NULL) free(buf, M_KTRACE); return; } if (namelen > 0) { req->ktr_header.ktr_len = namelen; req->ktr_buffer = buf; } ktr_submitrequest(curthread, req); } void ktrsysctl(int *name, u_int namelen) { struct ktr_request *req; u_int mib[CTL_MAXNAME + 2]; char *mibname; size_t mibnamelen; int error; /* Lookup name of mib. */ KASSERT(namelen <= CTL_MAXNAME, ("sysctl MIB too long")); mib[0] = 0; mib[1] = 1; bcopy(name, mib + 2, namelen * sizeof(*name)); mibnamelen = 128; mibname = malloc(mibnamelen, M_KTRACE, M_WAITOK); error = kernel_sysctl(curthread, mib, namelen + 2, mibname, &mibnamelen, NULL, 0, &mibnamelen, 0); if (error) { free(mibname, M_KTRACE); return; } req = ktr_getrequest(KTR_SYSCTL); if (req == NULL) { free(mibname, M_KTRACE); return; } req->ktr_header.ktr_len = mibnamelen; req->ktr_buffer = mibname; ktr_submitrequest(curthread, req); } void ktrgenio(int fd, enum uio_rw rw, struct uio *uio, int error) { struct ktr_request *req; struct ktr_genio *ktg; int datalen; char *buf; if (error != 0 && (rw == UIO_READ || error == EFAULT)) { freeuio(uio); return; } uio->uio_offset = 0; uio->uio_rw = UIO_WRITE; datalen = MIN(uio->uio_resid, ktr_geniosize); buf = malloc(datalen, M_KTRACE, M_WAITOK); error = uiomove(buf, datalen, uio); freeuio(uio); if (error) { free(buf, M_KTRACE); return; } req = ktr_getrequest(KTR_GENIO); if (req == NULL) { free(buf, M_KTRACE); return; } ktg = &req->ktr_data.ktr_genio; ktg->ktr_fd = fd; ktg->ktr_rw = rw; req->ktr_header.ktr_len = datalen; req->ktr_buffer = buf; ktr_submitrequest(curthread, req); } void ktrpsig(int sig, sig_t action, sigset_t *mask, int code) { struct thread *td = curthread; struct ktr_request *req; struct ktr_psig *kp; req = ktr_getrequest(KTR_PSIG); if (req == NULL) return; kp = &req->ktr_data.ktr_psig; kp->signo = (char)sig; kp->action = action; kp->mask = *mask; kp->code = code; ktr_enqueuerequest(td, req); ktrace_exit(td); } void ktrcsw(int out, int user, const char *wmesg) { struct thread *td = curthread; struct ktr_request *req; struct ktr_csw *kc; if (__predict_false(curthread->td_pflags & TDP_INKTRACE)) return; req = ktr_getrequest(KTR_CSW); if (req == NULL) return; kc = &req->ktr_data.ktr_csw; kc->out = out; kc->user = user; if (wmesg != NULL) strlcpy(kc->wmesg, wmesg, sizeof(kc->wmesg)); else bzero(kc->wmesg, sizeof(kc->wmesg)); ktr_enqueuerequest(td, req); ktrace_exit(td); } void ktrstruct(const char *name, const void *data, size_t datalen) { struct ktr_request *req; char *buf; size_t buflen, namelen; if (__predict_false(curthread->td_pflags & TDP_INKTRACE)) return; if (data == NULL) datalen = 0; namelen = strlen(name) + 1; buflen = namelen + datalen; buf = malloc(buflen, M_KTRACE, M_WAITOK); strcpy(buf, name); bcopy(data, buf + namelen, datalen); if ((req = ktr_getrequest(KTR_STRUCT)) == NULL) { free(buf, M_KTRACE); return; } req->ktr_buffer = buf; req->ktr_header.ktr_len = buflen; ktr_submitrequest(curthread, req); } void ktrstruct_error(const char *name, const void *data, size_t datalen, int error) { if (error == 0) ktrstruct(name, data, datalen); } void ktrstructarray(const char *name, enum uio_seg seg, const void *data, int num_items, size_t struct_size) { struct ktr_request *req; struct ktr_struct_array *ksa; char *buf; size_t buflen, datalen, namelen; int max_items; if (__predict_false(curthread->td_pflags & TDP_INKTRACE)) return; if (num_items < 0) return; /* Trim array length to genio size. */ max_items = ktr_geniosize / struct_size; if (num_items > max_items) { if (max_items == 0) num_items = 1; else num_items = max_items; } datalen = num_items * struct_size; if (data == NULL) datalen = 0; namelen = strlen(name) + 1; buflen = namelen + datalen; buf = malloc(buflen, M_KTRACE, M_WAITOK); strcpy(buf, name); if (seg == UIO_SYSSPACE) bcopy(data, buf + namelen, datalen); else { if (copyin(data, buf + namelen, datalen) != 0) { free(buf, M_KTRACE); return; } } if ((req = ktr_getrequest(KTR_STRUCT_ARRAY)) == NULL) { free(buf, M_KTRACE); return; } ksa = &req->ktr_data.ktr_struct_array; ksa->struct_size = struct_size; req->ktr_buffer = buf; req->ktr_header.ktr_len = buflen; ktr_submitrequest(curthread, req); } void -ktrcapfail(enum ktr_cap_fail_type type, const cap_rights_t *needed, - const cap_rights_t *held) +ktrcapfail(enum ktr_cap_violation type, const void *data) { struct thread *td = curthread; struct ktr_request *req; struct ktr_cap_fail *kcf; + union ktr_cap_data *kcd; - if (__predict_false(curthread->td_pflags & TDP_INKTRACE)) + if (__predict_false(td->td_pflags & TDP_INKTRACE)) + return; + if (type != CAPFAIL_SYSCALL && + (td->td_sa.callp->sy_flags & SYF_CAPENABLED) == 0) return; req = ktr_getrequest(KTR_CAPFAIL); if (req == NULL) return; kcf = &req->ktr_data.ktr_cap_fail; kcf->cap_type = type; - if (needed != NULL) - kcf->cap_needed = *needed; - else - cap_rights_init(&kcf->cap_needed); - if (held != NULL) - kcf->cap_held = *held; - else - cap_rights_init(&kcf->cap_held); + kcf->cap_code = td->td_sa.code; + kcf->cap_svflags = td->td_proc->p_sysent->sv_flags; + if (data != NULL) { + kcd = &kcf->cap_data; + switch (type) { + case CAPFAIL_NOTCAPABLE: + case CAPFAIL_INCREASE: + kcd->cap_needed = *(const cap_rights_t *)data; + kcd->cap_held = *((const cap_rights_t *)data + 1); + break; + case CAPFAIL_SYSCALL: + case CAPFAIL_SIGNAL: + case CAPFAIL_PROTO: + kcd->cap_int = *(const int *)data; + break; + case CAPFAIL_SOCKADDR: + kcd->cap_sockaddr = *(const struct sockaddr *)data; + break; + case CAPFAIL_NAMEI: + strlcpy(kcd->cap_path, data, MAXPATHLEN); + break; + case CAPFAIL_CPUSET: + default: + break; + } + } ktr_enqueuerequest(td, req); ktrace_exit(td); } void ktrfault(vm_offset_t vaddr, int type) { struct thread *td = curthread; struct ktr_request *req; struct ktr_fault *kf; if (__predict_false(curthread->td_pflags & TDP_INKTRACE)) return; req = ktr_getrequest(KTR_FAULT); if (req == NULL) return; kf = &req->ktr_data.ktr_fault; kf->vaddr = vaddr; kf->type = type; ktr_enqueuerequest(td, req); ktrace_exit(td); } void ktrfaultend(int result) { struct thread *td = curthread; struct ktr_request *req; struct ktr_faultend *kf; if (__predict_false(curthread->td_pflags & TDP_INKTRACE)) return; req = ktr_getrequest(KTR_FAULTEND); if (req == NULL) return; kf = &req->ktr_data.ktr_faultend; kf->result = result; ktr_enqueuerequest(td, req); ktrace_exit(td); } #endif /* KTRACE */ /* Interface and common routines */ #ifndef _SYS_SYSPROTO_H_ struct ktrace_args { char *fname; int ops; int facs; int pid; }; #endif /* ARGSUSED */ int sys_ktrace(struct thread *td, struct ktrace_args *uap) { #ifdef KTRACE struct vnode *vp = NULL; struct proc *p; struct pgrp *pg; int facs = uap->facs & ~KTRFAC_ROOT; int ops = KTROP(uap->ops); int descend = uap->ops & KTRFLAG_DESCEND; int ret = 0; int flags, error = 0; struct nameidata nd; struct ktr_io_params *kiop, *old_kiop; /* * Need something to (un)trace. */ if (ops != KTROP_CLEARFILE && facs == 0) return (EINVAL); kiop = NULL; if (ops != KTROP_CLEAR) { /* * an operation which requires a file argument. */ NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->fname); flags = FREAD | FWRITE | O_NOFOLLOW; error = vn_open(&nd, &flags, 0, NULL); if (error) return (error); NDFREE_PNBUF(&nd); vp = nd.ni_vp; VOP_UNLOCK(vp); if (vp->v_type != VREG) { (void)vn_close(vp, FREAD|FWRITE, td->td_ucred, td); return (EACCES); } kiop = ktr_io_params_alloc(td, vp); } /* * Clear all uses of the tracefile. */ ktrace_enter(td); if (ops == KTROP_CLEARFILE) { restart: sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { old_kiop = NULL; PROC_LOCK(p); if (p->p_ktrioparms != NULL && p->p_ktrioparms->vp == vp) { if (ktrcanset(td, p)) { mtx_lock(&ktrace_mtx); old_kiop = ktr_freeproc(p); mtx_unlock(&ktrace_mtx); } else error = EPERM; } PROC_UNLOCK(p); if (old_kiop != NULL) { sx_sunlock(&allproc_lock); ktr_io_params_free(old_kiop); goto restart; } } sx_sunlock(&allproc_lock); goto done; } /* * do it */ sx_slock(&proctree_lock); if (uap->pid < 0) { /* * by process group */ pg = pgfind(-uap->pid); if (pg == NULL) { sx_sunlock(&proctree_lock); error = ESRCH; goto done; } /* * ktrops() may call vrele(). Lock pg_members * by the proctree_lock rather than pg_mtx. */ PGRP_UNLOCK(pg); if (LIST_EMPTY(&pg->pg_members)) { sx_sunlock(&proctree_lock); error = ESRCH; goto done; } LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); if (descend) ret |= ktrsetchildren(td, p, ops, facs, kiop); else ret |= ktrops(td, p, ops, facs, kiop); } } else { /* * by pid */ p = pfind(uap->pid); if (p == NULL) { error = ESRCH; sx_sunlock(&proctree_lock); goto done; } if (descend) ret |= ktrsetchildren(td, p, ops, facs, kiop); else ret |= ktrops(td, p, ops, facs, kiop); } sx_sunlock(&proctree_lock); if (!ret) error = EPERM; done: if (kiop != NULL) { mtx_lock(&ktrace_mtx); kiop = ktr_io_params_rele(kiop); mtx_unlock(&ktrace_mtx); ktr_io_params_free(kiop); } ktrace_exit(td); return (error); #else /* !KTRACE */ return (ENOSYS); #endif /* KTRACE */ } /* ARGSUSED */ int sys_utrace(struct thread *td, struct utrace_args *uap) { #ifdef KTRACE struct ktr_request *req; void *cp; int error; if (!KTRPOINT(td, KTR_USER)) return (0); if (uap->len > KTR_USER_MAXLEN) return (EINVAL); cp = malloc(uap->len, M_KTRACE, M_WAITOK); error = copyin(uap->addr, cp, uap->len); if (error) { free(cp, M_KTRACE); return (error); } req = ktr_getrequest(KTR_USER); if (req == NULL) { free(cp, M_KTRACE); return (ENOMEM); } req->ktr_buffer = cp; req->ktr_header.ktr_len = uap->len; ktr_submitrequest(td, req); return (0); #else /* !KTRACE */ return (ENOSYS); #endif /* KTRACE */ } #ifdef KTRACE static int ktrops(struct thread *td, struct proc *p, int ops, int facs, struct ktr_io_params *new_kiop) { struct ktr_io_params *old_kiop; PROC_LOCK_ASSERT(p, MA_OWNED); if (!ktrcanset(td, p)) { PROC_UNLOCK(p); return (0); } if ((ops == KTROP_SET && p->p_state == PRS_NEW) || p_cansee(td, p) != 0) { /* * Disallow setting trace points if the process is being born. * This avoids races with trace point inheritance in * ktrprocfork(). */ PROC_UNLOCK(p); return (0); } if ((p->p_flag & P_WEXIT) != 0) { /* * There's nothing to do if the process is exiting, but avoid * signaling an error. */ PROC_UNLOCK(p); return (1); } old_kiop = NULL; mtx_lock(&ktrace_mtx); if (ops == KTROP_SET) { if (p->p_ktrioparms != NULL && p->p_ktrioparms->vp != new_kiop->vp) { /* if trace file already in use, relinquish below */ old_kiop = ktr_io_params_rele(p->p_ktrioparms); p->p_ktrioparms = NULL; } if (p->p_ktrioparms == NULL) { p->p_ktrioparms = new_kiop; ktr_io_params_ref(new_kiop); } p->p_traceflag |= facs; if (priv_check(td, PRIV_KTRACE) == 0) p->p_traceflag |= KTRFAC_ROOT; } else { /* KTROP_CLEAR */ if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) /* no more tracing */ old_kiop = ktr_freeproc(p); } mtx_unlock(&ktrace_mtx); if ((p->p_traceflag & KTRFAC_MASK) != 0) ktrprocctor_entered(td, p); PROC_UNLOCK(p); ktr_io_params_free(old_kiop); return (1); } static int ktrsetchildren(struct thread *td, struct proc *top, int ops, int facs, struct ktr_io_params *new_kiop) { struct proc *p; int ret = 0; p = top; PROC_LOCK_ASSERT(p, MA_OWNED); sx_assert(&proctree_lock, SX_LOCKED); for (;;) { ret |= ktrops(td, p, ops, facs, new_kiop); /* * If this process has children, descend to them next, * otherwise do any siblings, and if done with this level, * follow back up the tree (but not past top). */ if (!LIST_EMPTY(&p->p_children)) p = LIST_FIRST(&p->p_children); else for (;;) { if (p == top) return (ret); if (LIST_NEXT(p, p_sibling)) { p = LIST_NEXT(p, p_sibling); break; } p = p->p_pptr; } PROC_LOCK(p); } /*NOTREACHED*/ } static void ktr_writerequest(struct thread *td, struct ktr_request *req) { struct ktr_io_params *kiop, *kiop1; struct ktr_header *kth; struct vnode *vp; struct proc *p; struct ucred *cred; struct uio auio; struct iovec aiov[3]; struct mount *mp; off_t lim; int datalen, buflen; int error; p = td->td_proc; /* * We reference the kiop for use in I/O in case ktrace is * disabled on the process as we write out the request. */ mtx_lock(&ktrace_mtx); kiop = p->p_ktrioparms; /* * If kiop is NULL, it has been cleared out from under this * request, so just drop it. */ if (kiop == NULL) { mtx_unlock(&ktrace_mtx); return; } ktr_io_params_ref(kiop); vp = kiop->vp; cred = kiop->cr; lim = kiop->lim; KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL")); mtx_unlock(&ktrace_mtx); kth = &req->ktr_header; KASSERT(((u_short)kth->ktr_type & ~KTR_TYPE) < nitems(data_lengths), ("data_lengths array overflow")); datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_TYPE]; buflen = kth->ktr_len; auio.uio_iov = &aiov[0]; auio.uio_offset = 0; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_WRITE; aiov[0].iov_base = (caddr_t)kth; aiov[0].iov_len = sizeof(struct ktr_header); auio.uio_resid = sizeof(struct ktr_header); auio.uio_iovcnt = 1; auio.uio_td = td; if (datalen != 0) { aiov[1].iov_base = (caddr_t)&req->ktr_data; aiov[1].iov_len = datalen; auio.uio_resid += datalen; auio.uio_iovcnt++; kth->ktr_len += datalen; } if (buflen != 0) { KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write")); aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer; aiov[auio.uio_iovcnt].iov_len = buflen; auio.uio_resid += buflen; auio.uio_iovcnt++; } vn_start_write(vp, &mp, V_WAIT); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); td->td_ktr_io_lim = lim; #ifdef MAC error = mac_vnode_check_write(cred, NOCRED, vp); if (error == 0) #endif error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred); VOP_UNLOCK(vp); vn_finished_write(mp); if (error == 0) { mtx_lock(&ktrace_mtx); kiop = ktr_io_params_rele(kiop); mtx_unlock(&ktrace_mtx); ktr_io_params_free(kiop); return; } /* * If error encountered, give up tracing on this vnode on this * process. Other processes might still be suitable for * writes to this vnode. */ log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped for pid %d\n", error, p->p_pid); kiop1 = NULL; PROC_LOCK(p); mtx_lock(&ktrace_mtx); if (p->p_ktrioparms != NULL && p->p_ktrioparms->vp == vp) kiop1 = ktr_freeproc(p); kiop = ktr_io_params_rele(kiop); mtx_unlock(&ktrace_mtx); PROC_UNLOCK(p); ktr_io_params_free(kiop1); ktr_io_params_free(kiop); } /* * Return true if caller has permission to set the ktracing state * of target. Essentially, the target can't possess any * more permissions than the caller. KTRFAC_ROOT signifies that * root previously set the tracing status on the target process, and * so, only root may further change it. */ static int ktrcanset(struct thread *td, struct proc *targetp) { PROC_LOCK_ASSERT(targetp, MA_OWNED); if (targetp->p_traceflag & KTRFAC_ROOT && priv_check(td, PRIV_KTRACE)) return (0); if (p_candebug(td, targetp) != 0) return (0); return (1); } #endif /* KTRACE */ diff --git a/sys/kern/sys_capability.c b/sys/kern/sys_capability.c index fac264a78531..d59ad37f35ec 100644 --- a/sys/kern/sys_capability.c +++ b/sys/kern/sys_capability.c @@ -1,687 +1,685 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2008-2011 Robert N. M. Watson * Copyright (c) 2010-2011 Jonathan Anderson * Copyright (c) 2012 FreeBSD Foundation * All rights reserved. * * This software was developed at the University of Cambridge Computer * Laboratory with support from a grant from Google, Inc. * * Portions of this software were developed by Pawel Jakub Dawidek under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * FreeBSD kernel capability facility. * * Two kernel features are implemented here: capability mode, a sandboxed mode * of execution for processes, and capabilities, a refinement on file * descriptors that allows fine-grained control over operations on the file * descriptor. Collectively, these allow processes to run in the style of a * historic "capability system" in which they can use only resources * explicitly delegated to them. This model is enforced by restricting access * to global namespaces in capability mode. * * Capabilities wrap other file descriptor types, binding them to a constant * rights mask set when the capability is created. New capabilities may be * derived from existing capabilities, but only if they have the same or a * strict subset of the rights on the original capability. * * System calls permitted in capability mode are defined in capabilities.conf; * calls must be carefully audited for safety to ensure that they don't allow * escape from a sandbox. Some calls permit only a subset of operations in * capability mode -- for example, shm_open(2) is limited to creating * anonymous, rather than named, POSIX shared memory objects. */ #include #include "opt_capsicum.h" #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include bool __read_frequently trap_enotcap; SYSCTL_BOOL(_kern, OID_AUTO, trap_enotcap, CTLFLAG_RWTUN, &trap_enotcap, 0, "Deliver SIGTRAP on ENOTCAPABLE"); #ifdef CAPABILITY_MODE #define IOCTLS_MAX_COUNT 256 /* XXX: Is 256 sane? */ FEATURE(security_capability_mode, "Capsicum Capability Mode"); /* * System call to enter capability mode for the process. */ int sys_cap_enter(struct thread *td, struct cap_enter_args *uap) { struct ucred *newcred, *oldcred; struct proc *p; if (IN_CAPABILITY_MODE(td)) return (0); newcred = crget(); p = td->td_proc; PROC_LOCK(p); oldcred = crcopysafe(p, newcred); newcred->cr_flags |= CRED_FLAG_CAPMODE; proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); return (0); } /* * System call to query whether the process is in capability mode. */ int sys_cap_getmode(struct thread *td, struct cap_getmode_args *uap) { u_int i; i = IN_CAPABILITY_MODE(td) ? 1 : 0; return (copyout(&i, uap->modep, sizeof(i))); } #else /* !CAPABILITY_MODE */ int sys_cap_enter(struct thread *td, struct cap_enter_args *uap) { return (ENOSYS); } int sys_cap_getmode(struct thread *td, struct cap_getmode_args *uap) { return (ENOSYS); } #endif /* CAPABILITY_MODE */ #ifdef CAPABILITIES FEATURE(security_capabilities, "Capsicum Capabilities"); MALLOC_DECLARE(M_FILECAPS); static inline int _cap_check(const cap_rights_t *havep, const cap_rights_t *needp, - enum ktr_cap_fail_type type) + enum ktr_cap_violation type) { + const cap_rights_t rights[] = { *needp, *havep }; if (!cap_rights_contains(havep, needp)) { -#ifdef KTRACE - if (KTRPOINT(curthread, KTR_CAPFAIL)) - ktrcapfail(type, needp, havep); -#endif + if (CAP_TRACING(curthread)) + ktrcapfail(type, rights); return (ENOTCAPABLE); } return (0); } /* * Test whether a capability grants the requested rights. */ int cap_check(const cap_rights_t *havep, const cap_rights_t *needp) { return (_cap_check(havep, needp, CAPFAIL_NOTCAPABLE)); } int cap_check_failed_notcapable(const cap_rights_t *havep, const cap_rights_t *needp) { + const cap_rights_t rights[] = { *needp, *havep }; -#ifdef KTRACE - if (KTRPOINT(curthread, KTR_CAPFAIL)) - ktrcapfail(CAPFAIL_NOTCAPABLE, needp, havep); -#endif + if (CAP_TRACING(curthread)) + ktrcapfail(CAPFAIL_NOTCAPABLE, rights); return (ENOTCAPABLE); } /* * Convert capability rights into VM access flags. */ vm_prot_t cap_rights_to_vmprot(const cap_rights_t *havep) { vm_prot_t maxprot; maxprot = VM_PROT_NONE; if (cap_rights_is_set(havep, CAP_MMAP_R)) maxprot |= VM_PROT_READ; if (cap_rights_is_set(havep, CAP_MMAP_W)) maxprot |= VM_PROT_WRITE; if (cap_rights_is_set(havep, CAP_MMAP_X)) maxprot |= VM_PROT_EXECUTE; return (maxprot); } /* * Extract rights from a capability for monitoring purposes -- not for use in * any other way, as we want to keep all capability permission evaluation in * this one file. */ const cap_rights_t * cap_rights_fde(const struct filedescent *fdep) { return (cap_rights_fde_inline(fdep)); } const cap_rights_t * cap_rights(struct filedesc *fdp, int fd) { return (cap_rights_fde(&fdp->fd_ofiles[fd])); } int kern_cap_rights_limit(struct thread *td, int fd, cap_rights_t *rights) { struct filedesc *fdp; struct filedescent *fdep; u_long *ioctls; int error; fdp = td->td_proc->p_fd; FILEDESC_XLOCK(fdp); fdep = fdeget_noref(fdp, fd); if (fdep == NULL) { FILEDESC_XUNLOCK(fdp); return (EBADF); } ioctls = NULL; error = _cap_check(cap_rights(fdp, fd), rights, CAPFAIL_INCREASE); if (error == 0) { seqc_write_begin(&fdep->fde_seqc); fdep->fde_rights = *rights; if (!cap_rights_is_set(rights, CAP_IOCTL)) { ioctls = fdep->fde_ioctls; fdep->fde_ioctls = NULL; fdep->fde_nioctls = 0; } if (!cap_rights_is_set(rights, CAP_FCNTL)) fdep->fde_fcntls = 0; seqc_write_end(&fdep->fde_seqc); } FILEDESC_XUNLOCK(fdp); free(ioctls, M_FILECAPS); return (error); } /* * System call to limit rights of the given capability. */ int sys_cap_rights_limit(struct thread *td, struct cap_rights_limit_args *uap) { cap_rights_t rights; int error, version; cap_rights_init_zero(&rights); error = copyin(uap->rightsp, &rights, sizeof(rights.cr_rights[0])); if (error != 0) return (error); version = CAPVER(&rights); if (version != CAP_RIGHTS_VERSION_00) return (EINVAL); error = copyin(uap->rightsp, &rights, sizeof(rights.cr_rights[0]) * CAPARSIZE(&rights)); if (error != 0) return (error); /* Check for race. */ if (CAPVER(&rights) != version) return (EINVAL); if (!cap_rights_is_valid(&rights)) return (EINVAL); if (version != CAP_RIGHTS_VERSION) { rights.cr_rights[0] &= ~(0x3ULL << 62); rights.cr_rights[0] |= ((uint64_t)CAP_RIGHTS_VERSION << 62); } #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktrcaprights(&rights); #endif AUDIT_ARG_FD(uap->fd); AUDIT_ARG_RIGHTS(&rights); return (kern_cap_rights_limit(td, uap->fd, &rights)); } /* * System call to query the rights mask associated with a capability. */ int sys___cap_rights_get(struct thread *td, struct __cap_rights_get_args *uap) { struct filedesc *fdp; cap_rights_t rights; int error, fd, i, n; if (uap->version != CAP_RIGHTS_VERSION_00) return (EINVAL); fd = uap->fd; AUDIT_ARG_FD(fd); fdp = td->td_proc->p_fd; FILEDESC_SLOCK(fdp); if (fget_noref(fdp, fd) == NULL) { FILEDESC_SUNLOCK(fdp); return (EBADF); } rights = *cap_rights(fdp, fd); FILEDESC_SUNLOCK(fdp); n = uap->version + 2; if (uap->version != CAPVER(&rights)) { /* * For older versions we need to check if the descriptor * doesn't contain rights not understood by the caller. * If it does, we have to return an error. */ for (i = n; i < CAPARSIZE(&rights); i++) { if ((rights.cr_rights[i] & ~(0x7FULL << 57)) != 0) return (EINVAL); } } error = copyout(&rights, uap->rightsp, sizeof(rights.cr_rights[0]) * n); #ifdef KTRACE if (error == 0 && KTRPOINT(td, KTR_STRUCT)) ktrcaprights(&rights); #endif return (error); } /* * Test whether a capability grants the given ioctl command. * If descriptor doesn't have CAP_IOCTL, then ioctls list is empty and * ENOTCAPABLE will be returned. */ int cap_ioctl_check(struct filedesc *fdp, int fd, u_long cmd) { struct filedescent *fdep; u_long *cmds; ssize_t ncmds; long i; KASSERT(fd >= 0 && fd < fdp->fd_nfiles, ("%s: invalid fd=%d", __func__, fd)); fdep = fdeget_noref(fdp, fd); KASSERT(fdep != NULL, ("%s: invalid fd=%d", __func__, fd)); ncmds = fdep->fde_nioctls; if (ncmds == -1) return (0); cmds = fdep->fde_ioctls; for (i = 0; i < ncmds; i++) { if (cmds[i] == cmd) return (0); } return (ENOTCAPABLE); } /* * Check if the current ioctls list can be replaced by the new one. */ static int cap_ioctl_limit_check(struct filedescent *fdep, const u_long *cmds, size_t ncmds) { u_long *ocmds; ssize_t oncmds; u_long i; long j; oncmds = fdep->fde_nioctls; if (oncmds == -1) return (0); if (oncmds < (ssize_t)ncmds) return (ENOTCAPABLE); ocmds = fdep->fde_ioctls; for (i = 0; i < ncmds; i++) { for (j = 0; j < oncmds; j++) { if (cmds[i] == ocmds[j]) break; } if (j == oncmds) return (ENOTCAPABLE); } return (0); } int kern_cap_ioctls_limit(struct thread *td, int fd, u_long *cmds, size_t ncmds) { struct filedesc *fdp; struct filedescent *fdep; u_long *ocmds; int error; AUDIT_ARG_FD(fd); if (ncmds > IOCTLS_MAX_COUNT) { error = EINVAL; goto out_free; } fdp = td->td_proc->p_fd; FILEDESC_XLOCK(fdp); fdep = fdeget_noref(fdp, fd); if (fdep == NULL) { error = EBADF; goto out; } error = cap_ioctl_limit_check(fdep, cmds, ncmds); if (error != 0) goto out; ocmds = fdep->fde_ioctls; seqc_write_begin(&fdep->fde_seqc); fdep->fde_ioctls = cmds; fdep->fde_nioctls = ncmds; seqc_write_end(&fdep->fde_seqc); cmds = ocmds; error = 0; out: FILEDESC_XUNLOCK(fdp); out_free: free(cmds, M_FILECAPS); return (error); } int sys_cap_ioctls_limit(struct thread *td, struct cap_ioctls_limit_args *uap) { u_long *cmds; size_t ncmds; int error; ncmds = uap->ncmds; if (ncmds > IOCTLS_MAX_COUNT) return (EINVAL); if (ncmds == 0) { cmds = NULL; } else { cmds = malloc(sizeof(cmds[0]) * ncmds, M_FILECAPS, M_WAITOK); error = copyin(uap->cmds, cmds, sizeof(cmds[0]) * ncmds); if (error != 0) { free(cmds, M_FILECAPS); return (error); } } return (kern_cap_ioctls_limit(td, uap->fd, cmds, ncmds)); } int sys_cap_ioctls_get(struct thread *td, struct cap_ioctls_get_args *uap) { struct filedesc *fdp; struct filedescent *fdep; u_long *cmdsp, *dstcmds; size_t maxcmds, ncmds; int16_t count; int error, fd; fd = uap->fd; dstcmds = uap->cmds; maxcmds = uap->maxcmds; AUDIT_ARG_FD(fd); fdp = td->td_proc->p_fd; cmdsp = NULL; if (dstcmds != NULL) { cmdsp = malloc(sizeof(cmdsp[0]) * IOCTLS_MAX_COUNT, M_FILECAPS, M_WAITOK | M_ZERO); } FILEDESC_SLOCK(fdp); fdep = fdeget_noref(fdp, fd); if (fdep == NULL) { error = EBADF; FILEDESC_SUNLOCK(fdp); goto out; } count = fdep->fde_nioctls; if (count != -1 && cmdsp != NULL) { ncmds = MIN(count, maxcmds); memcpy(cmdsp, fdep->fde_ioctls, sizeof(cmdsp[0]) * ncmds); } FILEDESC_SUNLOCK(fdp); /* * If all ioctls are allowed (fde_nioctls == -1 && fde_ioctls == NULL) * the only sane thing we can do is to not populate the given array and * return CAP_IOCTLS_ALL. */ if (count != -1) { if (cmdsp != NULL) { error = copyout(cmdsp, dstcmds, sizeof(cmdsp[0]) * ncmds); if (error != 0) goto out; } td->td_retval[0] = count; } else { td->td_retval[0] = CAP_IOCTLS_ALL; } error = 0; out: free(cmdsp, M_FILECAPS); return (error); } /* * Test whether a capability grants the given fcntl command. */ int cap_fcntl_check_fde(struct filedescent *fdep, int cmd) { uint32_t fcntlcap; fcntlcap = (1 << cmd); KASSERT((CAP_FCNTL_ALL & fcntlcap) != 0, ("Unsupported fcntl=%d.", cmd)); if ((fdep->fde_fcntls & fcntlcap) != 0) return (0); return (ENOTCAPABLE); } int cap_fcntl_check(struct filedesc *fdp, int fd, int cmd) { KASSERT(fd >= 0 && fd < fdp->fd_nfiles, ("%s: invalid fd=%d", __func__, fd)); return (cap_fcntl_check_fde(&fdp->fd_ofiles[fd], cmd)); } int sys_cap_fcntls_limit(struct thread *td, struct cap_fcntls_limit_args *uap) { struct filedesc *fdp; struct filedescent *fdep; uint32_t fcntlrights; int fd; fd = uap->fd; fcntlrights = uap->fcntlrights; AUDIT_ARG_FD(fd); AUDIT_ARG_FCNTL_RIGHTS(fcntlrights); if ((fcntlrights & ~CAP_FCNTL_ALL) != 0) return (EINVAL); fdp = td->td_proc->p_fd; FILEDESC_XLOCK(fdp); fdep = fdeget_noref(fdp, fd); if (fdep == NULL) { FILEDESC_XUNLOCK(fdp); return (EBADF); } if ((fcntlrights & ~fdep->fde_fcntls) != 0) { FILEDESC_XUNLOCK(fdp); return (ENOTCAPABLE); } seqc_write_begin(&fdep->fde_seqc); fdep->fde_fcntls = fcntlrights; seqc_write_end(&fdep->fde_seqc); FILEDESC_XUNLOCK(fdp); return (0); } int sys_cap_fcntls_get(struct thread *td, struct cap_fcntls_get_args *uap) { struct filedesc *fdp; struct filedescent *fdep; uint32_t rights; int fd; fd = uap->fd; AUDIT_ARG_FD(fd); fdp = td->td_proc->p_fd; FILEDESC_SLOCK(fdp); fdep = fdeget_noref(fdp, fd); if (fdep == NULL) { FILEDESC_SUNLOCK(fdp); return (EBADF); } rights = fdep->fde_fcntls; FILEDESC_SUNLOCK(fdp); return (copyout(&rights, uap->fcntlrightsp, sizeof(rights))); } #else /* !CAPABILITIES */ /* * Stub Capability functions for when options CAPABILITIES isn't compiled * into the kernel. */ int sys_cap_rights_limit(struct thread *td, struct cap_rights_limit_args *uap) { return (ENOSYS); } int sys___cap_rights_get(struct thread *td, struct __cap_rights_get_args *uap) { return (ENOSYS); } int sys_cap_ioctls_limit(struct thread *td, struct cap_ioctls_limit_args *uap) { return (ENOSYS); } int sys_cap_ioctls_get(struct thread *td, struct cap_ioctls_get_args *uap) { return (ENOSYS); } int sys_cap_fcntls_limit(struct thread *td, struct cap_fcntls_limit_args *uap) { return (ENOSYS); } int sys_cap_fcntls_get(struct thread *td, struct cap_fcntls_get_args *uap) { return (ENOSYS); } #endif /* CAPABILITIES */ diff --git a/sys/sys/capsicum.h b/sys/sys/capsicum.h index 93c5f0c9c390..05712abf37d1 100644 --- a/sys/sys/capsicum.h +++ b/sys/sys/capsicum.h @@ -1,509 +1,516 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2008-2010, 2015 Robert N. M. Watson * Copyright (c) 2012 FreeBSD Foundation * All rights reserved. * * This software was developed at the University of Cambridge Computer * Laboratory with support from a grant from Google, Inc. * * Portions of this software were developed by Pawel Jakub Dawidek under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Definitions for FreeBSD capabilities facility. */ #ifndef _SYS_CAPSICUM_H_ #define _SYS_CAPSICUM_H_ #include #include #include #include #include +#include #ifndef _KERNEL #include #endif #define CAPRIGHT(idx, bit) ((1ULL << (57 + (idx))) | (bit)) /* * Possible rights on capabilities. * * Notes: * Some system calls don't require a capability in order to perform an * operation on an fd. These include: close, dup, dup2. * * sendfile is authorized using CAP_READ on the file and CAP_WRITE on the * socket. * * mmap() and aio*() system calls will need special attention as they may * involve reads or writes depending a great deal on context. */ /* INDEX 0 */ /* * General file I/O. */ /* Allows for openat(O_RDONLY), read(2), readv(2). */ #define CAP_READ CAPRIGHT(0, 0x0000000000000001ULL) /* Allows for openat(O_WRONLY | O_APPEND), write(2), writev(2). */ #define CAP_WRITE CAPRIGHT(0, 0x0000000000000002ULL) /* Allows for lseek(fd, 0, SEEK_CUR). */ #define CAP_SEEK_TELL CAPRIGHT(0, 0x0000000000000004ULL) /* Allows for lseek(2). */ #define CAP_SEEK (CAP_SEEK_TELL | 0x0000000000000008ULL) /* Allows for aio_read(2), pread(2), preadv(2). */ #define CAP_PREAD (CAP_SEEK | CAP_READ) /* * Allows for aio_write(2), openat(O_WRONLY) (without O_APPEND), pwrite(2), * pwritev(2). */ #define CAP_PWRITE (CAP_SEEK | CAP_WRITE) /* Allows for mmap(PROT_NONE). */ #define CAP_MMAP CAPRIGHT(0, 0x0000000000000010ULL) /* Allows for mmap(PROT_READ). */ #define CAP_MMAP_R (CAP_MMAP | CAP_SEEK | CAP_READ) /* Allows for mmap(PROT_WRITE). */ #define CAP_MMAP_W (CAP_MMAP | CAP_SEEK | CAP_WRITE) /* Allows for mmap(PROT_EXEC). */ #define CAP_MMAP_X (CAP_MMAP | CAP_SEEK | 0x0000000000000020ULL) /* Allows for mmap(PROT_READ | PROT_WRITE). */ #define CAP_MMAP_RW (CAP_MMAP_R | CAP_MMAP_W) /* Allows for mmap(PROT_READ | PROT_EXEC). */ #define CAP_MMAP_RX (CAP_MMAP_R | CAP_MMAP_X) /* Allows for mmap(PROT_WRITE | PROT_EXEC). */ #define CAP_MMAP_WX (CAP_MMAP_W | CAP_MMAP_X) /* Allows for mmap(PROT_READ | PROT_WRITE | PROT_EXEC). */ #define CAP_MMAP_RWX (CAP_MMAP_R | CAP_MMAP_W | CAP_MMAP_X) /* Allows for openat(O_CREAT). */ #define CAP_CREATE CAPRIGHT(0, 0x0000000000000040ULL) /* Allows for openat(O_EXEC) and fexecve(2) in turn. */ #define CAP_FEXECVE CAPRIGHT(0, 0x0000000000000080ULL) /* Allows for openat(O_SYNC), openat(O_FSYNC), fsync(2), aio_fsync(2). */ #define CAP_FSYNC CAPRIGHT(0, 0x0000000000000100ULL) /* Allows for openat(O_TRUNC), ftruncate(2). */ #define CAP_FTRUNCATE CAPRIGHT(0, 0x0000000000000200ULL) /* Lookups - used to constrain *at() calls. */ #define CAP_LOOKUP CAPRIGHT(0, 0x0000000000000400ULL) /* VFS methods. */ /* Allows for fchdir(2). */ #define CAP_FCHDIR CAPRIGHT(0, 0x0000000000000800ULL) /* Allows for fchflags(2). */ #define CAP_FCHFLAGS CAPRIGHT(0, 0x0000000000001000ULL) /* Allows for fchflags(2) and chflagsat(2). */ #define CAP_CHFLAGSAT (CAP_FCHFLAGS | CAP_LOOKUP) /* Allows for fchmod(2). */ #define CAP_FCHMOD CAPRIGHT(0, 0x0000000000002000ULL) /* Allows for fchmod(2) and fchmodat(2). */ #define CAP_FCHMODAT (CAP_FCHMOD | CAP_LOOKUP) /* Allows for fchown(2). */ #define CAP_FCHOWN CAPRIGHT(0, 0x0000000000004000ULL) /* Allows for fchown(2) and fchownat(2). */ #define CAP_FCHOWNAT (CAP_FCHOWN | CAP_LOOKUP) /* Allows for fcntl(2). */ #define CAP_FCNTL CAPRIGHT(0, 0x0000000000008000ULL) /* * Allows for flock(2), openat(O_SHLOCK), openat(O_EXLOCK), * fcntl(F_SETLK_REMOTE), fcntl(F_SETLKW), fcntl(F_SETLK), fcntl(F_GETLK). */ #define CAP_FLOCK CAPRIGHT(0, 0x0000000000010000ULL) /* Allows for fpathconf(2). */ #define CAP_FPATHCONF CAPRIGHT(0, 0x0000000000020000ULL) /* Allows for UFS background-fsck operations. */ #define CAP_FSCK CAPRIGHT(0, 0x0000000000040000ULL) /* Allows for fstat(2). */ #define CAP_FSTAT CAPRIGHT(0, 0x0000000000080000ULL) /* Allows for fstat(2), fstatat(2) and faccessat(2). */ #define CAP_FSTATAT (CAP_FSTAT | CAP_LOOKUP) /* Allows for fstatfs(2). */ #define CAP_FSTATFS CAPRIGHT(0, 0x0000000000100000ULL) /* Allows for futimens(2) and futimes(2). */ #define CAP_FUTIMES CAPRIGHT(0, 0x0000000000200000ULL) /* Allows for futimens(2), futimes(2), futimesat(2) and utimensat(2). */ #define CAP_FUTIMESAT (CAP_FUTIMES | CAP_LOOKUP) /* Allows for linkat(2) (target directory descriptor). */ #define CAP_LINKAT_TARGET (CAP_LOOKUP | 0x0000000000400000ULL) /* Allows for mkdirat(2). */ #define CAP_MKDIRAT (CAP_LOOKUP | 0x0000000000800000ULL) /* Allows for mkfifoat(2). */ #define CAP_MKFIFOAT (CAP_LOOKUP | 0x0000000001000000ULL) /* Allows for mknodat(2). */ #define CAP_MKNODAT (CAP_LOOKUP | 0x0000000002000000ULL) /* Allows for renameat(2) (source directory descriptor). */ #define CAP_RENAMEAT_SOURCE (CAP_LOOKUP | 0x0000000004000000ULL) /* Allows for symlinkat(2). */ #define CAP_SYMLINKAT (CAP_LOOKUP | 0x0000000008000000ULL) /* * Allows for unlinkat(2) and renameat(2) if destination object exists and * will be removed. */ #define CAP_UNLINKAT (CAP_LOOKUP | 0x0000000010000000ULL) /* Socket operations. */ /* Allows for accept(2) and accept4(2). */ #define CAP_ACCEPT CAPRIGHT(0, 0x0000000020000000ULL) /* Allows for bind(2). */ #define CAP_BIND CAPRIGHT(0, 0x0000000040000000ULL) /* Allows for connect(2). */ #define CAP_CONNECT CAPRIGHT(0, 0x0000000080000000ULL) /* Allows for getpeername(2). */ #define CAP_GETPEERNAME CAPRIGHT(0, 0x0000000100000000ULL) /* Allows for getsockname(2). */ #define CAP_GETSOCKNAME CAPRIGHT(0, 0x0000000200000000ULL) /* Allows for getsockopt(2). */ #define CAP_GETSOCKOPT CAPRIGHT(0, 0x0000000400000000ULL) /* Allows for listen(2). */ #define CAP_LISTEN CAPRIGHT(0, 0x0000000800000000ULL) /* Allows for sctp_peeloff(2). */ #define CAP_PEELOFF CAPRIGHT(0, 0x0000001000000000ULL) #define CAP_RECV CAP_READ #define CAP_SEND CAP_WRITE /* Allows for setsockopt(2). */ #define CAP_SETSOCKOPT CAPRIGHT(0, 0x0000002000000000ULL) /* Allows for shutdown(2). */ #define CAP_SHUTDOWN CAPRIGHT(0, 0x0000004000000000ULL) /* Allows for bindat(2) on a directory descriptor. */ #define CAP_BINDAT (CAP_LOOKUP | 0x0000008000000000ULL) /* Allows for connectat(2) on a directory descriptor. */ #define CAP_CONNECTAT (CAP_LOOKUP | 0x0000010000000000ULL) /* Allows for linkat(2) (source directory descriptor). */ #define CAP_LINKAT_SOURCE (CAP_LOOKUP | 0x0000020000000000ULL) /* Allows for renameat(2) (target directory descriptor). */ #define CAP_RENAMEAT_TARGET (CAP_LOOKUP | 0x0000040000000000ULL) #define CAP_SOCK_CLIENT \ (CAP_CONNECT | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT | \ CAP_PEELOFF | CAP_RECV | CAP_SEND | CAP_SETSOCKOPT | CAP_SHUTDOWN) #define CAP_SOCK_SERVER \ (CAP_ACCEPT | CAP_BIND | CAP_GETPEERNAME | CAP_GETSOCKNAME | \ CAP_GETSOCKOPT | CAP_LISTEN | CAP_PEELOFF | CAP_RECV | CAP_SEND | \ CAP_SETSOCKOPT | CAP_SHUTDOWN) /* All used bits for index 0. */ #define CAP_ALL0 CAPRIGHT(0, 0x000007FFFFFFFFFFULL) /* Available bits for index 0. */ #define CAP_UNUSED0_44 CAPRIGHT(0, 0x0000080000000000ULL) /* ... */ #define CAP_UNUSED0_57 CAPRIGHT(0, 0x0100000000000000ULL) /* INDEX 1 */ /* Mandatory Access Control. */ /* Allows for mac_get_fd(3). */ #define CAP_MAC_GET CAPRIGHT(1, 0x0000000000000001ULL) /* Allows for mac_set_fd(3). */ #define CAP_MAC_SET CAPRIGHT(1, 0x0000000000000002ULL) /* Methods on semaphores. */ #define CAP_SEM_GETVALUE CAPRIGHT(1, 0x0000000000000004ULL) #define CAP_SEM_POST CAPRIGHT(1, 0x0000000000000008ULL) #define CAP_SEM_WAIT CAPRIGHT(1, 0x0000000000000010ULL) /* Allows select(2) and poll(2) on descriptor. */ #define CAP_EVENT CAPRIGHT(1, 0x0000000000000020ULL) /* Allows for kevent(2) on kqueue descriptor with eventlist != NULL. */ #define CAP_KQUEUE_EVENT CAPRIGHT(1, 0x0000000000000040ULL) /* Strange and powerful rights that should not be given lightly. */ /* Allows for ioctl(2). */ #define CAP_IOCTL CAPRIGHT(1, 0x0000000000000080ULL) #define CAP_TTYHOOK CAPRIGHT(1, 0x0000000000000100ULL) /* Process management via process descriptors. */ /* Allows for pdgetpid(2). */ #define CAP_PDGETPID CAPRIGHT(1, 0x0000000000000200ULL) /* * Allows for pdwait4(2). * * XXX: this constant was imported unused, but is targeted to be implemented * in the future (bug 235871). */ #define CAP_PDWAIT CAPRIGHT(1, 0x0000000000000400ULL) /* Allows for pdkill(2). */ #define CAP_PDKILL CAPRIGHT(1, 0x0000000000000800ULL) /* Extended attributes. */ /* Allows for extattr_delete_fd(2). */ #define CAP_EXTATTR_DELETE CAPRIGHT(1, 0x0000000000001000ULL) /* Allows for extattr_get_fd(2). */ #define CAP_EXTATTR_GET CAPRIGHT(1, 0x0000000000002000ULL) /* Allows for extattr_list_fd(2). */ #define CAP_EXTATTR_LIST CAPRIGHT(1, 0x0000000000004000ULL) /* Allows for extattr_set_fd(2). */ #define CAP_EXTATTR_SET CAPRIGHT(1, 0x0000000000008000ULL) /* Access Control Lists. */ /* Allows for acl_valid_fd_np(3). */ #define CAP_ACL_CHECK CAPRIGHT(1, 0x0000000000010000ULL) /* Allows for acl_delete_fd_np(3). */ #define CAP_ACL_DELETE CAPRIGHT(1, 0x0000000000020000ULL) /* Allows for acl_get_fd(3) and acl_get_fd_np(3). */ #define CAP_ACL_GET CAPRIGHT(1, 0x0000000000040000ULL) /* Allows for acl_set_fd(3) and acl_set_fd_np(3). */ #define CAP_ACL_SET CAPRIGHT(1, 0x0000000000080000ULL) /* Allows for kevent(2) on kqueue descriptor with changelist != NULL. */ #define CAP_KQUEUE_CHANGE CAPRIGHT(1, 0x0000000000100000ULL) #define CAP_KQUEUE (CAP_KQUEUE_EVENT | CAP_KQUEUE_CHANGE) /* All used bits for index 1. */ #define CAP_ALL1 CAPRIGHT(1, 0x00000000001FFFFFULL) /* Available bits for index 1. */ #define CAP_UNUSED1_22 CAPRIGHT(1, 0x0000000000200000ULL) /* ... */ #define CAP_UNUSED1_57 CAPRIGHT(1, 0x0100000000000000ULL) /* Backward compatibility. */ #define CAP_POLL_EVENT CAP_EVENT #define CAP_ALL(rights) do { \ (rights)->cr_rights[0] = \ ((uint64_t)CAP_RIGHTS_VERSION << 62) | CAP_ALL0; \ (rights)->cr_rights[1] = CAP_ALL1; \ } while (0) #define CAP_NONE(rights) do { \ (rights)->cr_rights[0] = \ ((uint64_t)CAP_RIGHTS_VERSION << 62) | CAPRIGHT(0, 0ULL); \ (rights)->cr_rights[1] = CAPRIGHT(1, 0ULL); \ } while (0) #define CAPRVER(right) ((int)((right) >> 62)) #define CAPVER(rights) CAPRVER((rights)->cr_rights[0]) #define CAPARSIZE(rights) (CAPVER(rights) + 2) #define CAPIDXBIT(right) ((int)(((right) >> 57) & 0x1F)) /* * Allowed fcntl(2) commands. */ #define CAP_FCNTL_GETFL (1 << F_GETFL) #define CAP_FCNTL_SETFL (1 << F_SETFL) #define CAP_FCNTL_GETOWN (1 << F_GETOWN) #define CAP_FCNTL_SETOWN (1 << F_SETOWN) #define CAP_FCNTL_ALL (CAP_FCNTL_GETFL | CAP_FCNTL_SETFL | \ CAP_FCNTL_GETOWN | CAP_FCNTL_SETOWN) #define CAP_IOCTLS_ALL SSIZE_MAX __BEGIN_DECLS #define cap_rights_init(...) \ __cap_rights_init(CAP_RIGHTS_VERSION, __VA_ARGS__, 0ULL) cap_rights_t *__cap_rights_init(int version, cap_rights_t *rights, ...); #define cap_rights_set(...) \ __cap_rights_set(__VA_ARGS__, 0ULL) cap_rights_t *__cap_rights_set(cap_rights_t *rights, ...); #define cap_rights_clear(...) \ __cap_rights_clear(__VA_ARGS__, 0ULL) cap_rights_t *__cap_rights_clear(cap_rights_t *rights, ...); #define cap_rights_is_set(...) \ __cap_rights_is_set(__VA_ARGS__, 0ULL) bool __cap_rights_is_set(const cap_rights_t *rights, ...); bool cap_rights_is_valid(const cap_rights_t *rights); cap_rights_t *cap_rights_merge(cap_rights_t *dst, const cap_rights_t *src); cap_rights_t *cap_rights_remove(cap_rights_t *dst, const cap_rights_t *src); #ifdef _KERNEL /* * We only support one size to reduce branching. */ _Static_assert(CAP_RIGHTS_VERSION == CAP_RIGHTS_VERSION_00, "unsupported version of capsicum rights"); #define cap_rights_init_zero(r) ({ \ cap_rights_t *_r = (r); \ CAP_NONE(_r); \ _r; \ }) #define cap_rights_init_one(r, right) ({ \ CTASSERT(CAPRVER(right) == CAP_RIGHTS_VERSION); \ cap_rights_t *_r = (r); \ CAP_NONE(_r); \ _r->cr_rights[CAPIDXBIT(right) - 1] |= right; \ _r; \ }) #define cap_rights_set_one(r, right) ({ \ CTASSERT(CAPRVER(right) == CAP_RIGHTS_VERSION); \ cap_rights_t *_r = (r); \ _r->cr_rights[CAPIDXBIT(right) - 1] |= right; \ _r; \ }) /* * Allow checking caps which are possibly getting modified at the same time. * The caller is expected to determine whether the result is legitimate via * other means, see fget_unlocked for an example. */ static inline bool cap_rights_contains_transient(const cap_rights_t *big, const cap_rights_t *little) { if (__predict_true( (big->cr_rights[0] & little->cr_rights[0]) == little->cr_rights[0] && (big->cr_rights[1] & little->cr_rights[1]) == little->cr_rights[1])) return (true); return (false); } #define cap_rights_contains cap_rights_contains_transient int cap_check_failed_notcapable(const cap_rights_t *havep, const cap_rights_t *needp); static inline int cap_check_inline(const cap_rights_t *havep, const cap_rights_t *needp) { if (__predict_false(!cap_rights_contains(havep, needp))) return (cap_check_failed_notcapable(havep, needp)); return (0); } static inline int cap_check_inline_transient(const cap_rights_t *havep, const cap_rights_t *needp) { if (__predict_false(!cap_rights_contains(havep, needp))) return (1); return (0); } #else bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little); #endif __END_DECLS #ifdef _KERNEL #include +#ifdef KTRACE +#define CAP_TRACING(td) KTRPOINT((td), KTR_CAPFAIL) +#else +#define CAP_TRACING(td) 0 +#endif + #define IN_CAPABILITY_MODE(td) (((td)->td_ucred->cr_flags & CRED_FLAG_CAPMODE) != 0) struct filedesc; struct filedescent; /* * Test whether a capability grants the requested rights. */ int cap_check(const cap_rights_t *havep, const cap_rights_t *needp); /* * Convert capability rights into VM access flags. */ vm_prot_t cap_rights_to_vmprot(const cap_rights_t *havep); /* * For the purposes of procstat(1) and similar tools, allow kern_descrip.c to * extract the rights from a capability. * * Dereferencing fdep requires filedesc.h, but including it would cause * significant pollution. Instead add a macro for consumers which want it, * most notably kern_descrip.c. */ #define cap_rights_fde_inline(fdep) (&(fdep)->fde_rights) const cap_rights_t *cap_rights_fde(const struct filedescent *fde); const cap_rights_t *cap_rights(struct filedesc *fdp, int fd); int cap_ioctl_check(struct filedesc *fdp, int fd, u_long cmd); int cap_fcntl_check_fde(struct filedescent *fde, int cmd); int cap_fcntl_check(struct filedesc *fdp, int fd, int cmd); extern bool trap_enotcap; #else /* !_KERNEL */ __BEGIN_DECLS /* * cap_enter(): Cause the process to enter capability mode, which will * prevent it from directly accessing global namespaces. System calls will * be limited to process-local, process-inherited, or file descriptor * operations. If already in capability mode, a no-op. */ int cap_enter(void); /* * Are we sandboxed (in capability mode)? * This is a libc wrapper around the cap_getmode(2) system call. */ bool cap_sandboxed(void); /* * cap_getmode(): Are we in capability mode? */ int cap_getmode(u_int *modep); /* * Limits capability rights for the given descriptor (CAP_*). */ int cap_rights_limit(int fd, const cap_rights_t *rights); /* * Returns capability rights for the given descriptor. */ #define cap_rights_get(fd, rights) \ __cap_rights_get(CAP_RIGHTS_VERSION, (fd), (rights)) int __cap_rights_get(int version, int fd, cap_rights_t *rights); /* * Limits allowed ioctls for the given descriptor. */ int cap_ioctls_limit(int fd, const cap_ioctl_t *cmds, size_t ncmds); /* * Returns array of allowed ioctls for the given descriptor. * If all ioctls are allowed, the cmds array is not populated and * the function returns CAP_IOCTLS_ALL. */ ssize_t cap_ioctls_get(int fd, cap_ioctl_t *cmds, size_t maxcmds); /* * Limits allowed fcntls for the given descriptor (CAP_FCNTL_*). */ int cap_fcntls_limit(int fd, uint32_t fcntlrights); /* * Returns bitmask of allowed fcntls for the given descriptor. */ int cap_fcntls_get(int fd, uint32_t *fcntlrightsp); __END_DECLS #endif /* !_KERNEL */ #endif /* !_SYS_CAPSICUM_H_ */ diff --git a/sys/sys/ktrace.h b/sys/sys/ktrace.h index 594f912b02ef..6d8280c415a6 100644 --- a/sys/sys/ktrace.h +++ b/sys/sys/ktrace.h @@ -1,355 +1,371 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ktrace.h 8.1 (Berkeley) 6/2/93 */ #ifndef _SYS_KTRACE_H_ #define _SYS_KTRACE_H_ +#include #include #include +#include #include /* * operations to ktrace system call (KTROP(op)) */ #define KTROP_SET 0 /* set trace points */ #define KTROP_CLEAR 1 /* clear trace points */ #define KTROP_CLEARFILE 2 /* stop all tracing to file */ #define KTROP(o) ((o)&3) /* macro to extract operation */ /* * flags (ORed in with operation) */ #define KTRFLAG_DESCEND 4 /* perform op on all children too */ /* * ktrace record header */ struct ktr_header_v0 { int ktr_len; /* length of buf */ short ktr_type; /* trace record type */ pid_t ktr_pid; /* process id */ char ktr_comm[MAXCOMLEN + 1];/* command name */ struct timeval ktr_time; /* timestamp */ long ktr_tid; /* thread id */ }; struct ktr_header { int ktr_len; /* length of buf */ short ktr_type; /* trace record type */ short ktr_version; /* ktr_header version */ pid_t ktr_pid; /* process id */ char ktr_comm[MAXCOMLEN + 1];/* command name */ struct timespec ktr_time; /* timestamp */ /* XXX: make ktr_tid an lwpid_t on next ABI break */ long ktr_tid; /* thread id */ int ktr_cpu; /* cpu id */ }; #define KTR_VERSION0 0 #define KTR_VERSION1 1 #define KTR_OFFSET_V0 sizeof(struct ktr_header_v0) - \ sizeof(struct ktr_header) /* * Test for kernel trace point (MP SAFE). * * KTRCHECK() just checks that the type is enabled and is only for * internal use in the ktrace subsystem. KTRPOINT() checks against * ktrace recursion as well as checking that the type is enabled and * is the public interface. */ #define KTRCHECK(td, type) ((td)->td_proc->p_traceflag & (1 << type)) #define KTRPOINT(td, type) (__predict_false(KTRCHECK((td), (type)))) #define KTRCHECKDRAIN(td) (!(STAILQ_EMPTY(&(td)->td_proc->p_ktr))) #define KTRUSERRET(td) do { \ if (__predict_false(KTRCHECKDRAIN(td))) \ ktruserret(td); \ } while (0) /* * ktrace record types */ /* * KTR_SYSCALL - system call record */ #define KTR_SYSCALL 1 struct ktr_syscall { short ktr_code; /* syscall number */ short ktr_narg; /* number of arguments */ /* * followed by ktr_narg register_t */ register_t ktr_args[1]; }; /* * KTR_SYSRET - return from system call record */ #define KTR_SYSRET 2 struct ktr_sysret { short ktr_code; short ktr_eosys; int ktr_error; register_t ktr_retval; }; /* * KTR_NAMEI - namei record */ #define KTR_NAMEI 3 /* record contains pathname */ /* * KTR_GENIO - trace generic process i/o */ #define KTR_GENIO 4 struct ktr_genio { int ktr_fd; enum uio_rw ktr_rw; /* * followed by data successfully read/written */ }; /* * KTR_PSIG - trace processed signal */ #define KTR_PSIG 5 struct ktr_psig { int signo; sig_t action; int code; sigset_t mask; }; /* * KTR_CSW - trace context switches */ #define KTR_CSW 6 struct ktr_csw_old { int out; /* 1 if switch out, 0 if switch in */ int user; /* 1 if usermode (ivcsw), 0 if kernel (vcsw) */ }; struct ktr_csw { int out; /* 1 if switch out, 0 if switch in */ int user; /* 1 if usermode (ivcsw), 0 if kernel (vcsw) */ char wmesg[8]; }; /* * KTR_USER - data coming from userland */ #define KTR_USER_MAXLEN 2048 /* maximum length of passed data */ #define KTR_USER 7 /* * KTR_STRUCT - misc. structs */ #define KTR_STRUCT 8 /* * record contains null-terminated struct name followed by * struct contents */ struct sockaddr; struct stat; struct sysentvec; /* * KTR_SYSCTL - name of a sysctl MIB */ #define KTR_SYSCTL 9 /* record contains null-terminated MIB name */ /* * KTR_PROCCTOR - trace process creation (multiple ABI support) */ #define KTR_PROCCTOR 10 struct ktr_proc_ctor { u_int sv_flags; /* struct sysentvec sv_flags copy */ }; /* * KTR_PROCDTOR - trace process destruction (multiple ABI support) */ #define KTR_PROCDTOR 11 /* * KTR_CAPFAIL - trace capability check failures */ #define KTR_CAPFAIL 12 -enum ktr_cap_fail_type { +enum ktr_cap_violation { CAPFAIL_NOTCAPABLE, /* insufficient capabilities in cap_check() */ - CAPFAIL_INCREASE, /* attempt to increase capabilities */ + CAPFAIL_INCREASE, /* attempt to increase rights on a capability */ CAPFAIL_SYSCALL, /* disallowed system call */ - CAPFAIL_LOOKUP, /* disallowed VFS lookup */ + CAPFAIL_SIGNAL, /* sent signal to process other than self */ + CAPFAIL_PROTO, /* disallowed protocol */ + CAPFAIL_SOCKADDR, /* restricted address lookup */ + CAPFAIL_NAMEI, /* restricted namei lookup */ + CAPFAIL_CPUSET, /* restricted CPU set modification */ }; + +union ktr_cap_data { + cap_rights_t cap_rights[2]; +#define cap_needed cap_rights[0] +#define cap_held cap_rights[1] + int cap_int; + struct sockaddr cap_sockaddr; + char cap_path[MAXPATHLEN]; +}; + struct ktr_cap_fail { - enum ktr_cap_fail_type cap_type; - cap_rights_t cap_needed; - cap_rights_t cap_held; + enum ktr_cap_violation cap_type; + short cap_code; + u_int cap_svflags; + union ktr_cap_data cap_data; }; /* * KTR_FAULT - page fault record */ #define KTR_FAULT 13 struct ktr_fault { vm_offset_t vaddr; int type; }; /* * KTR_FAULTEND - end of page fault record */ #define KTR_FAULTEND 14 struct ktr_faultend { int result; }; /* * KTR_STRUCT_ARRAY - array of misc. structs */ #define KTR_STRUCT_ARRAY 15 struct ktr_struct_array { size_t struct_size; /* * Followed by null-terminated structure name and then payload * contents. */ }; /* * KTR_DROP - If this bit is set in ktr_type, then at least one event * between the previous record and this record was dropped. */ #define KTR_DROP 0x8000 /* * KTR_VERSIONED - If this bit is set in ktr_type, then the kernel * exposes the new struct ktr_header (versioned), otherwise the old * struct ktr_header_v0 is exposed. */ #define KTR_VERSIONED 0x4000 #define KTR_TYPE (KTR_DROP | KTR_VERSIONED) /* * kernel trace points (in p_traceflag) */ #define KTRFAC_MASK 0x00ffffff #define KTRFAC_SYSCALL (1<sa_len) #define ktrstat(s) \ ktrstruct("stat", (s), sizeof(struct stat)) #define ktrstat_error(s, error) \ ktrstruct_error("stat", (s), sizeof(struct stat), error) #define ktrcpuset(s, l) \ ktrstruct("cpuset_t", (s), l) extern u_int ktr_geniosize; #ifdef KTRACE extern int ktr_filesize_limit_signal; #define __ktrace_used #else #define ktr_filesize_limit_signal 0 #define __ktrace_used __unused #endif #else #include __BEGIN_DECLS int ktrace(const char *, int, int, pid_t); int utrace(const void *, size_t); __END_DECLS #endif #endif diff --git a/usr.bin/kdump/kdump.c b/usr.bin/kdump/kdump.c index d69c019bd6be..abbe57b0b75e 100644 --- a/usr.bin/kdump/kdump.c +++ b/usr.bin/kdump/kdump.c @@ -1,2350 +1,2389 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1988, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint #if 0 static char sccsid[] = "@(#)kdump.c 8.1 (Berkeley) 6/6/93"; #endif #endif /* not lint */ #include #define _WANT_KERNEL_ERRNO #ifdef __LP64__ #define _WANT_KEVENT32 #endif #define _WANT_FREEBSD11_KEVENT #define _WANT_FREEBSD_BITSET #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef WITH_CASPER #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ktrace.h" #include "kdump.h" #ifdef WITH_CASPER #include #include #include #endif int fetchprocinfo(struct ktr_header *, u_int *); u_int findabi(struct ktr_header *); int fread_tail(void *, int, int); void dumpheader(struct ktr_header *, u_int); void dumptimeval(struct ktr_header_v0 *kth); void dumptimespec(struct ktr_header *kth); void ktrsyscall(struct ktr_syscall *, u_int); void ktrsysret(struct ktr_sysret *, u_int); void ktrnamei(char *, int); void hexdump(char *, int, int); void visdump(char *, int, int); void ktrgenio(struct ktr_genio *, int); void ktrpsig(struct ktr_psig *); void ktrcsw(struct ktr_csw *); void ktrcsw_old(struct ktr_csw_old *); void ktruser(int, void *); void ktrcaprights(cap_rights_t *); void ktritimerval(struct itimerval *it); void ktrsockaddr(struct sockaddr *); void ktrstat(struct stat *); void ktrstruct(char *, size_t); void ktrcapfail(struct ktr_cap_fail *); void ktrfault(struct ktr_fault *); void ktrfaultend(struct ktr_faultend *); void ktrkevent(struct kevent *); void ktrpollfd(struct pollfd *); void ktrstructarray(struct ktr_struct_array *, size_t); void ktrbitset(char *, struct bitset *, size_t); void ktrsyscall_freebsd(struct ktr_syscall *ktr, register_t **resip, int *resnarg, char *resc, u_int sv_flags); void usage(void); #define TIMESTAMP_NONE 0x0 #define TIMESTAMP_ABSOLUTE 0x1 #define TIMESTAMP_ELAPSED 0x2 #define TIMESTAMP_RELATIVE 0x4 bool decimal, fancy = true, resolv; static bool abiflag, suppressdata, syscallno, tail, threads, cpuflag; static int timestamp, maxdata; static const char *tracefile = DEF_TRACEFILE; static struct ktr_header ktr_header; static short version; #define TIME_FORMAT "%b %e %T %Y" #define eqs(s1, s2) (strcmp((s1), (s2)) == 0) struct proc_info { TAILQ_ENTRY(proc_info) info; u_int sv_flags; pid_t pid; }; static TAILQ_HEAD(trace_procs, proc_info) trace_procs; #ifdef WITH_CASPER static cap_channel_t *cappwd, *capgrp; static int cappwdgrp_setup(cap_channel_t **cappwdp, cap_channel_t **capgrpp) { cap_channel_t *capcas, *cappwdloc, *capgrploc; const char *cmds[1], *fields[1]; capcas = cap_init(); if (capcas == NULL) { err(1, "unable to create casper process"); exit(1); } cappwdloc = cap_service_open(capcas, "system.pwd"); capgrploc = cap_service_open(capcas, "system.grp"); /* Casper capability no longer needed. */ cap_close(capcas); if (cappwdloc == NULL || capgrploc == NULL) { if (cappwdloc == NULL) warn("unable to open system.pwd service"); if (capgrploc == NULL) warn("unable to open system.grp service"); exit(1); } /* Limit system.pwd to only getpwuid() function and pw_name field. */ cmds[0] = "getpwuid"; if (cap_pwd_limit_cmds(cappwdloc, cmds, 1) < 0) err(1, "unable to limit system.pwd service"); fields[0] = "pw_name"; if (cap_pwd_limit_fields(cappwdloc, fields, 1) < 0) err(1, "unable to limit system.pwd service"); /* Limit system.grp to only getgrgid() function and gr_name field. */ cmds[0] = "getgrgid"; if (cap_grp_limit_cmds(capgrploc, cmds, 1) < 0) err(1, "unable to limit system.grp service"); fields[0] = "gr_name"; if (cap_grp_limit_fields(capgrploc, fields, 1) < 0) err(1, "unable to limit system.grp service"); *cappwdp = cappwdloc; *capgrpp = capgrploc; return (0); } #endif /* WITH_CASPER */ void print_integer_arg(const char *(*decoder)(int), int value) { const char *str; str = decoder(value); if (str != NULL) printf("%s", str); else { if (decimal) printf("", value); else printf("", value); } } /* Like print_integer_arg but unknown values are treated as valid. */ void print_integer_arg_valid(const char *(*decoder)(int), int value) { const char *str; str = decoder(value); if (str != NULL) printf("%s", str); else { if (decimal) printf("%d", value); else printf("%#x", value); } } bool print_mask_arg_part(bool (*decoder)(FILE *, int, int *), int value, int *rem) { printf("%#x<", value); return (decoder(stdout, value, rem)); } void print_mask_arg(bool (*decoder)(FILE *, int, int *), int value) { bool invalid; int rem; invalid = !print_mask_arg_part(decoder, value, &rem); printf(">"); if (invalid) printf("%u", rem); } void print_mask_arg0(bool (*decoder)(FILE *, int, int *), int value) { bool invalid; int rem; if (value == 0) { printf("0"); return; } printf("%#x<", value); invalid = !decoder(stdout, value, &rem); printf(">"); if (invalid) printf("%u", rem); } static void decode_fileflags(fflags_t value) { bool invalid; fflags_t rem; if (value == 0) { printf("0"); return; } printf("%#x<", value); invalid = !sysdecode_fileflags(stdout, value, &rem); printf(">"); if (invalid) printf("%u", rem); } void decode_filemode(int value) { bool invalid; int rem; if (value == 0) { printf("0"); return; } printf("%#o<", value); invalid = !sysdecode_filemode(stdout, value, &rem); printf(">"); if (invalid) printf("%u", rem); } void print_mask_arg32(bool (*decoder)(FILE *, uint32_t, uint32_t *), uint32_t value) { bool invalid; uint32_t rem; printf("%#x<", value); invalid = !decoder(stdout, value, &rem); printf(">"); if (invalid) printf("%u", rem); } void print_mask_argul(bool (*decoder)(FILE *, u_long, u_long *), u_long value) { bool invalid; u_long rem; if (value == 0) { printf("0"); return; } printf("%#lx<", value); invalid = !decoder(stdout, value, &rem); printf(">"); if (invalid) printf("%lu", rem); } int main(int argc, char *argv[]) { int ch, ktrlen, size; void *m; int trpoints = ALL_POINTS; int drop_logged; pid_t pid = 0; u_int sv_flags; setlocale(LC_CTYPE, ""); timestamp = TIMESTAMP_NONE; while ((ch = getopt(argc,argv,"f:cdElm:np:AHRrSsTt:")) != -1) switch (ch) { case 'A': abiflag = true; break; case 'f': tracefile = optarg; break; case 'c': cpuflag = true; break; case 'd': decimal = true; break; case 'l': tail = true; break; case 'm': maxdata = atoi(optarg); break; case 'n': fancy = false; break; case 'p': pid = atoi(optarg); break; case 'r': resolv = true; break; case 'S': syscallno = true; break; case 's': suppressdata = true; break; case 'E': timestamp |= TIMESTAMP_ELAPSED; break; case 'H': threads = true; break; case 'R': timestamp |= TIMESTAMP_RELATIVE; break; case 'T': timestamp |= TIMESTAMP_ABSOLUTE; break; case 't': trpoints = getpoints(optarg); if (trpoints < 0) errx(1, "unknown trace point in %s", optarg); break; default: usage(); } if (argc > optind) usage(); m = malloc(size = 1025); if (m == NULL) errx(1, "%s", strerror(ENOMEM)); if (strcmp(tracefile, "-") != 0) if (!freopen(tracefile, "r", stdin)) err(1, "%s", tracefile); caph_cache_catpages(); caph_cache_tzdata(); #ifdef WITH_CASPER if (resolv) { if (cappwdgrp_setup(&cappwd, &capgrp) < 0) { cappwd = NULL; capgrp = NULL; } } if (!resolv || (cappwd != NULL && capgrp != NULL)) { if (caph_enter() < 0) err(1, "unable to enter capability mode"); } #else if (!resolv) { if (caph_enter() < 0) err(1, "unable to enter capability mode"); } #endif if (caph_limit_stdio() == -1) err(1, "unable to limit stdio"); TAILQ_INIT(&trace_procs); drop_logged = 0; while (fread_tail(&ktr_header, sizeof(struct ktr_header), 1)) { if (ktr_header.ktr_type & KTR_VERSIONED) { ktr_header.ktr_type &= ~KTR_VERSIONED; version = ktr_header.ktr_version; } else version = KTR_VERSION0; if (ktr_header.ktr_type & KTR_DROP) { ktr_header.ktr_type &= ~KTR_DROP; if (!drop_logged && threads) { printf( "%6d %6d %-8.*s Events dropped.\n", ktr_header.ktr_pid, ktr_header.ktr_tid > 0 ? (lwpid_t)ktr_header.ktr_tid : 0, MAXCOMLEN, ktr_header.ktr_comm); drop_logged = 1; } else if (!drop_logged) { printf("%6d %-8.*s Events dropped.\n", ktr_header.ktr_pid, MAXCOMLEN, ktr_header.ktr_comm); drop_logged = 1; } } if ((ktrlen = ktr_header.ktr_len) < 0) errx(1, "bogus length 0x%x", ktrlen); if (ktrlen > size) { m = realloc(m, ktrlen+1); if (m == NULL) errx(1, "%s", strerror(ENOMEM)); size = ktrlen; } if (version == KTR_VERSION0 && fseek(stdin, KTR_OFFSET_V0, SEEK_CUR) < 0) errx(1, "%s", strerror(errno)); if (ktrlen && fread_tail(m, ktrlen, 1) == 0) errx(1, "data too short"); if (fetchprocinfo(&ktr_header, (u_int *)m) != 0) continue; if (pid && ktr_header.ktr_pid != pid && ktr_header.ktr_tid != pid) continue; if ((trpoints & (1<ktr_type) { case KTR_PROCCTOR: TAILQ_FOREACH(pi, &trace_procs, info) { if (pi->pid == kth->ktr_pid) { TAILQ_REMOVE(&trace_procs, pi, info); break; } } pi = malloc(sizeof(struct proc_info)); if (pi == NULL) errx(1, "%s", strerror(ENOMEM)); pi->sv_flags = *flags; pi->pid = kth->ktr_pid; TAILQ_INSERT_TAIL(&trace_procs, pi, info); return (1); case KTR_PROCDTOR: TAILQ_FOREACH(pi, &trace_procs, info) { if (pi->pid == kth->ktr_pid) { TAILQ_REMOVE(&trace_procs, pi, info); free(pi); break; } } return (1); } return (0); } u_int findabi(struct ktr_header *kth) { struct proc_info *pi; TAILQ_FOREACH(pi, &trace_procs, info) { if (pi->pid == kth->ktr_pid) { return (pi->sv_flags); } } return (0); } void dumptimeval(struct ktr_header_v0 *kth) { static struct timeval prevtime, prevtime_e; struct timeval temp; const char *sign; if (timestamp & TIMESTAMP_ABSOLUTE) { printf("%jd.%06ld ", (intmax_t)kth->ktr_time.tv_sec, kth->ktr_time.tv_usec); } if (timestamp & TIMESTAMP_ELAPSED) { if (prevtime_e.tv_sec == 0) prevtime_e = kth->ktr_time; timersub(&kth->ktr_time, &prevtime_e, &temp); printf("%jd.%06ld ", (intmax_t)temp.tv_sec, temp.tv_usec); } if (timestamp & TIMESTAMP_RELATIVE) { if (prevtime.tv_sec == 0) prevtime = kth->ktr_time; if (timercmp(&kth->ktr_time, &prevtime, <)) { timersub(&prevtime, &kth->ktr_time, &temp); sign = "-"; } else { timersub(&kth->ktr_time, &prevtime, &temp); sign = ""; } prevtime = kth->ktr_time; printf("%s%jd.%06ld ", sign, (intmax_t)temp.tv_sec, temp.tv_usec); } } void dumptimespec(struct ktr_header *kth) { static struct timespec prevtime, prevtime_e; struct timespec temp; const char *sign; if (timestamp & TIMESTAMP_ABSOLUTE) { printf("%jd.%09ld ", (intmax_t)kth->ktr_time.tv_sec, kth->ktr_time.tv_nsec); } if (timestamp & TIMESTAMP_ELAPSED) { if (prevtime_e.tv_sec == 0) prevtime_e = kth->ktr_time; timespecsub(&kth->ktr_time, &prevtime_e, &temp); printf("%jd.%09ld ", (intmax_t)temp.tv_sec, temp.tv_nsec); } if (timestamp & TIMESTAMP_RELATIVE) { if (prevtime.tv_sec == 0) prevtime = kth->ktr_time; if (timespeccmp(&kth->ktr_time, &prevtime, <)) { timespecsub(&prevtime, &kth->ktr_time, &temp); sign = "-"; } else { timespecsub(&kth->ktr_time, &prevtime, &temp); sign = ""; } prevtime = kth->ktr_time; printf("%s%jd.%09ld ", sign, (intmax_t)temp.tv_sec, temp.tv_nsec); } } void dumpheader(struct ktr_header *kth, u_int sv_flags) { static char unknown[64]; const char *abi; const char *arch; const char *type; switch (kth->ktr_type) { case KTR_SYSCALL: type = "CALL"; break; case KTR_SYSRET: type = "RET "; break; case KTR_NAMEI: type = "NAMI"; break; case KTR_GENIO: type = "GIO "; break; case KTR_PSIG: type = "PSIG"; break; case KTR_CSW: type = "CSW "; break; case KTR_USER: type = "USER"; break; case KTR_STRUCT: case KTR_STRUCT_ARRAY: type = "STRU"; break; case KTR_SYSCTL: type = "SCTL"; break; case KTR_CAPFAIL: type = "CAP "; break; case KTR_FAULT: type = "PFLT"; break; case KTR_FAULTEND: type = "PRET"; break; default: sprintf(unknown, "UNKNOWN(%d)", kth->ktr_type); type = unknown; } /* * The ktr_tid field was previously the ktr_buffer field, which held * the kernel pointer value for the buffer associated with data * following the record header. It now holds a threadid, but only * for trace files after the change. Older trace files still contain * kernel pointers. Detect this and suppress the results by printing * negative tid's as 0. */ if (threads) printf("%6d %6d %-8.*s ", kth->ktr_pid, kth->ktr_tid > 0 ? (lwpid_t)kth->ktr_tid : 0, MAXCOMLEN, kth->ktr_comm); else printf("%6d %-8.*s ", kth->ktr_pid, MAXCOMLEN, kth->ktr_comm); if (timestamp) { if (version == KTR_VERSION0) dumptimeval((struct ktr_header_v0 *)kth); else dumptimespec(kth); } if (cpuflag && version > KTR_VERSION0) printf("%3d ", kth->ktr_cpu); printf("%s ", type); if (abiflag != 0) { switch (sv_flags & SV_ABI_MASK) { case SV_ABI_LINUX: abi = "L"; break; case SV_ABI_FREEBSD: abi = "F"; break; default: abi = "U"; break; } if ((sv_flags & SV_LP64) != 0) arch = "64"; else if ((sv_flags & SV_ILP32) != 0) arch = "32"; else arch = "00"; printf("%s%s ", abi, arch); } } #include static void ioctlname(unsigned long val) { const char *str; str = sysdecode_ioctlname(val); if (str != NULL) printf("%s", str); else if (decimal) printf("%lu", val); else printf("%#lx", val); } static enum sysdecode_abi syscallabi(u_int sv_flags) { if (sv_flags == 0) return (SYSDECODE_ABI_FREEBSD); switch (sv_flags & SV_ABI_MASK) { case SV_ABI_FREEBSD: return (SYSDECODE_ABI_FREEBSD); case SV_ABI_LINUX: #ifdef __LP64__ if (sv_flags & SV_ILP32) return (SYSDECODE_ABI_LINUX32); #endif return (SYSDECODE_ABI_LINUX); default: return (SYSDECODE_ABI_UNKNOWN); } } static void syscallname(u_int code, u_int sv_flags) { const char *name; name = sysdecode_syscallname(syscallabi(sv_flags), code); if (name == NULL) printf("[%d]", code); else { printf("%s", name); if (syscallno) printf("[%d]", code); } } static void print_signal(int signo) { const char *signame; signame = sysdecode_signal(signo); if (signame != NULL) printf("%s", signame); else printf("SIG %d", signo); } void ktrsyscall(struct ktr_syscall *ktr, u_int sv_flags) { int narg = ktr->ktr_narg; register_t *ip; syscallname(ktr->ktr_code, sv_flags); ip = &ktr->ktr_args[0]; if (narg) { char c = '('; if (fancy) { switch (sv_flags & SV_ABI_MASK) { case SV_ABI_FREEBSD: ktrsyscall_freebsd(ktr, &ip, &narg, &c, sv_flags); break; #ifdef SYSDECODE_HAVE_LINUX case SV_ABI_LINUX: #ifdef __amd64__ if (sv_flags & SV_ILP32) ktrsyscall_linux32(ktr, &ip, &narg, &c); else #endif ktrsyscall_linux(ktr, &ip, &narg, &c); break; #endif /* SYSDECODE_HAVE_LINUX */ } } while (narg > 0) print_number(ip, narg, c); putchar(')'); } putchar('\n'); } void ktrsyscall_freebsd(struct ktr_syscall *ktr, register_t **resip, int *resnarg, char *resc, u_int sv_flags) { int narg = ktr->ktr_narg; register_t *ip, *first; intmax_t arg; int quad_align, quad_slots; ip = first = &ktr->ktr_args[0]; char c = *resc; quad_align = 0; if (sv_flags & SV_ILP32) { #ifdef __powerpc__ quad_align = 1; #endif quad_slots = 2; } else quad_slots = 1; switch (ktr->ktr_code) { case SYS_bindat: case SYS_chflagsat: case SYS_connectat: case SYS_faccessat: case SYS_fchmodat: case SYS_fchownat: case SYS_fstatat: case SYS_futimesat: case SYS_linkat: case SYS_mkdirat: case SYS_mkfifoat: case SYS_mknodat: case SYS_openat: case SYS_readlinkat: case SYS_renameat: case SYS_unlinkat: case SYS_utimensat: putchar('('); print_integer_arg_valid(sysdecode_atfd, *ip); c = ','; ip++; narg--; break; } switch (ktr->ktr_code) { case SYS_ioctl: { print_number(ip, narg, c); putchar(c); ioctlname(*ip); c = ','; ip++; narg--; break; } case SYS_ptrace: putchar('('); print_integer_arg(sysdecode_ptrace_request, *ip); c = ','; ip++; narg--; break; case SYS_access: case SYS_eaccess: case SYS_faccessat: print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_access_mode, *ip); ip++; narg--; break; case SYS_close_range: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg0(sysdecode_close_range_flags, *ip); ip += 3; narg -= 3; break; case SYS_open: case SYS_openat: print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_open_flags, ip[0]); if ((ip[0] & O_CREAT) == O_CREAT) { putchar(','); decode_filemode(ip[1]); } ip += 2; narg -= 2; break; case SYS_wait4: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg0(sysdecode_wait4_options, *ip); ip++; narg--; break; case SYS_wait6: putchar('('); print_integer_arg(sysdecode_idtype, *ip); c = ','; ip++; narg--; print_number64(first, ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_wait6_options, *ip); ip++; narg--; break; case SYS_chmod: case SYS_fchmod: case SYS_lchmod: case SYS_fchmodat: print_number(ip, narg, c); putchar(','); decode_filemode(*ip); ip++; narg--; break; case SYS_mknodat: print_number(ip, narg, c); putchar(','); decode_filemode(*ip); ip++; narg--; break; case SYS_getfsstat: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_getfsstat_mode, *ip); ip++; narg--; break; case SYS_mount: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg0(sysdecode_mount_flags, *ip); ip++; narg--; break; case SYS_unmount: print_number(ip, narg, c); putchar(','); print_mask_arg0(sysdecode_mount_flags, *ip); ip++; narg--; break; case SYS_recvmsg: case SYS_sendmsg: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg0(sysdecode_msg_flags, *ip); ip++; narg--; break; case SYS_recvfrom: case SYS_sendto: print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg0(sysdecode_msg_flags, *ip); ip++; narg--; break; case SYS_chflags: case SYS_chflagsat: case SYS_fchflags: case SYS_lchflags: print_number(ip, narg, c); putchar(','); decode_fileflags(*ip); ip++; narg--; break; case SYS_kill: print_number(ip, narg, c); putchar(','); print_signal(*ip); ip++; narg--; break; case SYS_reboot: putchar('('); print_mask_arg(sysdecode_reboot_howto, *ip); ip++; narg--; break; case SYS_umask: putchar('('); decode_filemode(*ip); ip++; narg--; break; case SYS_msync: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_msync_flags, *ip); ip++; narg--; break; #ifdef SYS_freebsd6_mmap case SYS_freebsd6_mmap: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_mmap_prot, *ip); putchar(','); ip++; narg--; print_mask_arg(sysdecode_mmap_flags, *ip); ip++; narg--; break; #endif case SYS_mmap: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_mmap_prot, *ip); putchar(','); ip++; narg--; print_mask_arg(sysdecode_mmap_flags, *ip); ip++; narg--; break; case SYS_mprotect: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_mmap_prot, *ip); ip++; narg--; break; case SYS_madvise: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_madvice, *ip); ip++; narg--; break; case SYS_pathconf: case SYS_lpathconf: case SYS_fpathconf: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_pathconf_name, *ip); ip++; narg--; break; case SYS_getpriority: case SYS_setpriority: putchar('('); print_integer_arg(sysdecode_prio_which, *ip); c = ','; ip++; narg--; break; case SYS_fcntl: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_fcntl_cmd, ip[0]); if (sysdecode_fcntl_arg_p(ip[0])) { putchar(','); if (ip[0] == F_SETFL) print_mask_arg( sysdecode_fcntl_fileflags, ip[1]); else sysdecode_fcntl_arg(stdout, ip[0], ip[1], decimal ? 10 : 16); } ip += 2; narg -= 2; break; case SYS_socket: { int sockdomain; putchar('('); sockdomain = *ip; print_integer_arg(sysdecode_socketdomain, sockdomain); ip++; narg--; putchar(','); print_mask_arg(sysdecode_socket_type, *ip); ip++; narg--; if (sockdomain == PF_INET || sockdomain == PF_INET6) { putchar(','); print_integer_arg(sysdecode_ipproto, *ip); ip++; narg--; } c = ','; break; } case SYS_setsockopt: case SYS_getsockopt: { const char *str; print_number(ip, narg, c); putchar(','); print_integer_arg_valid(sysdecode_sockopt_level, *ip); str = sysdecode_sockopt_name(ip[0], ip[1]); if (str != NULL) { printf(",%s", str); ip++; narg--; } ip++; narg--; break; } #ifdef SYS_freebsd6_lseek case SYS_freebsd6_lseek: print_number(ip, narg, c); /* Hidden 'pad' argument, not in lseek(2) */ print_number(ip, narg, c); print_number64(first, ip, narg, c); putchar(','); print_integer_arg(sysdecode_whence, *ip); ip++; narg--; break; #endif case SYS_lseek: print_number(ip, narg, c); print_number64(first, ip, narg, c); putchar(','); print_integer_arg(sysdecode_whence, *ip); ip++; narg--; break; case SYS_flock: print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_flock_operation, *ip); ip++; narg--; break; case SYS_mkfifo: case SYS_mkfifoat: case SYS_mkdir: case SYS_mkdirat: print_number(ip, narg, c); putchar(','); decode_filemode(*ip); ip++; narg--; break; case SYS_shutdown: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_shutdown_how, *ip); ip++; narg--; break; case SYS_socketpair: putchar('('); print_integer_arg(sysdecode_socketdomain, *ip); ip++; narg--; putchar(','); print_mask_arg(sysdecode_socket_type, *ip); ip++; narg--; c = ','; break; case SYS_getrlimit: case SYS_setrlimit: putchar('('); print_integer_arg(sysdecode_rlimit, *ip); ip++; narg--; c = ','; break; case SYS_getrusage: putchar('('); print_integer_arg(sysdecode_getrusage_who, *ip); ip++; narg--; c = ','; break; case SYS_quotactl: print_number(ip, narg, c); putchar(','); if (!sysdecode_quotactl_cmd(stdout, *ip)) { if (decimal) printf("", (int)*ip); else printf("", (int)*ip); } ip++; narg--; c = ','; break; case SYS_nfssvc: putchar('('); print_integer_arg(sysdecode_nfssvc_flags, *ip); ip++; narg--; c = ','; break; case SYS_rtprio: case SYS_rtprio_thread: putchar('('); print_integer_arg(sysdecode_rtprio_function, *ip); ip++; narg--; c = ','; break; case SYS___semctl: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_semctl_cmd, *ip); ip++; narg--; break; case SYS_semget: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_semget_flags, *ip); ip++; narg--; break; case SYS_msgctl: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_msgctl_cmd, *ip); ip++; narg--; break; case SYS_shmat: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_shmat_flags, *ip); ip++; narg--; break; case SYS_shmctl: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_shmctl_cmd, *ip); ip++; narg--; break; #ifdef SYS_freebsd12_shm_open case SYS_freebsd12_shm_open: if (ip[0] == (uintptr_t)SHM_ANON) { printf("(SHM_ANON"); ip++; } else { print_number(ip, narg, c); } putchar(','); print_mask_arg(sysdecode_open_flags, ip[0]); putchar(','); decode_filemode(ip[1]); ip += 2; narg -= 2; break; #endif case SYS_shm_open2: if (ip[0] == (uintptr_t)SHM_ANON) { printf("(SHM_ANON"); ip++; } else { print_number(ip, narg, c); } putchar(','); print_mask_arg(sysdecode_open_flags, ip[0]); putchar(','); decode_filemode(ip[1]); putchar(','); print_mask_arg(sysdecode_shmflags, ip[2]); ip += 3; narg -= 3; break; case SYS_minherit: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_minherit_inherit, *ip); ip++; narg--; break; case SYS_rfork: putchar('('); print_mask_arg(sysdecode_rfork_flags, *ip); ip++; narg--; c = ','; break; case SYS_lio_listio: putchar('('); print_integer_arg(sysdecode_lio_listio_mode, *ip); ip++; narg--; c = ','; break; case SYS_mlockall: putchar('('); print_mask_arg(sysdecode_mlockall_flags, *ip); ip++; narg--; break; case SYS_sched_setscheduler: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_scheduler_policy, *ip); ip++; narg--; break; case SYS_sched_get_priority_max: case SYS_sched_get_priority_min: putchar('('); print_integer_arg(sysdecode_scheduler_policy, *ip); ip++; narg--; break; case SYS_sendfile: print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_sendfile_flags, *ip); ip++; narg--; break; case SYS_kldsym: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_kldsym_cmd, *ip); ip++; narg--; break; case SYS_sigprocmask: putchar('('); print_integer_arg(sysdecode_sigprocmask_how, *ip); ip++; narg--; c = ','; break; case SYS___acl_get_file: case SYS___acl_set_file: case SYS___acl_get_fd: case SYS___acl_set_fd: case SYS___acl_delete_file: case SYS___acl_delete_fd: case SYS___acl_aclcheck_file: case SYS___acl_aclcheck_fd: case SYS___acl_get_link: case SYS___acl_set_link: case SYS___acl_delete_link: case SYS___acl_aclcheck_link: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_acltype, *ip); ip++; narg--; break; case SYS_sigaction: putchar('('); print_signal(*ip); ip++; narg--; c = ','; break; case SYS_extattrctl: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_extattrnamespace, *ip); ip++; narg--; break; case SYS_nmount: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg0(sysdecode_mount_flags, *ip); ip++; narg--; break; case SYS_thr_create: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_thr_create_flags, *ip); ip++; narg--; break; case SYS_thr_kill: print_number(ip, narg, c); putchar(','); print_signal(*ip); ip++; narg--; break; case SYS_kldunloadf: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_kldunload_flags, *ip); ip++; narg--; break; case SYS_linkat: case SYS_renameat: case SYS_symlinkat: print_number(ip, narg, c); putchar(','); print_integer_arg_valid(sysdecode_atfd, *ip); ip++; narg--; print_number(ip, narg, c); break; case SYS_cap_fcntls_limit: print_number(ip, narg, c); putchar(','); arg = *ip; ip++; narg--; print_mask_arg32(sysdecode_cap_fcntlrights, arg); break; case SYS_posix_fadvise: print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); (void)putchar(','); print_integer_arg(sysdecode_fadvice, *ip); ip++; narg--; break; case SYS_procctl: putchar('('); print_integer_arg(sysdecode_idtype, *ip); c = ','; ip++; narg--; print_number64(first, ip, narg, c); putchar(','); print_integer_arg(sysdecode_procctl_cmd, *ip); ip++; narg--; break; case SYS__umtx_op: { int op; print_number(ip, narg, c); putchar(','); if (print_mask_arg_part(sysdecode_umtx_op_flags, *ip, &op)) putchar('|'); print_integer_arg(sysdecode_umtx_op, op); putchar('>'); switch (*ip) { case UMTX_OP_CV_WAIT: ip++; narg--; putchar(','); print_mask_argul( sysdecode_umtx_cvwait_flags, *ip); break; case UMTX_OP_RW_RDLOCK: ip++; narg--; putchar(','); print_mask_argul( sysdecode_umtx_rwlock_flags, *ip); break; } ip++; narg--; break; } case SYS_ftruncate: case SYS_truncate: print_number(ip, narg, c); print_number64(first, ip, narg, c); break; case SYS_fchownat: print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); break; case SYS_fstatat: case SYS_utimensat: print_number(ip, narg, c); print_number(ip, narg, c); break; case SYS_unlinkat: print_number(ip, narg, c); break; case SYS_sysarch: putchar('('); print_integer_arg(sysdecode_sysarch_number, *ip); ip++; narg--; c = ','; break; case SYS_getitimer: case SYS_setitimer: putchar('('); print_integer_arg(sysdecode_itimer, *ip); ip++; narg--; c = ','; break; } switch (ktr->ktr_code) { case SYS_chflagsat: case SYS_fchownat: case SYS_faccessat: case SYS_fchmodat: case SYS_fstatat: case SYS_linkat: case SYS_unlinkat: case SYS_utimensat: putchar(','); print_mask_arg0(sysdecode_atflags, *ip); ip++; narg--; break; } *resc = c; *resip = ip; *resnarg = narg; } void ktrsysret(struct ktr_sysret *ktr, u_int sv_flags) { register_t ret = ktr->ktr_retval; int error = ktr->ktr_error; syscallname(ktr->ktr_code, sv_flags); printf(" "); if (error == 0) { if (fancy) { printf("%ld", (long)ret); if (ret < 0 || ret > 9) printf("/%#lx", (unsigned long)ret); } else { if (decimal) printf("%ld", (long)ret); else printf("%#lx", (unsigned long)ret); } } else if (error == ERESTART) printf("RESTART"); else if (error == EJUSTRETURN) printf("JUSTRETURN"); else { printf("-1 errno %d", sysdecode_freebsd_to_abi_errno( syscallabi(sv_flags), error)); if (fancy) printf(" %s", strerror(ktr->ktr_error)); } putchar('\n'); } void ktrnamei(char *cp, int len) { printf("\"%.*s\"\n", len, cp); } void hexdump(char *p, int len, int screenwidth) { int n, i; int width; width = 0; do { width += 2; i = 13; /* base offset */ i += (width / 2) + 1; /* spaces every second byte */ i += (width * 2); /* width of bytes */ i += 3; /* " |" */ i += width; /* each byte */ i += 1; /* "|" */ } while (i < screenwidth); width -= 2; for (n = 0; n < len; n += width) { for (i = n; i < n + width; i++) { if ((i % width) == 0) { /* beginning of line */ printf(" 0x%04x", i); } if ((i % 2) == 0) { printf(" "); } if (i < len) printf("%02x", p[i] & 0xff); else printf(" "); } printf(" |"); for (i = n; i < n + width; i++) { if (i >= len) break; if (p[i] >= ' ' && p[i] <= '~') printf("%c", p[i]); else printf("."); } printf("|\n"); } if ((i % width) != 0) printf("\n"); } void visdump(char *dp, int datalen, int screenwidth) { int col = 0; char *cp; int width; char visbuf[5]; printf(" \""); col = 8; for (;datalen > 0; datalen--, dp++) { vis(visbuf, *dp, VIS_CSTYLE | VIS_NOLOCALE, *(dp+1)); cp = visbuf; /* * Keep track of printables and * space chars (like fold(1)). */ if (col == 0) { putchar('\t'); col = 8; } switch(*cp) { case '\n': col = 0; putchar('\n'); continue; case '\t': width = 8 - (col&07); break; default: width = strlen(cp); } if (col + width > (screenwidth-2)) { printf("\\\n\t"); col = 8; } col += width; do { putchar(*cp++); } while (*cp); } if (col == 0) printf(" "); printf("\"\n"); } void ktrgenio(struct ktr_genio *ktr, int len) { int datalen = len - sizeof (struct ktr_genio); char *dp = (char *)ktr + sizeof (struct ktr_genio); static int screenwidth = 0; int i, binary; printf("fd %d %s %d byte%s\n", ktr->ktr_fd, ktr->ktr_rw == UIO_READ ? "read" : "wrote", datalen, datalen == 1 ? "" : "s"); if (suppressdata) return; if (screenwidth == 0) { struct winsize ws; if (fancy && ioctl(fileno(stderr), TIOCGWINSZ, &ws) != -1 && ws.ws_col > 8) screenwidth = ws.ws_col; else screenwidth = 80; } if (maxdata && datalen > maxdata) datalen = maxdata; for (i = 0, binary = 0; i < datalen && binary == 0; i++) { if (dp[i] >= 32 && dp[i] < 127) continue; if (dp[i] == 10 || dp[i] == 13 || dp[i] == 0 || dp[i] == 9) continue; binary = 1; } if (binary) hexdump(dp, datalen, screenwidth); else visdump(dp, datalen, screenwidth); } void ktrpsig(struct ktr_psig *psig) { const char *str; print_signal(psig->signo); if (psig->action == SIG_DFL) { printf(" SIG_DFL"); } else { printf(" caught handler=0x%lx mask=0x%x", (u_long)psig->action, psig->mask.__bits[0]); } printf(" code="); str = sysdecode_sigcode(psig->signo, psig->code); if (str != NULL) printf("%s", str); else printf("", psig->code); putchar('\n'); } void ktrcsw_old(struct ktr_csw_old *cs) { printf("%s %s\n", cs->out ? "stop" : "resume", cs->user ? "user" : "kernel"); } void ktrcsw(struct ktr_csw *cs) { printf("%s %s \"%s\"\n", cs->out ? "stop" : "resume", cs->user ? "user" : "kernel", cs->wmesg); } void ktruser(int len, void *p) { unsigned char *cp; if (sysdecode_utrace(stdout, p, len)) { printf("\n"); return; } printf("%d ", len); cp = p; while (len--) if (decimal) printf(" %d", *cp++); else printf(" %02x", *cp++); printf("\n"); } void ktrcaprights(cap_rights_t *rightsp) { printf("cap_rights_t "); sysdecode_cap_rights(stdout, rightsp); printf("\n"); } static void ktrtimeval(struct timeval *tv) { printf("{%ld, %ld}", (long)tv->tv_sec, tv->tv_usec); } void ktritimerval(struct itimerval *it) { printf("itimerval { .interval = "); ktrtimeval(&it->it_interval); printf(", .value = "); ktrtimeval(&it->it_value); printf(" }\n"); } void ktrsockaddr(struct sockaddr *sa) { /* TODO: Support additional address families #include struct sockaddr_nb *nb; */ const char *str; char addr[64]; /* * note: ktrstruct() has already verified that sa points to a * buffer at least sizeof(struct sockaddr) bytes long and exactly * sa->sa_len bytes long. */ printf("struct sockaddr { "); str = sysdecode_sockaddr_family(sa->sa_family); if (str != NULL) printf("%s", str); else printf("", sa->sa_family); printf(", "); #define check_sockaddr_len(n) \ if (sa_##n.s##n##_len < sizeof(struct sockaddr_##n)) { \ printf("invalid"); \ break; \ } switch(sa->sa_family) { case AF_INET: { struct sockaddr_in sa_in; memset(&sa_in, 0, sizeof(sa_in)); memcpy(&sa_in, sa, sa->sa_len); check_sockaddr_len(in); inet_ntop(AF_INET, &sa_in.sin_addr, addr, sizeof addr); printf("%s:%u", addr, ntohs(sa_in.sin_port)); break; } case AF_INET6: { struct sockaddr_in6 sa_in6; memset(&sa_in6, 0, sizeof(sa_in6)); memcpy(&sa_in6, sa, sa->sa_len); check_sockaddr_len(in6); getnameinfo((struct sockaddr *)&sa_in6, sizeof(sa_in6), addr, sizeof(addr), NULL, 0, NI_NUMERICHOST); printf("[%s]:%u", addr, htons(sa_in6.sin6_port)); break; } case AF_UNIX: { struct sockaddr_un sa_un; memset(&sa_un, 0, sizeof(sa_un)); memcpy(&sa_un, sa, sa->sa_len); printf("%.*s", (int)sizeof(sa_un.sun_path), sa_un.sun_path); break; } default: printf("unknown address family"); } printf(" }\n"); } void ktrstat(struct stat *statp) { char mode[12], timestr[PATH_MAX + 4]; struct passwd *pwd; struct group *grp; struct tm *tm; /* * note: ktrstruct() has already verified that statp points to a * buffer exactly sizeof(struct stat) bytes long. */ printf("struct stat {"); printf("dev=%ju, ino=%ju, ", (uintmax_t)statp->st_dev, (uintmax_t)statp->st_ino); if (!resolv) printf("mode=0%jo, ", (uintmax_t)statp->st_mode); else { strmode(statp->st_mode, mode); printf("mode=%s, ", mode); } printf("nlink=%ju, ", (uintmax_t)statp->st_nlink); if (!resolv) { pwd = NULL; } else { #ifdef WITH_CASPER if (cappwd != NULL) pwd = cap_getpwuid(cappwd, statp->st_uid); else #endif pwd = getpwuid(statp->st_uid); } if (pwd == NULL) printf("uid=%ju, ", (uintmax_t)statp->st_uid); else printf("uid=\"%s\", ", pwd->pw_name); if (!resolv) { grp = NULL; } else { #ifdef WITH_CASPER if (capgrp != NULL) grp = cap_getgrgid(capgrp, statp->st_gid); else #endif grp = getgrgid(statp->st_gid); } if (grp == NULL) printf("gid=%ju, ", (uintmax_t)statp->st_gid); else printf("gid=\"%s\", ", grp->gr_name); printf("rdev=%ju, ", (uintmax_t)statp->st_rdev); printf("atime="); if (!resolv) printf("%jd", (intmax_t)statp->st_atim.tv_sec); else { tm = localtime(&statp->st_atim.tv_sec); strftime(timestr, sizeof(timestr), TIME_FORMAT, tm); printf("\"%s\"", timestr); } if (statp->st_atim.tv_nsec != 0) printf(".%09ld, ", statp->st_atim.tv_nsec); else printf(", "); printf("mtime="); if (!resolv) printf("%jd", (intmax_t)statp->st_mtim.tv_sec); else { tm = localtime(&statp->st_mtim.tv_sec); strftime(timestr, sizeof(timestr), TIME_FORMAT, tm); printf("\"%s\"", timestr); } if (statp->st_mtim.tv_nsec != 0) printf(".%09ld, ", statp->st_mtim.tv_nsec); else printf(", "); printf("ctime="); if (!resolv) printf("%jd", (intmax_t)statp->st_ctim.tv_sec); else { tm = localtime(&statp->st_ctim.tv_sec); strftime(timestr, sizeof(timestr), TIME_FORMAT, tm); printf("\"%s\"", timestr); } if (statp->st_ctim.tv_nsec != 0) printf(".%09ld, ", statp->st_ctim.tv_nsec); else printf(", "); printf("birthtime="); if (!resolv) printf("%jd", (intmax_t)statp->st_birthtim.tv_sec); else { tm = localtime(&statp->st_birthtim.tv_sec); strftime(timestr, sizeof(timestr), TIME_FORMAT, tm); printf("\"%s\"", timestr); } if (statp->st_birthtim.tv_nsec != 0) printf(".%09ld, ", statp->st_birthtim.tv_nsec); else printf(", "); printf("size=%jd, blksize=%ju, blocks=%jd, flags=0x%x", (uintmax_t)statp->st_size, (uintmax_t)statp->st_blksize, (intmax_t)statp->st_blocks, statp->st_flags); printf(" }\n"); } void ktrbitset(char *name, struct bitset *set, size_t setlen) { int i, maxi, c = 0; if (setlen > INT32_MAX) setlen = INT32_MAX; maxi = setlen * CHAR_BIT; printf("%s [ ", name); for (i = 0; i < maxi; i++) { if (!BIT_ISSET(setlen, i, set)) continue; if (c == 0) printf("%d", i); else printf(", %d", i); c++; } if (c == 0) printf(" empty ]\n"); else printf(" ]\n"); } void ktrstruct(char *buf, size_t buflen) { char *name, *data; size_t namelen, datalen; int i; cap_rights_t rights; struct itimerval it; struct stat sb; struct sockaddr_storage ss; struct bitset *set; for (name = buf, namelen = 0; namelen < buflen && name[namelen] != '\0'; ++namelen) /* nothing */; if (namelen == buflen) goto invalid; if (name[namelen] != '\0') goto invalid; data = buf + namelen + 1; datalen = buflen - namelen - 1; if (datalen == 0) goto invalid; /* sanity check */ for (i = 0; i < (int)namelen; ++i) if (!isalpha(name[i]) && name[i] != '_') goto invalid; if (strcmp(name, "caprights") == 0) { if (datalen != sizeof(cap_rights_t)) goto invalid; memcpy(&rights, data, datalen); ktrcaprights(&rights); } else if (strcmp(name, "itimerval") == 0) { if (datalen != sizeof(struct itimerval)) goto invalid; memcpy(&it, data, datalen); ktritimerval(&it); } else if (strcmp(name, "stat") == 0) { if (datalen != sizeof(struct stat)) goto invalid; memcpy(&sb, data, datalen); ktrstat(&sb); } else if (strcmp(name, "sockaddr") == 0) { if (datalen > sizeof(ss)) goto invalid; memcpy(&ss, data, datalen); if (datalen != ss.ss_len) goto invalid; ktrsockaddr((struct sockaddr *)&ss); } else if (strcmp(name, "cpuset_t") == 0) { if (datalen < 1) goto invalid; set = malloc(datalen); if (set == NULL) errx(1, "%s", strerror(ENOMEM)); memcpy(set, data, datalen); ktrbitset(name, set, datalen); free(set); } else { #ifdef SYSDECODE_HAVE_LINUX if (ktrstruct_linux(name, data, datalen) == false) #endif printf("unknown structure\n"); } return; invalid: printf("invalid record\n"); } void ktrcapfail(struct ktr_cap_fail *ktr) { + union ktr_cap_data *kcd = &ktr->cap_data; + switch (ktr->cap_type) { case CAPFAIL_NOTCAPABLE: /* operation on fd with insufficient capabilities */ printf("operation requires "); - sysdecode_cap_rights(stdout, &ktr->cap_needed); + sysdecode_cap_rights(stdout, &kcd->cap_needed); printf(", descriptor holds "); - sysdecode_cap_rights(stdout, &ktr->cap_held); + sysdecode_cap_rights(stdout, &kcd->cap_held); break; case CAPFAIL_INCREASE: /* requested more capabilities than fd already has */ printf("attempt to increase capabilities from "); - sysdecode_cap_rights(stdout, &ktr->cap_held); + sysdecode_cap_rights(stdout, &kcd->cap_held); printf(" to "); - sysdecode_cap_rights(stdout, &ktr->cap_needed); + sysdecode_cap_rights(stdout, &kcd->cap_needed); break; case CAPFAIL_SYSCALL: /* called restricted syscall */ - printf("disallowed system call"); + printf("system call not allowed: "); + syscallname(ktr->cap_code, ktr->cap_svflags); + if (syscallabi(ktr->cap_svflags) == SYSDECODE_ABI_FREEBSD) { + switch (ktr->cap_code) { + case SYS_sysarch: + printf(", op: "); + print_integer_arg(sysdecode_sysarch_number, + kcd->cap_int); + break; + case SYS_fcntl: + printf(", cmd: "); + print_integer_arg(sysdecode_fcntl_cmd, + kcd->cap_int); + break; + } + } break; - case CAPFAIL_LOOKUP: - /* absolute or AT_FDCWD path, ".." path, etc. */ - printf("restricted VFS lookup"); + case CAPFAIL_SIGNAL: + /* sent signal to proc other than self */ + syscallname(ktr->cap_code, ktr->cap_svflags); + printf(": signal delivery not allowed: "); + print_integer_arg(sysdecode_signal, kcd->cap_int); break; - default: - printf("unknown capability failure: "); - sysdecode_cap_rights(stdout, &ktr->cap_needed); - printf(" "); - sysdecode_cap_rights(stdout, &ktr->cap_held); + case CAPFAIL_PROTO: + /* created socket with restricted protocol */ + syscallname(ktr->cap_code, ktr->cap_svflags); + printf(": protocol not allowed: "); + print_integer_arg(sysdecode_ipproto, kcd->cap_int); break; + case CAPFAIL_SOCKADDR: + /* unable to look up address */ + syscallname(ktr->cap_code, ktr->cap_svflags); + printf(": restricted address lookup: "); + ktrsockaddr(&kcd->cap_sockaddr); + return; + case CAPFAIL_NAMEI: + /* absolute or AT_FDCWD path, ".." path, etc. */ + syscallname(ktr->cap_code, ktr->cap_svflags); + printf(": restricted VFS lookup: %s\n", kcd->cap_path); + return; + case CAPFAIL_CPUSET: + /* modification of an external cpuset */ + syscallname(ktr->cap_code, ktr->cap_svflags); + printf(": restricted cpuset operation\n"); + return; + default: + syscallname(ktr->cap_code, ktr->cap_svflags); + printf(": unknown capability failure\n"); + return; } printf("\n"); } void ktrfault(struct ktr_fault *ktr) { printf("0x%jx ", (uintmax_t)ktr->vaddr); print_mask_arg(sysdecode_vmprot, ktr->type); printf("\n"); } void ktrfaultend(struct ktr_faultend *ktr) { const char *str; str = sysdecode_vmresult(ktr->result); if (str != NULL) printf("%s", str); else printf("", ktr->result); printf("\n"); } void ktrkevent(struct kevent *kev) { printf("{ ident="); switch (kev->filter) { case EVFILT_READ: case EVFILT_WRITE: case EVFILT_VNODE: case EVFILT_PROC: case EVFILT_TIMER: case EVFILT_PROCDESC: case EVFILT_EMPTY: printf("%ju", (uintmax_t)kev->ident); break; case EVFILT_SIGNAL: print_signal(kev->ident); break; default: printf("%p", (void *)kev->ident); } printf(", filter="); print_integer_arg(sysdecode_kevent_filter, kev->filter); printf(", flags="); print_mask_arg0(sysdecode_kevent_flags, kev->flags); printf(", fflags="); sysdecode_kevent_fflags(stdout, kev->filter, kev->fflags, decimal ? 10 : 16); printf(", data=%#jx, udata=%p }", (uintmax_t)kev->data, kev->udata); } void ktrpollfd(struct pollfd *pfd) { printf("{ fd=%d", pfd->fd); printf(", events="); print_mask_arg0(sysdecode_pollfd_events, pfd->events); printf(", revents="); print_mask_arg0(sysdecode_pollfd_events, pfd->revents); printf("}"); } void ktrstructarray(struct ktr_struct_array *ksa, size_t buflen) { struct kevent kev; struct pollfd pfd; char *name, *data; size_t namelen, datalen; int i; bool first; buflen -= sizeof(*ksa); for (name = (char *)(ksa + 1), namelen = 0; namelen < buflen && name[namelen] != '\0'; ++namelen) /* nothing */; if (namelen == buflen) goto invalid; if (name[namelen] != '\0') goto invalid; /* sanity check */ for (i = 0; i < (int)namelen; ++i) if (!isalnum(name[i]) && name[i] != '_') goto invalid; data = name + namelen + 1; datalen = buflen - namelen - 1; printf("struct %s[] = { ", name); first = true; for (; datalen >= ksa->struct_size; data += ksa->struct_size, datalen -= ksa->struct_size) { if (!first) printf("\n "); else first = false; if (strcmp(name, "kevent") == 0) { if (ksa->struct_size != sizeof(kev)) goto bad_size; memcpy(&kev, data, sizeof(kev)); ktrkevent(&kev); } else if (strcmp(name, "freebsd11_kevent") == 0) { struct freebsd11_kevent kev11; if (ksa->struct_size != sizeof(kev11)) goto bad_size; memcpy(&kev11, data, sizeof(kev11)); memset(&kev, 0, sizeof(kev)); kev.ident = kev11.ident; kev.filter = kev11.filter; kev.flags = kev11.flags; kev.fflags = kev11.fflags; kev.data = kev11.data; kev.udata = kev11.udata; ktrkevent(&kev); #ifdef _WANT_KEVENT32 } else if (strcmp(name, "kevent32") == 0) { struct kevent32 kev32; if (ksa->struct_size != sizeof(kev32)) goto bad_size; memcpy(&kev32, data, sizeof(kev32)); memset(&kev, 0, sizeof(kev)); kev.ident = kev32.ident; kev.filter = kev32.filter; kev.flags = kev32.flags; kev.fflags = kev32.fflags; #if BYTE_ORDER == BIG_ENDIAN kev.data = kev32.data2 | ((int64_t)kev32.data1 << 32); #else kev.data = kev32.data1 | ((int64_t)kev32.data2 << 32); #endif kev.udata = (void *)(uintptr_t)kev32.udata; ktrkevent(&kev); } else if (strcmp(name, "freebsd11_kevent32") == 0) { struct freebsd11_kevent32 kev32; if (ksa->struct_size != sizeof(kev32)) goto bad_size; memcpy(&kev32, data, sizeof(kev32)); memset(&kev, 0, sizeof(kev)); kev.ident = kev32.ident; kev.filter = kev32.filter; kev.flags = kev32.flags; kev.fflags = kev32.fflags; kev.data = kev32.data; kev.udata = (void *)(uintptr_t)kev32.udata; ktrkevent(&kev); #endif } else if (strcmp(name, "pollfd") == 0) { if (ksa->struct_size != sizeof(pfd)) goto bad_size; memcpy(&pfd, data, sizeof(pfd)); ktrpollfd(&pfd); } else { printf(" }\n"); return; } } printf(" }\n"); return; invalid: printf("invalid record\n"); return; bad_size: printf(" }\n"); return; } void usage(void) { fprintf(stderr, "usage: kdump [-dEnlHRrSsTA] [-f trfile] " "[-m maxdata] [-p pid] [-t trstr]\n"); exit(1); }