Index: head/sys/cddl/contrib/opensolaris/uts/common/dtrace/fasttrap.c =================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/dtrace/fasttrap.c (revision 344451) +++ head/sys/cddl/contrib/opensolaris/uts/common/dtrace/fasttrap.c (revision 344452) @@ -1,2691 +1,2693 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * Portions Copyright 2010 The FreeBSD Foundation * * $FreeBSD$ */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright (c) 2015, Joyent, Inc. All rights reserved. */ #include #include #include #include #include #include #ifdef illumos #include #endif #include #include #include #ifdef illumos #include #endif #include #include #include #include #include #include #include #include #ifdef illumos #include #endif #include #include #ifndef illumos #include #include #include #include #include #include #include #include #include #include #include #include #endif /* * User-Land Trap-Based Tracing * ---------------------------- * * The fasttrap provider allows DTrace consumers to instrument any user-level * instruction to gather data; this includes probes with semantic * signifigance like entry and return as well as simple offsets into the * function. While the specific techniques used are very ISA specific, the * methodology is generalizable to any architecture. * * * The General Methodology * ----------------------- * * With the primary goal of tracing every user-land instruction and the * limitation that we can't trust user space so don't want to rely on much * information there, we begin by replacing the instructions we want to trace * with trap instructions. Each instruction we overwrite is saved into a hash * table keyed by process ID and pc address. When we enter the kernel due to * this trap instruction, we need the effects of the replaced instruction to * appear to have occurred before we proceed with the user thread's * execution. * * Each user level thread is represented by a ulwp_t structure which is * always easily accessible through a register. The most basic way to produce * the effects of the instruction we replaced is to copy that instruction out * to a bit of scratch space reserved in the user thread's ulwp_t structure * (a sort of kernel-private thread local storage), set the PC to that * scratch space and single step. When we reenter the kernel after single * stepping the instruction we must then adjust the PC to point to what would * normally be the next instruction. Of course, special care must be taken * for branches and jumps, but these represent such a small fraction of any * instruction set that writing the code to emulate these in the kernel is * not too difficult. * * Return probes may require several tracepoints to trace every return site, * and, conversely, each tracepoint may activate several probes (the entry * and offset 0 probes, for example). To solve this muliplexing problem, * tracepoints contain lists of probes to activate and probes contain lists * of tracepoints to enable. If a probe is activated, it adds its ID to * existing tracepoints or creates new ones as necessary. * * Most probes are activated _before_ the instruction is executed, but return * probes are activated _after_ the effects of the last instruction of the * function are visible. Return probes must be fired _after_ we have * single-stepped the instruction whereas all other probes are fired * beforehand. * * * Lock Ordering * ------------- * * The lock ordering below -- both internally and with respect to the DTrace * framework -- is a little tricky and bears some explanation. Each provider * has a lock (ftp_mtx) that protects its members including reference counts * for enabled probes (ftp_rcount), consumers actively creating probes * (ftp_ccount) and USDT consumers (ftp_mcount); all three prevent a provider * from being freed. A provider is looked up by taking the bucket lock for the * provider hash table, and is returned with its lock held. The provider lock * may be taken in functions invoked by the DTrace framework, but may not be * held while calling functions in the DTrace framework. * * To ensure consistency over multiple calls to the DTrace framework, the * creation lock (ftp_cmtx) should be held. Naturally, the creation lock may * not be taken when holding the provider lock as that would create a cyclic * lock ordering. In situations where one would naturally take the provider * lock and then the creation lock, we instead up a reference count to prevent * the provider from disappearing, drop the provider lock, and acquire the * creation lock. * * Briefly: * bucket lock before provider lock * DTrace before provider lock * creation lock before DTrace * never hold the provider lock and creation lock simultaneously */ static d_open_t fasttrap_open; static d_ioctl_t fasttrap_ioctl; static struct cdevsw fasttrap_cdevsw = { .d_version = D_VERSION, .d_open = fasttrap_open, .d_ioctl = fasttrap_ioctl, .d_name = "fasttrap", }; static struct cdev *fasttrap_cdev; static dtrace_meta_provider_id_t fasttrap_meta_id; static struct proc *fasttrap_cleanup_proc; static struct mtx fasttrap_cleanup_mtx; static uint_t fasttrap_cleanup_work, fasttrap_cleanup_drain, fasttrap_cleanup_cv; /* * Generation count on modifications to the global tracepoint lookup table. */ static volatile uint64_t fasttrap_mod_gen; /* * When the fasttrap provider is loaded, fasttrap_max is set to either * FASTTRAP_MAX_DEFAULT, or the value for fasttrap-max-probes in the * fasttrap.conf file (Illumos), or the value provied in the loader.conf (FreeBSD). * Each time a probe is created, fasttrap_total is incremented by the number * of tracepoints that may be associated with that probe; fasttrap_total is capped * at fasttrap_max. */ #define FASTTRAP_MAX_DEFAULT 250000 static uint32_t fasttrap_max = FASTTRAP_MAX_DEFAULT; static uint32_t fasttrap_total; /* * Copyright (c) 2011, Joyent, Inc. All rights reserved. */ #define FASTTRAP_TPOINTS_DEFAULT_SIZE 0x4000 #define FASTTRAP_PROVIDERS_DEFAULT_SIZE 0x100 #define FASTTRAP_PROCS_DEFAULT_SIZE 0x100 #define FASTTRAP_PID_NAME "pid" fasttrap_hash_t fasttrap_tpoints; static fasttrap_hash_t fasttrap_provs; static fasttrap_hash_t fasttrap_procs; static uint64_t fasttrap_pid_count; /* pid ref count */ static kmutex_t fasttrap_count_mtx; /* lock on ref count */ #define FASTTRAP_ENABLE_FAIL 1 #define FASTTRAP_ENABLE_PARTIAL 2 static int fasttrap_tracepoint_enable(proc_t *, fasttrap_probe_t *, uint_t); static void fasttrap_tracepoint_disable(proc_t *, fasttrap_probe_t *, uint_t); static fasttrap_provider_t *fasttrap_provider_lookup(pid_t, const char *, const dtrace_pattr_t *); static void fasttrap_provider_retire(pid_t, const char *, int); static void fasttrap_provider_free(fasttrap_provider_t *); static fasttrap_proc_t *fasttrap_proc_lookup(pid_t); static void fasttrap_proc_release(fasttrap_proc_t *); #ifndef illumos static void fasttrap_thread_dtor(void *, struct thread *); #endif #define FASTTRAP_PROVS_INDEX(pid, name) \ ((fasttrap_hash_str(name) + (pid)) & fasttrap_provs.fth_mask) #define FASTTRAP_PROCS_INDEX(pid) ((pid) & fasttrap_procs.fth_mask) #ifndef illumos struct rmlock fasttrap_tp_lock; static eventhandler_tag fasttrap_thread_dtor_tag; #endif static unsigned long tpoints_hash_size = FASTTRAP_TPOINTS_DEFAULT_SIZE; #ifdef __FreeBSD__ SYSCTL_DECL(_kern_dtrace); SYSCTL_NODE(_kern_dtrace, OID_AUTO, fasttrap, CTLFLAG_RD, 0, "DTrace fasttrap parameters"); SYSCTL_UINT(_kern_dtrace_fasttrap, OID_AUTO, max_probes, CTLFLAG_RWTUN, &fasttrap_max, FASTTRAP_MAX_DEFAULT, "Maximum number of fasttrap probes"); SYSCTL_ULONG(_kern_dtrace_fasttrap, OID_AUTO, tpoints_hash_size, CTLFLAG_RDTUN, &tpoints_hash_size, FASTTRAP_TPOINTS_DEFAULT_SIZE, "Size of the tracepoint hash table"); #endif static int fasttrap_highbit(ulong_t i) { int h = 1; if (i == 0) return (0); #ifdef _LP64 if (i & 0xffffffff00000000ul) { h += 32; i >>= 32; } #endif if (i & 0xffff0000) { h += 16; i >>= 16; } if (i & 0xff00) { h += 8; i >>= 8; } if (i & 0xf0) { h += 4; i >>= 4; } if (i & 0xc) { h += 2; i >>= 2; } if (i & 0x2) { h += 1; } return (h); } static uint_t fasttrap_hash_str(const char *p) { unsigned int g; uint_t hval = 0; while (*p) { hval = (hval << 4) + *p++; if ((g = (hval & 0xf0000000)) != 0) hval ^= g >> 24; hval &= ~g; } return (hval); } void fasttrap_sigtrap(proc_t *p, kthread_t *t, uintptr_t pc) { #ifdef illumos sigqueue_t *sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); sqp->sq_info.si_signo = SIGTRAP; sqp->sq_info.si_code = TRAP_DTRACE; sqp->sq_info.si_addr = (caddr_t)pc; mutex_enter(&p->p_lock); sigaddqa(p, t, sqp); mutex_exit(&p->p_lock); if (t != NULL) aston(t); #else ksiginfo_t *ksi = kmem_zalloc(sizeof (ksiginfo_t), KM_SLEEP); ksiginfo_init(ksi); ksi->ksi_signo = SIGTRAP; ksi->ksi_code = TRAP_DTRACE; ksi->ksi_addr = (caddr_t)pc; PROC_LOCK(p); (void) tdsendsignal(p, t, SIGTRAP, ksi); PROC_UNLOCK(p); #endif } #ifndef illumos /* * Obtain a chunk of scratch space in the address space of the target process. */ fasttrap_scrspace_t * fasttrap_scraddr(struct thread *td, fasttrap_proc_t *fprc) { fasttrap_scrblock_t *scrblk; fasttrap_scrspace_t *scrspc; struct proc *p; vm_offset_t addr; int error, i; scrspc = NULL; if (td->t_dtrace_sscr != NULL) { /* If the thread already has scratch space, we're done. */ scrspc = (fasttrap_scrspace_t *)td->t_dtrace_sscr; return (scrspc); } p = td->td_proc; mutex_enter(&fprc->ftpc_mtx); if (LIST_EMPTY(&fprc->ftpc_fscr)) { /* * No scratch space is available, so we'll map a new scratch * space block into the traced process' address space. */ addr = 0; error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, FASTTRAP_SCRBLOCK_SIZE, 0, VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error != KERN_SUCCESS) goto done; scrblk = malloc(sizeof(*scrblk), M_SOLARIS, M_WAITOK); scrblk->ftsb_addr = addr; LIST_INSERT_HEAD(&fprc->ftpc_scrblks, scrblk, ftsb_next); /* * Carve the block up into chunks and put them on the free list. */ for (i = 0; i < FASTTRAP_SCRBLOCK_SIZE / FASTTRAP_SCRSPACE_SIZE; i++) { scrspc = malloc(sizeof(*scrspc), M_SOLARIS, M_WAITOK); scrspc->ftss_addr = addr + i * FASTTRAP_SCRSPACE_SIZE; LIST_INSERT_HEAD(&fprc->ftpc_fscr, scrspc, ftss_next); } } /* * Take the first scratch chunk off the free list, put it on the * allocated list, and return its address. */ scrspc = LIST_FIRST(&fprc->ftpc_fscr); LIST_REMOVE(scrspc, ftss_next); LIST_INSERT_HEAD(&fprc->ftpc_ascr, scrspc, ftss_next); /* * This scratch space is reserved for use by td until the thread exits. */ td->t_dtrace_sscr = scrspc; done: mutex_exit(&fprc->ftpc_mtx); return (scrspc); } /* * Return any allocated per-thread scratch space chunks back to the process' * free list. */ static void fasttrap_thread_dtor(void *arg __unused, struct thread *td) { fasttrap_bucket_t *bucket; fasttrap_proc_t *fprc; fasttrap_scrspace_t *scrspc; pid_t pid; if (td->t_dtrace_sscr == NULL) return; pid = td->td_proc->p_pid; bucket = &fasttrap_procs.fth_table[FASTTRAP_PROCS_INDEX(pid)]; fprc = NULL; /* Look up the fasttrap process handle for this process. */ mutex_enter(&bucket->ftb_mtx); for (fprc = bucket->ftb_data; fprc != NULL; fprc = fprc->ftpc_next) { if (fprc->ftpc_pid == pid) { mutex_enter(&fprc->ftpc_mtx); mutex_exit(&bucket->ftb_mtx); break; } } if (fprc == NULL) { mutex_exit(&bucket->ftb_mtx); return; } scrspc = (fasttrap_scrspace_t *)td->t_dtrace_sscr; LIST_REMOVE(scrspc, ftss_next); LIST_INSERT_HEAD(&fprc->ftpc_fscr, scrspc, ftss_next); mutex_exit(&fprc->ftpc_mtx); } #endif /* * This function ensures that no threads are actively using the memory * associated with probes that were formerly live. */ static void fasttrap_mod_barrier(uint64_t gen) { int i; if (gen < fasttrap_mod_gen) return; fasttrap_mod_gen++; #ifdef illumos CPU_FOREACH(i) { mutex_enter(&fasttrap_cpuc_pid_lock[i]); mutex_exit(&fasttrap_cpuc_pid_lock[i]); } #else rm_wlock(&fasttrap_tp_lock); rm_wunlock(&fasttrap_tp_lock); #endif } /* * This function performs asynchronous cleanup of fasttrap providers. The * Solaris implementation of this mechanism use a timeout that's activated in * fasttrap_pid_cleanup(), but this doesn't work in FreeBSD: one may sleep while * holding the DTrace mutexes, but it is unsafe to sleep in a callout handler. * Thus we use a dedicated process to perform the cleanup when requested. */ /*ARGSUSED*/ static void fasttrap_pid_cleanup_cb(void *data) { fasttrap_provider_t **fpp, *fp; fasttrap_bucket_t *bucket; dtrace_provider_id_t provid; int i, later = 0, rval; mtx_lock(&fasttrap_cleanup_mtx); while (!fasttrap_cleanup_drain || later > 0) { fasttrap_cleanup_work = 0; mtx_unlock(&fasttrap_cleanup_mtx); later = 0; /* * Iterate over all the providers trying to remove the marked * ones. If a provider is marked but not retired, we just * have to take a crack at removing it -- it's no big deal if * we can't. */ for (i = 0; i < fasttrap_provs.fth_nent; i++) { bucket = &fasttrap_provs.fth_table[i]; mutex_enter(&bucket->ftb_mtx); fpp = (fasttrap_provider_t **)&bucket->ftb_data; while ((fp = *fpp) != NULL) { if (!fp->ftp_marked) { fpp = &fp->ftp_next; continue; } mutex_enter(&fp->ftp_mtx); /* * If this provider has consumers actively * creating probes (ftp_ccount) or is a USDT * provider (ftp_mcount), we can't unregister * or even condense. */ if (fp->ftp_ccount != 0 || fp->ftp_mcount != 0) { mutex_exit(&fp->ftp_mtx); fp->ftp_marked = 0; continue; } if (!fp->ftp_retired || fp->ftp_rcount != 0) fp->ftp_marked = 0; mutex_exit(&fp->ftp_mtx); /* * If we successfully unregister this * provider we can remove it from the hash * chain and free the memory. If our attempt * to unregister fails and this is a retired * provider, increment our flag to try again * pretty soon. If we've consumed more than * half of our total permitted number of * probes call dtrace_condense() to try to * clean out the unenabled probes. */ provid = fp->ftp_provid; if ((rval = dtrace_unregister(provid)) != 0) { if (fasttrap_total > fasttrap_max / 2) (void) dtrace_condense(provid); if (rval == EAGAIN) fp->ftp_marked = 1; later += fp->ftp_marked; fpp = &fp->ftp_next; } else { *fpp = fp->ftp_next; fasttrap_provider_free(fp); } } mutex_exit(&bucket->ftb_mtx); } mtx_lock(&fasttrap_cleanup_mtx); /* * If we were unable to retire a provider, try again after a * second. This situation can occur in certain circumstances * where providers cannot be unregistered even though they have * no probes enabled because of an execution of dtrace -l or * something similar. */ if (later > 0 || fasttrap_cleanup_work || fasttrap_cleanup_drain) { mtx_unlock(&fasttrap_cleanup_mtx); pause("ftclean", hz); mtx_lock(&fasttrap_cleanup_mtx); } else mtx_sleep(&fasttrap_cleanup_cv, &fasttrap_cleanup_mtx, 0, "ftcl", 0); } /* * Wake up the thread in fasttrap_unload() now that we're done. */ wakeup(&fasttrap_cleanup_drain); mtx_unlock(&fasttrap_cleanup_mtx); kthread_exit(); } /* * Activates the asynchronous cleanup mechanism. */ static void fasttrap_pid_cleanup(void) { mtx_lock(&fasttrap_cleanup_mtx); if (!fasttrap_cleanup_work) { fasttrap_cleanup_work = 1; wakeup(&fasttrap_cleanup_cv); } mtx_unlock(&fasttrap_cleanup_mtx); } /* * This is called from cfork() via dtrace_fasttrap_fork(). The child * process's address space is (roughly) a copy of the parent process's so * we have to remove all the instrumentation we had previously enabled in the * parent. */ static void fasttrap_fork(proc_t *p, proc_t *cp) { #ifndef illumos fasttrap_scrblock_t *scrblk; fasttrap_proc_t *fprc = NULL; #endif pid_t ppid = p->p_pid; int i; ASSERT(curproc == p); #ifdef illumos ASSERT(p->p_proc_flag & P_PR_LOCK); #else PROC_LOCK_ASSERT(p, MA_OWNED); #endif #ifdef illumos ASSERT(p->p_dtrace_count > 0); #else /* * This check is purposely here instead of in kern_fork.c because, * for legal resons, we cannot include the dtrace_cddl.h header * inside kern_fork.c and insert if-clause there. */ if (p->p_dtrace_count == 0 && p->p_dtrace_helpers == NULL) return; #endif ASSERT(cp->p_dtrace_count == 0); /* * This would be simpler and faster if we maintained per-process * hash tables of enabled tracepoints. It could, however, potentially * slow down execution of a tracepoint since we'd need to go * through two levels of indirection. In the future, we should * consider either maintaining per-process ancillary lists of * enabled tracepoints or hanging a pointer to a per-process hash * table of enabled tracepoints off the proc structure. */ /* * We don't have to worry about the child process disappearing * because we're in fork(). */ #ifdef illumos mtx_lock_spin(&cp->p_slock); sprlock_proc(cp); mtx_unlock_spin(&cp->p_slock); #else /* * fasttrap_tracepoint_remove() expects the child process to be * unlocked and the VM then expects curproc to be unlocked. */ _PHOLD(cp); PROC_UNLOCK(cp); PROC_UNLOCK(p); if (p->p_dtrace_count == 0) goto dup_helpers; #endif /* * Iterate over every tracepoint looking for ones that belong to the * parent process, and remove each from the child process. */ for (i = 0; i < fasttrap_tpoints.fth_nent; i++) { fasttrap_tracepoint_t *tp; fasttrap_bucket_t *bucket = &fasttrap_tpoints.fth_table[i]; mutex_enter(&bucket->ftb_mtx); for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { if (tp->ftt_pid == ppid && tp->ftt_proc->ftpc_acount != 0) { int ret = fasttrap_tracepoint_remove(cp, tp); ASSERT(ret == 0); /* * The count of active providers can only be * decremented (i.e. to zero) during exec, * exit, and removal of a meta provider so it * should be impossible to drop the count * mid-fork. */ ASSERT(tp->ftt_proc->ftpc_acount != 0); #ifndef illumos fprc = tp->ftt_proc; #endif } } mutex_exit(&bucket->ftb_mtx); #ifndef illumos /* * Unmap any scratch space inherited from the parent's address * space. */ if (fprc != NULL) { mutex_enter(&fprc->ftpc_mtx); LIST_FOREACH(scrblk, &fprc->ftpc_scrblks, ftsb_next) { vm_map_remove(&cp->p_vmspace->vm_map, scrblk->ftsb_addr, scrblk->ftsb_addr + FASTTRAP_SCRBLOCK_SIZE); } mutex_exit(&fprc->ftpc_mtx); } #endif } #ifdef illumos mutex_enter(&cp->p_lock); sprunlock(cp); #else dup_helpers: if (p->p_dtrace_helpers != NULL) dtrace_helpers_duplicate(p, cp); PROC_LOCK(p); PROC_LOCK(cp); _PRELE(cp); #endif } /* * This is called from proc_exit() or from exec_common() if p_dtrace_probes * is set on the proc structure to indicate that there is a pid provider * associated with this process. */ static void fasttrap_exec_exit(proc_t *p) { #ifndef illumos struct thread *td; #endif #ifdef illumos ASSERT(p == curproc); #else PROC_LOCK_ASSERT(p, MA_OWNED); _PHOLD(p); /* * Since struct threads may be recycled, we cannot rely on t_dtrace_sscr * fields to be zeroed by kdtrace_thread_ctor. Thus we must zero it * ourselves when a process exits. */ FOREACH_THREAD_IN_PROC(p, td) td->t_dtrace_sscr = NULL; PROC_UNLOCK(p); #endif /* * We clean up the pid provider for this process here; user-land * static probes are handled by the meta-provider remove entry point. */ fasttrap_provider_retire(p->p_pid, FASTTRAP_PID_NAME, 0); #ifndef illumos if (p->p_dtrace_helpers) dtrace_helpers_destroy(p); PROC_LOCK(p); _PRELE(p); #endif } /*ARGSUSED*/ static void fasttrap_pid_provide(void *arg, dtrace_probedesc_t *desc) { /* * There are no "default" pid probes. */ } static int fasttrap_tracepoint_enable(proc_t *p, fasttrap_probe_t *probe, uint_t index) { fasttrap_tracepoint_t *tp, *new_tp = NULL; fasttrap_bucket_t *bucket; fasttrap_id_t *id; pid_t pid; uintptr_t pc; ASSERT(index < probe->ftp_ntps); pid = probe->ftp_pid; pc = probe->ftp_tps[index].fit_tp->ftt_pc; id = &probe->ftp_tps[index].fit_id; ASSERT(probe->ftp_tps[index].fit_tp->ftt_pid == pid); #ifdef illumos ASSERT(!(p->p_flag & SVFORK)); #endif /* * Before we make any modifications, make sure we've imposed a barrier * on the generation in which this probe was last modified. */ fasttrap_mod_barrier(probe->ftp_gen); bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; /* * If the tracepoint has already been enabled, just add our id to the * list of interested probes. This may be our second time through * this path in which case we'll have constructed the tracepoint we'd * like to install. If we can't find a match, and have an allocated * tracepoint ready to go, enable that one now. * * A tracepoint whose process is defunct is also considered defunct. */ again: mutex_enter(&bucket->ftb_mtx); for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { /* * Note that it's safe to access the active count on the * associated proc structure because we know that at least one * provider (this one) will still be around throughout this * operation. */ if (tp->ftt_pid != pid || tp->ftt_pc != pc || tp->ftt_proc->ftpc_acount == 0) continue; /* * Now that we've found a matching tracepoint, it would be * a decent idea to confirm that the tracepoint is still * enabled and the trap instruction hasn't been overwritten. * Since this is a little hairy, we'll punt for now. */ /* * This can't be the first interested probe. We don't have * to worry about another thread being in the midst of * deleting this tracepoint (which would be the only valid * reason for a tracepoint to have no interested probes) * since we're holding P_PR_LOCK for this process. */ ASSERT(tp->ftt_ids != NULL || tp->ftt_retids != NULL); switch (id->fti_ptype) { case DTFTP_ENTRY: case DTFTP_OFFSETS: case DTFTP_IS_ENABLED: id->fti_next = tp->ftt_ids; membar_producer(); tp->ftt_ids = id; membar_producer(); break; case DTFTP_RETURN: case DTFTP_POST_OFFSETS: id->fti_next = tp->ftt_retids; membar_producer(); tp->ftt_retids = id; membar_producer(); break; default: ASSERT(0); } mutex_exit(&bucket->ftb_mtx); if (new_tp != NULL) { new_tp->ftt_ids = NULL; new_tp->ftt_retids = NULL; } return (0); } /* * If we have a good tracepoint ready to go, install it now while * we have the lock held and no one can screw with us. */ if (new_tp != NULL) { int rc = 0; new_tp->ftt_next = bucket->ftb_data; membar_producer(); bucket->ftb_data = new_tp; membar_producer(); mutex_exit(&bucket->ftb_mtx); /* * Activate the tracepoint in the ISA-specific manner. * If this fails, we need to report the failure, but * indicate that this tracepoint must still be disabled * by calling fasttrap_tracepoint_disable(). */ if (fasttrap_tracepoint_install(p, new_tp) != 0) rc = FASTTRAP_ENABLE_PARTIAL; /* * Increment the count of the number of tracepoints active in * the victim process. */ #ifdef illumos ASSERT(p->p_proc_flag & P_PR_LOCK); #endif p->p_dtrace_count++; return (rc); } mutex_exit(&bucket->ftb_mtx); /* * Initialize the tracepoint that's been preallocated with the probe. */ new_tp = probe->ftp_tps[index].fit_tp; ASSERT(new_tp->ftt_pid == pid); ASSERT(new_tp->ftt_pc == pc); ASSERT(new_tp->ftt_proc == probe->ftp_prov->ftp_proc); ASSERT(new_tp->ftt_ids == NULL); ASSERT(new_tp->ftt_retids == NULL); switch (id->fti_ptype) { case DTFTP_ENTRY: case DTFTP_OFFSETS: case DTFTP_IS_ENABLED: id->fti_next = NULL; new_tp->ftt_ids = id; break; case DTFTP_RETURN: case DTFTP_POST_OFFSETS: id->fti_next = NULL; new_tp->ftt_retids = id; break; default: ASSERT(0); } #ifdef __FreeBSD__ if (SV_PROC_FLAG(p, SV_LP64)) p->p_model = DATAMODEL_LP64; else p->p_model = DATAMODEL_ILP32; #endif /* * If the ISA-dependent initialization goes to plan, go back to the * beginning and try to install this freshly made tracepoint. */ if (fasttrap_tracepoint_init(p, new_tp, pc, id->fti_ptype) == 0) goto again; new_tp->ftt_ids = NULL; new_tp->ftt_retids = NULL; return (FASTTRAP_ENABLE_FAIL); } static void fasttrap_tracepoint_disable(proc_t *p, fasttrap_probe_t *probe, uint_t index) { fasttrap_bucket_t *bucket; fasttrap_provider_t *provider = probe->ftp_prov; fasttrap_tracepoint_t **pp, *tp; fasttrap_id_t *id, **idp = NULL; pid_t pid; uintptr_t pc; ASSERT(index < probe->ftp_ntps); pid = probe->ftp_pid; pc = probe->ftp_tps[index].fit_tp->ftt_pc; id = &probe->ftp_tps[index].fit_id; ASSERT(probe->ftp_tps[index].fit_tp->ftt_pid == pid); /* * Find the tracepoint and make sure that our id is one of the * ones registered with it. */ bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; mutex_enter(&bucket->ftb_mtx); for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { if (tp->ftt_pid == pid && tp->ftt_pc == pc && tp->ftt_proc == provider->ftp_proc) break; } /* * If we somehow lost this tracepoint, we're in a world of hurt. */ ASSERT(tp != NULL); switch (id->fti_ptype) { case DTFTP_ENTRY: case DTFTP_OFFSETS: case DTFTP_IS_ENABLED: ASSERT(tp->ftt_ids != NULL); idp = &tp->ftt_ids; break; case DTFTP_RETURN: case DTFTP_POST_OFFSETS: ASSERT(tp->ftt_retids != NULL); idp = &tp->ftt_retids; break; default: ASSERT(0); } while ((*idp)->fti_probe != probe) { idp = &(*idp)->fti_next; ASSERT(*idp != NULL); } id = *idp; *idp = id->fti_next; membar_producer(); ASSERT(id->fti_probe == probe); /* * If there are other registered enablings of this tracepoint, we're * all done, but if this was the last probe assocated with this * this tracepoint, we need to remove and free it. */ if (tp->ftt_ids != NULL || tp->ftt_retids != NULL) { /* * If the current probe's tracepoint is in use, swap it * for an unused tracepoint. */ if (tp == probe->ftp_tps[index].fit_tp) { fasttrap_probe_t *tmp_probe; fasttrap_tracepoint_t **tmp_tp; uint_t tmp_index; if (tp->ftt_ids != NULL) { tmp_probe = tp->ftt_ids->fti_probe; /* LINTED - alignment */ tmp_index = FASTTRAP_ID_INDEX(tp->ftt_ids); tmp_tp = &tmp_probe->ftp_tps[tmp_index].fit_tp; } else { tmp_probe = tp->ftt_retids->fti_probe; /* LINTED - alignment */ tmp_index = FASTTRAP_ID_INDEX(tp->ftt_retids); tmp_tp = &tmp_probe->ftp_tps[tmp_index].fit_tp; } ASSERT(*tmp_tp != NULL); ASSERT(*tmp_tp != probe->ftp_tps[index].fit_tp); ASSERT((*tmp_tp)->ftt_ids == NULL); ASSERT((*tmp_tp)->ftt_retids == NULL); probe->ftp_tps[index].fit_tp = *tmp_tp; *tmp_tp = tp; } mutex_exit(&bucket->ftb_mtx); /* * Tag the modified probe with the generation in which it was * changed. */ probe->ftp_gen = fasttrap_mod_gen; return; } mutex_exit(&bucket->ftb_mtx); /* * We can't safely remove the tracepoint from the set of active * tracepoints until we've actually removed the fasttrap instruction * from the process's text. We can, however, operate on this * tracepoint secure in the knowledge that no other thread is going to * be looking at it since we hold P_PR_LOCK on the process if it's * live or we hold the provider lock on the process if it's dead and * gone. */ /* * We only need to remove the actual instruction if we're looking * at an existing process */ if (p != NULL) { /* * If we fail to restore the instruction we need to kill * this process since it's in a completely unrecoverable * state. */ if (fasttrap_tracepoint_remove(p, tp) != 0) fasttrap_sigtrap(p, NULL, pc); /* * Decrement the count of the number of tracepoints active * in the victim process. */ #ifdef illumos ASSERT(p->p_proc_flag & P_PR_LOCK); #endif p->p_dtrace_count--; + + atomic_add_rel_64(&p->p_fasttrap_tp_gen, 1); } /* * Remove the probe from the hash table of active tracepoints. */ mutex_enter(&bucket->ftb_mtx); pp = (fasttrap_tracepoint_t **)&bucket->ftb_data; ASSERT(*pp != NULL); while (*pp != tp) { pp = &(*pp)->ftt_next; ASSERT(*pp != NULL); } *pp = tp->ftt_next; membar_producer(); mutex_exit(&bucket->ftb_mtx); /* * Tag the modified probe with the generation in which it was changed. */ probe->ftp_gen = fasttrap_mod_gen; } static void fasttrap_enable_callbacks(void) { /* * We don't have to play the rw lock game here because we're * providing something rather than taking something away -- * we can be sure that no threads have tried to follow this * function pointer yet. */ mutex_enter(&fasttrap_count_mtx); if (fasttrap_pid_count == 0) { ASSERT(dtrace_pid_probe_ptr == NULL); ASSERT(dtrace_return_probe_ptr == NULL); dtrace_pid_probe_ptr = &fasttrap_pid_probe; dtrace_return_probe_ptr = &fasttrap_return_probe; } ASSERT(dtrace_pid_probe_ptr == &fasttrap_pid_probe); ASSERT(dtrace_return_probe_ptr == &fasttrap_return_probe); fasttrap_pid_count++; mutex_exit(&fasttrap_count_mtx); } static void fasttrap_disable_callbacks(void) { #ifdef illumos ASSERT(MUTEX_HELD(&cpu_lock)); #endif mutex_enter(&fasttrap_count_mtx); ASSERT(fasttrap_pid_count > 0); fasttrap_pid_count--; if (fasttrap_pid_count == 0) { #ifdef illumos cpu_t *cur, *cpu = CPU; for (cur = cpu->cpu_next_onln; cur != cpu; cur = cur->cpu_next_onln) { rw_enter(&cur->cpu_ft_lock, RW_WRITER); } #endif dtrace_pid_probe_ptr = NULL; dtrace_return_probe_ptr = NULL; #ifdef illumos for (cur = cpu->cpu_next_onln; cur != cpu; cur = cur->cpu_next_onln) { rw_exit(&cur->cpu_ft_lock); } #endif } mutex_exit(&fasttrap_count_mtx); } /*ARGSUSED*/ static void fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg) { fasttrap_probe_t *probe = parg; proc_t *p = NULL; int i, rc; ASSERT(probe != NULL); ASSERT(!probe->ftp_enabled); ASSERT(id == probe->ftp_id); #ifdef illumos ASSERT(MUTEX_HELD(&cpu_lock)); #endif /* * Increment the count of enabled probes on this probe's provider; * the provider can't go away while the probe still exists. We * must increment this even if we aren't able to properly enable * this probe. */ mutex_enter(&probe->ftp_prov->ftp_mtx); probe->ftp_prov->ftp_rcount++; mutex_exit(&probe->ftp_prov->ftp_mtx); /* * If this probe's provider is retired (meaning it was valid in a * previously exec'ed incarnation of this address space), bail out. The * provider can't go away while we're in this code path. */ if (probe->ftp_prov->ftp_retired) return; /* * If we can't find the process, it may be that we're in the context of * a fork in which the traced process is being born and we're copying * USDT probes. Otherwise, the process is gone so bail. */ #ifdef illumos if ((p = sprlock(probe->ftp_pid)) == NULL) { if ((curproc->p_flag & SFORKING) == 0) return; mutex_enter(&pidlock); p = prfind(probe->ftp_pid); if (p == NULL) { /* * So it's not that the target process is being born, * it's that it isn't there at all (and we simply * happen to be forking). Anyway, we know that the * target is definitely gone, so bail out. */ mutex_exit(&pidlock); return (0); } /* * Confirm that curproc is indeed forking the process in which * we're trying to enable probes. */ ASSERT(p->p_parent == curproc); ASSERT(p->p_stat == SIDL); mutex_enter(&p->p_lock); mutex_exit(&pidlock); sprlock_proc(p); } ASSERT(!(p->p_flag & SVFORK)); mutex_exit(&p->p_lock); #else if (pget(probe->ftp_pid, PGET_HOLD | PGET_NOTWEXIT, &p) != 0) return; #endif /* * We have to enable the trap entry point before any user threads have * the chance to execute the trap instruction we're about to place * in their process's text. */ fasttrap_enable_callbacks(); /* * Enable all the tracepoints and add this probe's id to each * tracepoint's list of active probes. */ for (i = 0; i < probe->ftp_ntps; i++) { if ((rc = fasttrap_tracepoint_enable(p, probe, i)) != 0) { /* * If enabling the tracepoint failed completely, * we don't have to disable it; if the failure * was only partial we must disable it. */ if (rc == FASTTRAP_ENABLE_FAIL) i--; else ASSERT(rc == FASTTRAP_ENABLE_PARTIAL); /* * Back up and pull out all the tracepoints we've * created so far for this probe. */ while (i >= 0) { fasttrap_tracepoint_disable(p, probe, i); i--; } #ifdef illumos mutex_enter(&p->p_lock); sprunlock(p); #else PRELE(p); #endif /* * Since we're not actually enabling this probe, * drop our reference on the trap table entry. */ fasttrap_disable_callbacks(); return; } } #ifdef illumos mutex_enter(&p->p_lock); sprunlock(p); #else PRELE(p); #endif probe->ftp_enabled = 1; } /*ARGSUSED*/ static void fasttrap_pid_disable(void *arg, dtrace_id_t id, void *parg) { fasttrap_probe_t *probe = parg; fasttrap_provider_t *provider = probe->ftp_prov; proc_t *p; int i, whack = 0; ASSERT(id == probe->ftp_id); mutex_enter(&provider->ftp_mtx); /* * We won't be able to acquire a /proc-esque lock on the process * iff the process is dead and gone. In this case, we rely on the * provider lock as a point of mutual exclusion to prevent other * DTrace consumers from disabling this probe. */ if (pget(probe->ftp_pid, PGET_HOLD | PGET_NOTWEXIT, &p) != 0) p = NULL; /* * Disable all the associated tracepoints (for fully enabled probes). */ if (probe->ftp_enabled) { for (i = 0; i < probe->ftp_ntps; i++) { fasttrap_tracepoint_disable(p, probe, i); } } ASSERT(provider->ftp_rcount > 0); provider->ftp_rcount--; if (p != NULL) { /* * Even though we may not be able to remove it entirely, we * mark this retired provider to get a chance to remove some * of the associated probes. */ if (provider->ftp_retired && !provider->ftp_marked) whack = provider->ftp_marked = 1; mutex_exit(&provider->ftp_mtx); } else { /* * If the process is dead, we're just waiting for the * last probe to be disabled to be able to free it. */ if (provider->ftp_rcount == 0 && !provider->ftp_marked) whack = provider->ftp_marked = 1; mutex_exit(&provider->ftp_mtx); } if (whack) fasttrap_pid_cleanup(); #ifdef __FreeBSD__ if (p != NULL) PRELE(p); #endif if (!probe->ftp_enabled) return; probe->ftp_enabled = 0; #ifdef illumos ASSERT(MUTEX_HELD(&cpu_lock)); #endif fasttrap_disable_callbacks(); } /*ARGSUSED*/ static void fasttrap_pid_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc) { fasttrap_probe_t *probe = parg; char *str; int i, ndx; desc->dtargd_native[0] = '\0'; desc->dtargd_xlate[0] = '\0'; if (probe->ftp_prov->ftp_retired != 0 || desc->dtargd_ndx >= probe->ftp_nargs) { desc->dtargd_ndx = DTRACE_ARGNONE; return; } ndx = (probe->ftp_argmap != NULL) ? probe->ftp_argmap[desc->dtargd_ndx] : desc->dtargd_ndx; str = probe->ftp_ntypes; for (i = 0; i < ndx; i++) { str += strlen(str) + 1; } ASSERT(strlen(str + 1) < sizeof (desc->dtargd_native)); (void) strcpy(desc->dtargd_native, str); if (probe->ftp_xtypes == NULL) return; str = probe->ftp_xtypes; for (i = 0; i < desc->dtargd_ndx; i++) { str += strlen(str) + 1; } ASSERT(strlen(str + 1) < sizeof (desc->dtargd_xlate)); (void) strcpy(desc->dtargd_xlate, str); } /*ARGSUSED*/ static void fasttrap_pid_destroy(void *arg, dtrace_id_t id, void *parg) { fasttrap_probe_t *probe = parg; int i; size_t size; ASSERT(probe != NULL); ASSERT(!probe->ftp_enabled); ASSERT(fasttrap_total >= probe->ftp_ntps); atomic_add_32(&fasttrap_total, -probe->ftp_ntps); size = offsetof(fasttrap_probe_t, ftp_tps[probe->ftp_ntps]); if (probe->ftp_gen + 1 >= fasttrap_mod_gen) fasttrap_mod_barrier(probe->ftp_gen); for (i = 0; i < probe->ftp_ntps; i++) { kmem_free(probe->ftp_tps[i].fit_tp, sizeof (fasttrap_tracepoint_t)); } kmem_free(probe, size); } static const dtrace_pattr_t pid_attr = { { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, }; static dtrace_pops_t pid_pops = { .dtps_provide = fasttrap_pid_provide, .dtps_provide_module = NULL, .dtps_enable = fasttrap_pid_enable, .dtps_disable = fasttrap_pid_disable, .dtps_suspend = NULL, .dtps_resume = NULL, .dtps_getargdesc = fasttrap_pid_getargdesc, .dtps_getargval = fasttrap_pid_getarg, .dtps_usermode = NULL, .dtps_destroy = fasttrap_pid_destroy }; static dtrace_pops_t usdt_pops = { .dtps_provide = fasttrap_pid_provide, .dtps_provide_module = NULL, .dtps_enable = fasttrap_pid_enable, .dtps_disable = fasttrap_pid_disable, .dtps_suspend = NULL, .dtps_resume = NULL, .dtps_getargdesc = fasttrap_pid_getargdesc, .dtps_getargval = fasttrap_usdt_getarg, .dtps_usermode = NULL, .dtps_destroy = fasttrap_pid_destroy }; static fasttrap_proc_t * fasttrap_proc_lookup(pid_t pid) { fasttrap_bucket_t *bucket; fasttrap_proc_t *fprc, *new_fprc; bucket = &fasttrap_procs.fth_table[FASTTRAP_PROCS_INDEX(pid)]; mutex_enter(&bucket->ftb_mtx); for (fprc = bucket->ftb_data; fprc != NULL; fprc = fprc->ftpc_next) { if (fprc->ftpc_pid == pid && fprc->ftpc_acount != 0) { mutex_enter(&fprc->ftpc_mtx); mutex_exit(&bucket->ftb_mtx); fprc->ftpc_rcount++; atomic_inc_64(&fprc->ftpc_acount); ASSERT(fprc->ftpc_acount <= fprc->ftpc_rcount); mutex_exit(&fprc->ftpc_mtx); return (fprc); } } /* * Drop the bucket lock so we don't try to perform a sleeping * allocation under it. */ mutex_exit(&bucket->ftb_mtx); new_fprc = kmem_zalloc(sizeof (fasttrap_proc_t), KM_SLEEP); new_fprc->ftpc_pid = pid; new_fprc->ftpc_rcount = 1; new_fprc->ftpc_acount = 1; #ifndef illumos mutex_init(&new_fprc->ftpc_mtx, "fasttrap proc mtx", MUTEX_DEFAULT, NULL); #endif mutex_enter(&bucket->ftb_mtx); /* * Take another lap through the list to make sure a proc hasn't * been created for this pid while we weren't under the bucket lock. */ for (fprc = bucket->ftb_data; fprc != NULL; fprc = fprc->ftpc_next) { if (fprc->ftpc_pid == pid && fprc->ftpc_acount != 0) { mutex_enter(&fprc->ftpc_mtx); mutex_exit(&bucket->ftb_mtx); fprc->ftpc_rcount++; atomic_inc_64(&fprc->ftpc_acount); ASSERT(fprc->ftpc_acount <= fprc->ftpc_rcount); mutex_exit(&fprc->ftpc_mtx); kmem_free(new_fprc, sizeof (fasttrap_proc_t)); return (fprc); } } new_fprc->ftpc_next = bucket->ftb_data; bucket->ftb_data = new_fprc; mutex_exit(&bucket->ftb_mtx); return (new_fprc); } static void fasttrap_proc_release(fasttrap_proc_t *proc) { fasttrap_bucket_t *bucket; fasttrap_proc_t *fprc, **fprcp; pid_t pid = proc->ftpc_pid; #ifndef illumos fasttrap_scrblock_t *scrblk, *scrblktmp; fasttrap_scrspace_t *scrspc, *scrspctmp; struct proc *p; struct thread *td; #endif mutex_enter(&proc->ftpc_mtx); ASSERT(proc->ftpc_rcount != 0); ASSERT(proc->ftpc_acount <= proc->ftpc_rcount); if (--proc->ftpc_rcount != 0) { mutex_exit(&proc->ftpc_mtx); return; } #ifndef illumos /* * Free all structures used to manage per-thread scratch space. */ LIST_FOREACH_SAFE(scrblk, &proc->ftpc_scrblks, ftsb_next, scrblktmp) { LIST_REMOVE(scrblk, ftsb_next); free(scrblk, M_SOLARIS); } LIST_FOREACH_SAFE(scrspc, &proc->ftpc_fscr, ftss_next, scrspctmp) { LIST_REMOVE(scrspc, ftss_next); free(scrspc, M_SOLARIS); } LIST_FOREACH_SAFE(scrspc, &proc->ftpc_ascr, ftss_next, scrspctmp) { LIST_REMOVE(scrspc, ftss_next); free(scrspc, M_SOLARIS); } if ((p = pfind(pid)) != NULL) { FOREACH_THREAD_IN_PROC(p, td) td->t_dtrace_sscr = NULL; PROC_UNLOCK(p); } #endif mutex_exit(&proc->ftpc_mtx); /* * There should definitely be no live providers associated with this * process at this point. */ ASSERT(proc->ftpc_acount == 0); bucket = &fasttrap_procs.fth_table[FASTTRAP_PROCS_INDEX(pid)]; mutex_enter(&bucket->ftb_mtx); fprcp = (fasttrap_proc_t **)&bucket->ftb_data; while ((fprc = *fprcp) != NULL) { if (fprc == proc) break; fprcp = &fprc->ftpc_next; } /* * Something strange has happened if we can't find the proc. */ ASSERT(fprc != NULL); *fprcp = fprc->ftpc_next; mutex_exit(&bucket->ftb_mtx); kmem_free(fprc, sizeof (fasttrap_proc_t)); } /* * Lookup a fasttrap-managed provider based on its name and associated pid. * If the pattr argument is non-NULL, this function instantiates the provider * if it doesn't exist otherwise it returns NULL. The provider is returned * with its lock held. */ static fasttrap_provider_t * fasttrap_provider_lookup(pid_t pid, const char *name, const dtrace_pattr_t *pattr) { fasttrap_provider_t *fp, *new_fp = NULL; fasttrap_bucket_t *bucket; char provname[DTRACE_PROVNAMELEN]; proc_t *p; cred_t *cred; ASSERT(strlen(name) < sizeof (fp->ftp_name)); ASSERT(pattr != NULL); bucket = &fasttrap_provs.fth_table[FASTTRAP_PROVS_INDEX(pid, name)]; mutex_enter(&bucket->ftb_mtx); /* * Take a lap through the list and return the match if we find it. */ for (fp = bucket->ftb_data; fp != NULL; fp = fp->ftp_next) { if (fp->ftp_pid == pid && strcmp(fp->ftp_name, name) == 0 && !fp->ftp_retired) { mutex_enter(&fp->ftp_mtx); mutex_exit(&bucket->ftb_mtx); return (fp); } } /* * Drop the bucket lock so we don't try to perform a sleeping * allocation under it. */ mutex_exit(&bucket->ftb_mtx); /* * Make sure the process exists, isn't a child created as the result * of a vfork(2), and isn't a zombie (but may be in fork). */ if ((p = pfind(pid)) == NULL) return (NULL); /* * Increment p_dtrace_probes so that the process knows to inform us * when it exits or execs. fasttrap_provider_free() decrements this * when we're done with this provider. */ p->p_dtrace_probes++; /* * Grab the credentials for this process so we have * something to pass to dtrace_register(). */ PROC_LOCK_ASSERT(p, MA_OWNED); crhold(p->p_ucred); cred = p->p_ucred; PROC_UNLOCK(p); new_fp = kmem_zalloc(sizeof (fasttrap_provider_t), KM_SLEEP); new_fp->ftp_pid = pid; new_fp->ftp_proc = fasttrap_proc_lookup(pid); #ifndef illumos mutex_init(&new_fp->ftp_mtx, "provider mtx", MUTEX_DEFAULT, NULL); mutex_init(&new_fp->ftp_cmtx, "lock on creating", MUTEX_DEFAULT, NULL); #endif ASSERT(new_fp->ftp_proc != NULL); mutex_enter(&bucket->ftb_mtx); /* * Take another lap through the list to make sure a provider hasn't * been created for this pid while we weren't under the bucket lock. */ for (fp = bucket->ftb_data; fp != NULL; fp = fp->ftp_next) { if (fp->ftp_pid == pid && strcmp(fp->ftp_name, name) == 0 && !fp->ftp_retired) { mutex_enter(&fp->ftp_mtx); mutex_exit(&bucket->ftb_mtx); fasttrap_provider_free(new_fp); crfree(cred); return (fp); } } (void) strcpy(new_fp->ftp_name, name); /* * Fail and return NULL if either the provider name is too long * or we fail to register this new provider with the DTrace * framework. Note that this is the only place we ever construct * the full provider name -- we keep it in pieces in the provider * structure. */ if (snprintf(provname, sizeof (provname), "%s%u", name, (uint_t)pid) >= sizeof (provname) || dtrace_register(provname, pattr, DTRACE_PRIV_PROC | DTRACE_PRIV_OWNER | DTRACE_PRIV_ZONEOWNER, cred, pattr == &pid_attr ? &pid_pops : &usdt_pops, new_fp, &new_fp->ftp_provid) != 0) { mutex_exit(&bucket->ftb_mtx); fasttrap_provider_free(new_fp); crfree(cred); return (NULL); } new_fp->ftp_next = bucket->ftb_data; bucket->ftb_data = new_fp; mutex_enter(&new_fp->ftp_mtx); mutex_exit(&bucket->ftb_mtx); crfree(cred); return (new_fp); } static void fasttrap_provider_free(fasttrap_provider_t *provider) { pid_t pid = provider->ftp_pid; proc_t *p; /* * There need to be no associated enabled probes, no consumers * creating probes, and no meta providers referencing this provider. */ ASSERT(provider->ftp_rcount == 0); ASSERT(provider->ftp_ccount == 0); ASSERT(provider->ftp_mcount == 0); /* * If this provider hasn't been retired, we need to explicitly drop the * count of active providers on the associated process structure. */ if (!provider->ftp_retired) { atomic_dec_64(&provider->ftp_proc->ftpc_acount); ASSERT(provider->ftp_proc->ftpc_acount < provider->ftp_proc->ftpc_rcount); } fasttrap_proc_release(provider->ftp_proc); #ifndef illumos mutex_destroy(&provider->ftp_mtx); mutex_destroy(&provider->ftp_cmtx); #endif kmem_free(provider, sizeof (fasttrap_provider_t)); /* * Decrement p_dtrace_probes on the process whose provider we're * freeing. We don't have to worry about clobbering somone else's * modifications to it because we have locked the bucket that * corresponds to this process's hash chain in the provider hash * table. Don't sweat it if we can't find the process. */ if ((p = pfind(pid)) == NULL) { return; } p->p_dtrace_probes--; #ifndef illumos PROC_UNLOCK(p); #endif } static void fasttrap_provider_retire(pid_t pid, const char *name, int mprov) { fasttrap_provider_t *fp; fasttrap_bucket_t *bucket; dtrace_provider_id_t provid; ASSERT(strlen(name) < sizeof (fp->ftp_name)); bucket = &fasttrap_provs.fth_table[FASTTRAP_PROVS_INDEX(pid, name)]; mutex_enter(&bucket->ftb_mtx); for (fp = bucket->ftb_data; fp != NULL; fp = fp->ftp_next) { if (fp->ftp_pid == pid && strcmp(fp->ftp_name, name) == 0 && !fp->ftp_retired) break; } if (fp == NULL) { mutex_exit(&bucket->ftb_mtx); return; } mutex_enter(&fp->ftp_mtx); ASSERT(!mprov || fp->ftp_mcount > 0); if (mprov && --fp->ftp_mcount != 0) { mutex_exit(&fp->ftp_mtx); mutex_exit(&bucket->ftb_mtx); return; } /* * Mark the provider to be removed in our post-processing step, mark it * retired, and drop the active count on its proc. Marking it indicates * that we should try to remove it; setting the retired flag indicates * that we're done with this provider; dropping the active the proc * releases our hold, and when this reaches zero (as it will during * exit or exec) the proc and associated providers become defunct. * * We obviously need to take the bucket lock before the provider lock * to perform the lookup, but we need to drop the provider lock * before calling into the DTrace framework since we acquire the * provider lock in callbacks invoked from the DTrace framework. The * bucket lock therefore protects the integrity of the provider hash * table. */ atomic_dec_64(&fp->ftp_proc->ftpc_acount); ASSERT(fp->ftp_proc->ftpc_acount < fp->ftp_proc->ftpc_rcount); fp->ftp_retired = 1; fp->ftp_marked = 1; provid = fp->ftp_provid; mutex_exit(&fp->ftp_mtx); /* * We don't have to worry about invalidating the same provider twice * since fasttrap_provider_lookup() will ignore provider that have * been marked as retired. */ dtrace_invalidate(provid); mutex_exit(&bucket->ftb_mtx); fasttrap_pid_cleanup(); } static int fasttrap_uint32_cmp(const void *ap, const void *bp) { return (*(const uint32_t *)ap - *(const uint32_t *)bp); } static int fasttrap_uint64_cmp(const void *ap, const void *bp) { return (*(const uint64_t *)ap - *(const uint64_t *)bp); } static int fasttrap_add_probe(fasttrap_probe_spec_t *pdata) { fasttrap_provider_t *provider; fasttrap_probe_t *pp; fasttrap_tracepoint_t *tp; char *name; int i, aframes = 0, whack; /* * There needs to be at least one desired trace point. */ if (pdata->ftps_noffs == 0) return (EINVAL); switch (pdata->ftps_type) { case DTFTP_ENTRY: name = "entry"; aframes = FASTTRAP_ENTRY_AFRAMES; break; case DTFTP_RETURN: name = "return"; aframes = FASTTRAP_RETURN_AFRAMES; break; case DTFTP_OFFSETS: name = NULL; break; default: return (EINVAL); } if ((provider = fasttrap_provider_lookup(pdata->ftps_pid, FASTTRAP_PID_NAME, &pid_attr)) == NULL) return (ESRCH); /* * Increment this reference count to indicate that a consumer is * actively adding a new probe associated with this provider. This * prevents the provider from being deleted -- we'll need to check * for pending deletions when we drop this reference count. */ provider->ftp_ccount++; mutex_exit(&provider->ftp_mtx); /* * Grab the creation lock to ensure consistency between calls to * dtrace_probe_lookup() and dtrace_probe_create() in the face of * other threads creating probes. We must drop the provider lock * before taking this lock to avoid a three-way deadlock with the * DTrace framework. */ mutex_enter(&provider->ftp_cmtx); if (name == NULL) { for (i = 0; i < pdata->ftps_noffs; i++) { char name_str[17]; (void) sprintf(name_str, "%llx", (unsigned long long)pdata->ftps_offs[i]); if (dtrace_probe_lookup(provider->ftp_provid, pdata->ftps_mod, pdata->ftps_func, name_str) != 0) continue; atomic_inc_32(&fasttrap_total); if (fasttrap_total > fasttrap_max) { atomic_dec_32(&fasttrap_total); goto no_mem; } pp = kmem_zalloc(sizeof (fasttrap_probe_t), KM_SLEEP); pp->ftp_prov = provider; pp->ftp_faddr = pdata->ftps_pc; pp->ftp_fsize = pdata->ftps_size; pp->ftp_pid = pdata->ftps_pid; pp->ftp_ntps = 1; tp = kmem_zalloc(sizeof (fasttrap_tracepoint_t), KM_SLEEP); tp->ftt_proc = provider->ftp_proc; tp->ftt_pc = pdata->ftps_offs[i] + pdata->ftps_pc; tp->ftt_pid = pdata->ftps_pid; pp->ftp_tps[0].fit_tp = tp; pp->ftp_tps[0].fit_id.fti_probe = pp; pp->ftp_tps[0].fit_id.fti_ptype = pdata->ftps_type; pp->ftp_id = dtrace_probe_create(provider->ftp_provid, pdata->ftps_mod, pdata->ftps_func, name_str, FASTTRAP_OFFSET_AFRAMES, pp); } } else if (dtrace_probe_lookup(provider->ftp_provid, pdata->ftps_mod, pdata->ftps_func, name) == 0) { atomic_add_32(&fasttrap_total, pdata->ftps_noffs); if (fasttrap_total > fasttrap_max) { atomic_add_32(&fasttrap_total, -pdata->ftps_noffs); goto no_mem; } /* * Make sure all tracepoint program counter values are unique. * We later assume that each probe has exactly one tracepoint * for a given pc. */ qsort(pdata->ftps_offs, pdata->ftps_noffs, sizeof (uint64_t), fasttrap_uint64_cmp); for (i = 1; i < pdata->ftps_noffs; i++) { if (pdata->ftps_offs[i] > pdata->ftps_offs[i - 1]) continue; atomic_add_32(&fasttrap_total, -pdata->ftps_noffs); goto no_mem; } ASSERT(pdata->ftps_noffs > 0); pp = kmem_zalloc(offsetof(fasttrap_probe_t, ftp_tps[pdata->ftps_noffs]), KM_SLEEP); pp->ftp_prov = provider; pp->ftp_faddr = pdata->ftps_pc; pp->ftp_fsize = pdata->ftps_size; pp->ftp_pid = pdata->ftps_pid; pp->ftp_ntps = pdata->ftps_noffs; for (i = 0; i < pdata->ftps_noffs; i++) { tp = kmem_zalloc(sizeof (fasttrap_tracepoint_t), KM_SLEEP); tp->ftt_proc = provider->ftp_proc; tp->ftt_pc = pdata->ftps_offs[i] + pdata->ftps_pc; tp->ftt_pid = pdata->ftps_pid; pp->ftp_tps[i].fit_tp = tp; pp->ftp_tps[i].fit_id.fti_probe = pp; pp->ftp_tps[i].fit_id.fti_ptype = pdata->ftps_type; } pp->ftp_id = dtrace_probe_create(provider->ftp_provid, pdata->ftps_mod, pdata->ftps_func, name, aframes, pp); } mutex_exit(&provider->ftp_cmtx); /* * We know that the provider is still valid since we incremented the * creation reference count. If someone tried to clean up this provider * while we were using it (e.g. because the process called exec(2) or * exit(2)), take note of that and try to clean it up now. */ mutex_enter(&provider->ftp_mtx); provider->ftp_ccount--; whack = provider->ftp_retired; mutex_exit(&provider->ftp_mtx); if (whack) fasttrap_pid_cleanup(); return (0); no_mem: /* * If we've exhausted the allowable resources, we'll try to remove * this provider to free some up. This is to cover the case where * the user has accidentally created many more probes than was * intended (e.g. pid123:::). */ mutex_exit(&provider->ftp_cmtx); mutex_enter(&provider->ftp_mtx); provider->ftp_ccount--; provider->ftp_marked = 1; mutex_exit(&provider->ftp_mtx); fasttrap_pid_cleanup(); return (ENOMEM); } /*ARGSUSED*/ static void * fasttrap_meta_provide(void *arg, dtrace_helper_provdesc_t *dhpv, pid_t pid) { fasttrap_provider_t *provider; /* * A 32-bit unsigned integer (like a pid for example) can be * expressed in 10 or fewer decimal digits. Make sure that we'll * have enough space for the provider name. */ if (strlen(dhpv->dthpv_provname) + 10 >= sizeof (provider->ftp_name)) { printf("failed to instantiate provider %s: " "name too long to accomodate pid", dhpv->dthpv_provname); return (NULL); } /* * Don't let folks spoof the true pid provider. */ if (strcmp(dhpv->dthpv_provname, FASTTRAP_PID_NAME) == 0) { printf("failed to instantiate provider %s: " "%s is an invalid name", dhpv->dthpv_provname, FASTTRAP_PID_NAME); return (NULL); } /* * The highest stability class that fasttrap supports is ISA; cap * the stability of the new provider accordingly. */ if (dhpv->dthpv_pattr.dtpa_provider.dtat_class > DTRACE_CLASS_ISA) dhpv->dthpv_pattr.dtpa_provider.dtat_class = DTRACE_CLASS_ISA; if (dhpv->dthpv_pattr.dtpa_mod.dtat_class > DTRACE_CLASS_ISA) dhpv->dthpv_pattr.dtpa_mod.dtat_class = DTRACE_CLASS_ISA; if (dhpv->dthpv_pattr.dtpa_func.dtat_class > DTRACE_CLASS_ISA) dhpv->dthpv_pattr.dtpa_func.dtat_class = DTRACE_CLASS_ISA; if (dhpv->dthpv_pattr.dtpa_name.dtat_class > DTRACE_CLASS_ISA) dhpv->dthpv_pattr.dtpa_name.dtat_class = DTRACE_CLASS_ISA; if (dhpv->dthpv_pattr.dtpa_args.dtat_class > DTRACE_CLASS_ISA) dhpv->dthpv_pattr.dtpa_args.dtat_class = DTRACE_CLASS_ISA; if ((provider = fasttrap_provider_lookup(pid, dhpv->dthpv_provname, &dhpv->dthpv_pattr)) == NULL) { printf("failed to instantiate provider %s for " "process %u", dhpv->dthpv_provname, (uint_t)pid); return (NULL); } /* * Up the meta provider count so this provider isn't removed until * the meta provider has been told to remove it. */ provider->ftp_mcount++; mutex_exit(&provider->ftp_mtx); return (provider); } /* * We know a few things about our context here: we know that the probe being * created doesn't already exist (DTrace won't load DOF at the same address * twice, even if explicitly told to do so) and we know that we are * single-threaded with respect to the meta provider machinery. Knowing that * this is a new probe and that there is no way for us to race with another * operation on this provider allows us an important optimization: we need not * lookup a probe before adding it. Saving this lookup is important because * this code is in the fork path for processes with USDT probes, and lookups * here are potentially very expensive because of long hash conflicts on * module, function and name (DTrace doesn't hash on provider name). */ /*ARGSUSED*/ static void fasttrap_meta_create_probe(void *arg, void *parg, dtrace_helper_probedesc_t *dhpb) { fasttrap_provider_t *provider = parg; fasttrap_probe_t *pp; fasttrap_tracepoint_t *tp; int i, j; uint32_t ntps; /* * Since the meta provider count is non-zero we don't have to worry * about this provider disappearing. */ ASSERT(provider->ftp_mcount > 0); /* * The offsets must be unique. */ qsort(dhpb->dthpb_offs, dhpb->dthpb_noffs, sizeof (uint32_t), fasttrap_uint32_cmp); for (i = 1; i < dhpb->dthpb_noffs; i++) { if (dhpb->dthpb_base + dhpb->dthpb_offs[i] <= dhpb->dthpb_base + dhpb->dthpb_offs[i - 1]) return; } qsort(dhpb->dthpb_enoffs, dhpb->dthpb_nenoffs, sizeof (uint32_t), fasttrap_uint32_cmp); for (i = 1; i < dhpb->dthpb_nenoffs; i++) { if (dhpb->dthpb_base + dhpb->dthpb_enoffs[i] <= dhpb->dthpb_base + dhpb->dthpb_enoffs[i - 1]) return; } ntps = dhpb->dthpb_noffs + dhpb->dthpb_nenoffs; ASSERT(ntps > 0); atomic_add_32(&fasttrap_total, ntps); if (fasttrap_total > fasttrap_max) { atomic_add_32(&fasttrap_total, -ntps); return; } pp = kmem_zalloc(offsetof(fasttrap_probe_t, ftp_tps[ntps]), KM_SLEEP); pp->ftp_prov = provider; pp->ftp_pid = provider->ftp_pid; pp->ftp_ntps = ntps; pp->ftp_nargs = dhpb->dthpb_xargc; pp->ftp_xtypes = dhpb->dthpb_xtypes; pp->ftp_ntypes = dhpb->dthpb_ntypes; /* * First create a tracepoint for each actual point of interest. */ for (i = 0; i < dhpb->dthpb_noffs; i++) { tp = kmem_zalloc(sizeof (fasttrap_tracepoint_t), KM_SLEEP); tp->ftt_proc = provider->ftp_proc; tp->ftt_pc = dhpb->dthpb_base + dhpb->dthpb_offs[i]; tp->ftt_pid = provider->ftp_pid; pp->ftp_tps[i].fit_tp = tp; pp->ftp_tps[i].fit_id.fti_probe = pp; #ifdef __sparc pp->ftp_tps[i].fit_id.fti_ptype = DTFTP_POST_OFFSETS; #else pp->ftp_tps[i].fit_id.fti_ptype = DTFTP_OFFSETS; #endif } /* * Then create a tracepoint for each is-enabled point. */ for (j = 0; i < ntps; i++, j++) { tp = kmem_zalloc(sizeof (fasttrap_tracepoint_t), KM_SLEEP); tp->ftt_proc = provider->ftp_proc; tp->ftt_pc = dhpb->dthpb_base + dhpb->dthpb_enoffs[j]; tp->ftt_pid = provider->ftp_pid; pp->ftp_tps[i].fit_tp = tp; pp->ftp_tps[i].fit_id.fti_probe = pp; pp->ftp_tps[i].fit_id.fti_ptype = DTFTP_IS_ENABLED; } /* * If the arguments are shuffled around we set the argument remapping * table. Later, when the probe fires, we only remap the arguments * if the table is non-NULL. */ for (i = 0; i < dhpb->dthpb_xargc; i++) { if (dhpb->dthpb_args[i] != i) { pp->ftp_argmap = dhpb->dthpb_args; break; } } /* * The probe is fully constructed -- register it with DTrace. */ pp->ftp_id = dtrace_probe_create(provider->ftp_provid, dhpb->dthpb_mod, dhpb->dthpb_func, dhpb->dthpb_name, FASTTRAP_OFFSET_AFRAMES, pp); } /*ARGSUSED*/ static void fasttrap_meta_remove(void *arg, dtrace_helper_provdesc_t *dhpv, pid_t pid) { /* * Clean up the USDT provider. There may be active consumers of the * provider busy adding probes, no damage will actually befall the * provider until that count has dropped to zero. This just puts * the provider on death row. */ fasttrap_provider_retire(pid, dhpv->dthpv_provname, 1); } static dtrace_mops_t fasttrap_mops = { .dtms_create_probe = fasttrap_meta_create_probe, .dtms_provide_pid = fasttrap_meta_provide, .dtms_remove_pid = fasttrap_meta_remove }; /*ARGSUSED*/ static int fasttrap_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused) { return (0); } /*ARGSUSED*/ static int fasttrap_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int fflag, struct thread *td) { if (!dtrace_attached()) return (EAGAIN); if (cmd == FASTTRAPIOC_MAKEPROBE) { fasttrap_probe_spec_t *uprobe = *(fasttrap_probe_spec_t **)arg; fasttrap_probe_spec_t *probe; uint64_t noffs; size_t size; int ret, err; if (copyin(&uprobe->ftps_noffs, &noffs, sizeof (uprobe->ftps_noffs))) return (EFAULT); /* * Probes must have at least one tracepoint. */ if (noffs == 0) return (EINVAL); size = sizeof (fasttrap_probe_spec_t) + sizeof (probe->ftps_offs[0]) * (noffs - 1); if (size > 1024 * 1024) return (ENOMEM); probe = kmem_alloc(size, KM_SLEEP); if (copyin(uprobe, probe, size) != 0 || probe->ftps_noffs != noffs) { kmem_free(probe, size); return (EFAULT); } /* * Verify that the function and module strings contain no * funny characters. */ if (u8_validate(probe->ftps_func, strlen(probe->ftps_func), NULL, U8_VALIDATE_ENTIRE, &err) < 0) { ret = EINVAL; goto err; } if (u8_validate(probe->ftps_mod, strlen(probe->ftps_mod), NULL, U8_VALIDATE_ENTIRE, &err) < 0) { ret = EINVAL; goto err; } #ifdef notyet if (!PRIV_POLICY_CHOICE(cr, PRIV_ALL, B_FALSE)) { proc_t *p; pid_t pid = probe->ftps_pid; mutex_enter(&pidlock); /* * Report an error if the process doesn't exist * or is actively being birthed. */ if ((p = pfind(pid)) == NULL || p->p_stat == SIDL) { mutex_exit(&pidlock); return (ESRCH); } mutex_enter(&p->p_lock); mutex_exit(&pidlock); if ((ret = priv_proc_cred_perm(cr, p, NULL, VREAD | VWRITE)) != 0) { mutex_exit(&p->p_lock); return (ret); } mutex_exit(&p->p_lock); } #endif /* notyet */ ret = fasttrap_add_probe(probe); err: kmem_free(probe, size); return (ret); } else if (cmd == FASTTRAPIOC_GETINSTR) { fasttrap_instr_query_t instr; fasttrap_tracepoint_t *tp; uint_t index; #ifdef notyet int ret; #endif #ifdef illumos if (copyin((void *)arg, &instr, sizeof (instr)) != 0) return (EFAULT); #endif #ifdef notyet if (!PRIV_POLICY_CHOICE(cr, PRIV_ALL, B_FALSE)) { proc_t *p; pid_t pid = instr.ftiq_pid; mutex_enter(&pidlock); /* * Report an error if the process doesn't exist * or is actively being birthed. */ if ((p == pfind(pid)) == NULL || p->p_stat == SIDL) { mutex_exit(&pidlock); return (ESRCH); } mutex_enter(&p->p_lock); mutex_exit(&pidlock); if ((ret = priv_proc_cred_perm(cr, p, NULL, VREAD)) != 0) { mutex_exit(&p->p_lock); return (ret); } mutex_exit(&p->p_lock); } #endif /* notyet */ index = FASTTRAP_TPOINTS_INDEX(instr.ftiq_pid, instr.ftiq_pc); mutex_enter(&fasttrap_tpoints.fth_table[index].ftb_mtx); tp = fasttrap_tpoints.fth_table[index].ftb_data; while (tp != NULL) { if (instr.ftiq_pid == tp->ftt_pid && instr.ftiq_pc == tp->ftt_pc && tp->ftt_proc->ftpc_acount != 0) break; tp = tp->ftt_next; } if (tp == NULL) { mutex_exit(&fasttrap_tpoints.fth_table[index].ftb_mtx); return (ENOENT); } bcopy(&tp->ftt_instr, &instr.ftiq_instr, sizeof (instr.ftiq_instr)); mutex_exit(&fasttrap_tpoints.fth_table[index].ftb_mtx); if (copyout(&instr, (void *)arg, sizeof (instr)) != 0) return (EFAULT); return (0); } return (EINVAL); } static int fasttrap_load(void) { ulong_t nent; int i, ret; /* Create the /dev/dtrace/fasttrap entry. */ fasttrap_cdev = make_dev(&fasttrap_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "dtrace/fasttrap"); mtx_init(&fasttrap_cleanup_mtx, "fasttrap clean", "dtrace", MTX_DEF); mutex_init(&fasttrap_count_mtx, "fasttrap count mtx", MUTEX_DEFAULT, NULL); #ifdef illumos fasttrap_max = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS, "fasttrap-max-probes", FASTTRAP_MAX_DEFAULT); #endif fasttrap_total = 0; /* * Conjure up the tracepoints hashtable... */ #ifdef illumos nent = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS, "fasttrap-hash-size", FASTTRAP_TPOINTS_DEFAULT_SIZE); #else nent = tpoints_hash_size; #endif if (nent == 0 || nent > 0x1000000) nent = FASTTRAP_TPOINTS_DEFAULT_SIZE; tpoints_hash_size = nent; if (ISP2(nent)) fasttrap_tpoints.fth_nent = nent; else fasttrap_tpoints.fth_nent = 1 << fasttrap_highbit(nent); ASSERT(fasttrap_tpoints.fth_nent > 0); fasttrap_tpoints.fth_mask = fasttrap_tpoints.fth_nent - 1; fasttrap_tpoints.fth_table = kmem_zalloc(fasttrap_tpoints.fth_nent * sizeof (fasttrap_bucket_t), KM_SLEEP); #ifndef illumos for (i = 0; i < fasttrap_tpoints.fth_nent; i++) mutex_init(&fasttrap_tpoints.fth_table[i].ftb_mtx, "tracepoints bucket mtx", MUTEX_DEFAULT, NULL); #endif /* * ... and the providers hash table... */ nent = FASTTRAP_PROVIDERS_DEFAULT_SIZE; if (ISP2(nent)) fasttrap_provs.fth_nent = nent; else fasttrap_provs.fth_nent = 1 << fasttrap_highbit(nent); ASSERT(fasttrap_provs.fth_nent > 0); fasttrap_provs.fth_mask = fasttrap_provs.fth_nent - 1; fasttrap_provs.fth_table = kmem_zalloc(fasttrap_provs.fth_nent * sizeof (fasttrap_bucket_t), KM_SLEEP); #ifndef illumos for (i = 0; i < fasttrap_provs.fth_nent; i++) mutex_init(&fasttrap_provs.fth_table[i].ftb_mtx, "providers bucket mtx", MUTEX_DEFAULT, NULL); #endif ret = kproc_create(fasttrap_pid_cleanup_cb, NULL, &fasttrap_cleanup_proc, 0, 0, "ftcleanup"); if (ret != 0) { destroy_dev(fasttrap_cdev); #ifndef illumos for (i = 0; i < fasttrap_provs.fth_nent; i++) mutex_destroy(&fasttrap_provs.fth_table[i].ftb_mtx); for (i = 0; i < fasttrap_tpoints.fth_nent; i++) mutex_destroy(&fasttrap_tpoints.fth_table[i].ftb_mtx); #endif kmem_free(fasttrap_provs.fth_table, fasttrap_provs.fth_nent * sizeof (fasttrap_bucket_t)); mtx_destroy(&fasttrap_cleanup_mtx); mutex_destroy(&fasttrap_count_mtx); return (ret); } /* * ... and the procs hash table. */ nent = FASTTRAP_PROCS_DEFAULT_SIZE; if (ISP2(nent)) fasttrap_procs.fth_nent = nent; else fasttrap_procs.fth_nent = 1 << fasttrap_highbit(nent); ASSERT(fasttrap_procs.fth_nent > 0); fasttrap_procs.fth_mask = fasttrap_procs.fth_nent - 1; fasttrap_procs.fth_table = kmem_zalloc(fasttrap_procs.fth_nent * sizeof (fasttrap_bucket_t), KM_SLEEP); #ifndef illumos for (i = 0; i < fasttrap_procs.fth_nent; i++) mutex_init(&fasttrap_procs.fth_table[i].ftb_mtx, "processes bucket mtx", MUTEX_DEFAULT, NULL); rm_init(&fasttrap_tp_lock, "fasttrap tracepoint"); /* * This event handler must run before kdtrace_thread_dtor() since it * accesses the thread's struct kdtrace_thread. */ fasttrap_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor, fasttrap_thread_dtor, NULL, EVENTHANDLER_PRI_FIRST); #endif /* * Install our hooks into fork(2), exec(2), and exit(2). */ dtrace_fasttrap_fork = &fasttrap_fork; dtrace_fasttrap_exit = &fasttrap_exec_exit; dtrace_fasttrap_exec = &fasttrap_exec_exit; (void) dtrace_meta_register("fasttrap", &fasttrap_mops, NULL, &fasttrap_meta_id); return (0); } static int fasttrap_unload(void) { int i, fail = 0; /* * Unregister the meta-provider to make sure no new fasttrap- * managed providers come along while we're trying to close up * shop. If we fail to detach, we'll need to re-register as a * meta-provider. We can fail to unregister as a meta-provider * if providers we manage still exist. */ if (fasttrap_meta_id != DTRACE_METAPROVNONE && dtrace_meta_unregister(fasttrap_meta_id) != 0) return (-1); /* * Iterate over all of our providers. If there's still a process * that corresponds to that pid, fail to detach. */ for (i = 0; i < fasttrap_provs.fth_nent; i++) { fasttrap_provider_t **fpp, *fp; fasttrap_bucket_t *bucket = &fasttrap_provs.fth_table[i]; mutex_enter(&bucket->ftb_mtx); fpp = (fasttrap_provider_t **)&bucket->ftb_data; while ((fp = *fpp) != NULL) { /* * Acquire and release the lock as a simple way of * waiting for any other consumer to finish with * this provider. A thread must first acquire the * bucket lock so there's no chance of another thread * blocking on the provider's lock. */ mutex_enter(&fp->ftp_mtx); mutex_exit(&fp->ftp_mtx); if (dtrace_unregister(fp->ftp_provid) != 0) { fail = 1; fpp = &fp->ftp_next; } else { *fpp = fp->ftp_next; fasttrap_provider_free(fp); } } mutex_exit(&bucket->ftb_mtx); } if (fail) { (void) dtrace_meta_register("fasttrap", &fasttrap_mops, NULL, &fasttrap_meta_id); return (-1); } /* * Stop new processes from entering these hooks now, before the * fasttrap_cleanup thread runs. That way all processes will hopefully * be out of these hooks before we free fasttrap_provs.fth_table */ ASSERT(dtrace_fasttrap_fork == &fasttrap_fork); dtrace_fasttrap_fork = NULL; ASSERT(dtrace_fasttrap_exec == &fasttrap_exec_exit); dtrace_fasttrap_exec = NULL; ASSERT(dtrace_fasttrap_exit == &fasttrap_exec_exit); dtrace_fasttrap_exit = NULL; mtx_lock(&fasttrap_cleanup_mtx); fasttrap_cleanup_drain = 1; /* Wait for the cleanup thread to finish up and signal us. */ wakeup(&fasttrap_cleanup_cv); mtx_sleep(&fasttrap_cleanup_drain, &fasttrap_cleanup_mtx, 0, "ftcld", 0); fasttrap_cleanup_proc = NULL; mtx_destroy(&fasttrap_cleanup_mtx); #ifdef DEBUG mutex_enter(&fasttrap_count_mtx); ASSERT(fasttrap_pid_count == 0); mutex_exit(&fasttrap_count_mtx); #endif #ifndef illumos EVENTHANDLER_DEREGISTER(thread_dtor, fasttrap_thread_dtor_tag); for (i = 0; i < fasttrap_tpoints.fth_nent; i++) mutex_destroy(&fasttrap_tpoints.fth_table[i].ftb_mtx); for (i = 0; i < fasttrap_provs.fth_nent; i++) mutex_destroy(&fasttrap_provs.fth_table[i].ftb_mtx); for (i = 0; i < fasttrap_procs.fth_nent; i++) mutex_destroy(&fasttrap_procs.fth_table[i].ftb_mtx); #endif kmem_free(fasttrap_tpoints.fth_table, fasttrap_tpoints.fth_nent * sizeof (fasttrap_bucket_t)); fasttrap_tpoints.fth_nent = 0; kmem_free(fasttrap_provs.fth_table, fasttrap_provs.fth_nent * sizeof (fasttrap_bucket_t)); fasttrap_provs.fth_nent = 0; kmem_free(fasttrap_procs.fth_table, fasttrap_procs.fth_nent * sizeof (fasttrap_bucket_t)); fasttrap_procs.fth_nent = 0; #ifndef illumos destroy_dev(fasttrap_cdev); mutex_destroy(&fasttrap_count_mtx); rm_destroy(&fasttrap_tp_lock); #endif return (0); } /* ARGSUSED */ static int fasttrap_modevent(module_t mod __unused, int type, void *data __unused) { int error = 0; switch (type) { case MOD_LOAD: break; case MOD_UNLOAD: break; case MOD_SHUTDOWN: break; default: error = EOPNOTSUPP; break; } return (error); } SYSINIT(fasttrap_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fasttrap_load, NULL); SYSUNINIT(fasttrap_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fasttrap_unload, NULL); DEV_MODULE(fasttrap, fasttrap_modevent, NULL); MODULE_VERSION(fasttrap, 1); MODULE_DEPEND(fasttrap, dtrace, 1, 1, 1); MODULE_DEPEND(fasttrap, opensolaris, 1, 1, 1); Index: head/sys/cddl/contrib/opensolaris/uts/intel/dtrace/fasttrap_isa.c =================================================================== --- head/sys/cddl/contrib/opensolaris/uts/intel/dtrace/fasttrap_isa.c (revision 344451) +++ head/sys/cddl/contrib/opensolaris/uts/intel/dtrace/fasttrap_isa.c (revision 344452) @@ -1,1896 +1,1928 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * Portions Copyright 2010 The FreeBSD Foundation * * $FreeBSD$ */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifdef illumos #pragma ident "%Z%%M% %I% %E% SMI" #endif #include #include #include #include #include #ifdef illumos #include #include #include #include #else #include #include #include #include #include #include #include #include #include #include #endif #include #ifdef illumos #include #include #else #include #endif /* illumos */ #ifdef __i386__ #define r_rax r_eax #define r_rbx r_ebx #define r_rip r_eip #define r_rflags r_eflags #define r_rsp r_esp #define r_rbp r_ebp #endif /* * Lossless User-Land Tracing on x86 * --------------------------------- * * The execution of most instructions is not dependent on the address; for * these instructions it is sufficient to copy them into the user process's * address space and execute them. To effectively single-step an instruction * in user-land, we copy out the following sequence of instructions to scratch * space in the user thread's ulwp_t structure. * * We then set the program counter (%eip or %rip) to point to this scratch * space. Once execution resumes, the original instruction is executed and * then control flow is redirected to what was originally the subsequent * instruction. If the kernel attemps to deliver a signal while single- * stepping, the signal is deferred and the program counter is moved into the * second sequence of instructions. The second sequence ends in a trap into * the kernel where the deferred signal is then properly handled and delivered. * * For instructions whose execute is position dependent, we perform simple * emulation. These instructions are limited to control transfer * instructions in 32-bit mode, but in 64-bit mode there's the added wrinkle * of %rip-relative addressing that means that almost any instruction can be * position dependent. For all the details on how we emulate generic * instructions included %rip-relative instructions, see the code in * fasttrap_pid_probe() below where we handle instructions of type * FASTTRAP_T_COMMON (under the header: Generic Instruction Tracing). */ #define FASTTRAP_MODRM_MOD(modrm) (((modrm) >> 6) & 0x3) #define FASTTRAP_MODRM_REG(modrm) (((modrm) >> 3) & 0x7) #define FASTTRAP_MODRM_RM(modrm) ((modrm) & 0x7) #define FASTTRAP_MODRM(mod, reg, rm) (((mod) << 6) | ((reg) << 3) | (rm)) #define FASTTRAP_SIB_SCALE(sib) (((sib) >> 6) & 0x3) #define FASTTRAP_SIB_INDEX(sib) (((sib) >> 3) & 0x7) #define FASTTRAP_SIB_BASE(sib) ((sib) & 0x7) #define FASTTRAP_REX_W(rex) (((rex) >> 3) & 1) #define FASTTRAP_REX_R(rex) (((rex) >> 2) & 1) #define FASTTRAP_REX_X(rex) (((rex) >> 1) & 1) #define FASTTRAP_REX_B(rex) ((rex) & 1) #define FASTTRAP_REX(w, r, x, b) \ (0x40 | ((w) << 3) | ((r) << 2) | ((x) << 1) | (b)) /* * Single-byte op-codes. */ #define FASTTRAP_PUSHL_EBP 0x55 #define FASTTRAP_JO 0x70 #define FASTTRAP_JNO 0x71 #define FASTTRAP_JB 0x72 #define FASTTRAP_JAE 0x73 #define FASTTRAP_JE 0x74 #define FASTTRAP_JNE 0x75 #define FASTTRAP_JBE 0x76 #define FASTTRAP_JA 0x77 #define FASTTRAP_JS 0x78 #define FASTTRAP_JNS 0x79 #define FASTTRAP_JP 0x7a #define FASTTRAP_JNP 0x7b #define FASTTRAP_JL 0x7c #define FASTTRAP_JGE 0x7d #define FASTTRAP_JLE 0x7e #define FASTTRAP_JG 0x7f #define FASTTRAP_NOP 0x90 #define FASTTRAP_MOV_EAX 0xb8 #define FASTTRAP_MOV_ECX 0xb9 #define FASTTRAP_RET16 0xc2 #define FASTTRAP_RET 0xc3 #define FASTTRAP_LOOPNZ 0xe0 #define FASTTRAP_LOOPZ 0xe1 #define FASTTRAP_LOOP 0xe2 #define FASTTRAP_JCXZ 0xe3 #define FASTTRAP_CALL 0xe8 #define FASTTRAP_JMP32 0xe9 #define FASTTRAP_JMP8 0xeb #define FASTTRAP_INT3 0xcc #define FASTTRAP_INT 0xcd #define FASTTRAP_2_BYTE_OP 0x0f #define FASTTRAP_GROUP5_OP 0xff /* * Two-byte op-codes (second byte only). */ #define FASTTRAP_0F_JO 0x80 #define FASTTRAP_0F_JNO 0x81 #define FASTTRAP_0F_JB 0x82 #define FASTTRAP_0F_JAE 0x83 #define FASTTRAP_0F_JE 0x84 #define FASTTRAP_0F_JNE 0x85 #define FASTTRAP_0F_JBE 0x86 #define FASTTRAP_0F_JA 0x87 #define FASTTRAP_0F_JS 0x88 #define FASTTRAP_0F_JNS 0x89 #define FASTTRAP_0F_JP 0x8a #define FASTTRAP_0F_JNP 0x8b #define FASTTRAP_0F_JL 0x8c #define FASTTRAP_0F_JGE 0x8d #define FASTTRAP_0F_JLE 0x8e #define FASTTRAP_0F_JG 0x8f #define FASTTRAP_EFLAGS_OF 0x800 #define FASTTRAP_EFLAGS_DF 0x400 #define FASTTRAP_EFLAGS_SF 0x080 #define FASTTRAP_EFLAGS_ZF 0x040 #define FASTTRAP_EFLAGS_AF 0x010 #define FASTTRAP_EFLAGS_PF 0x004 #define FASTTRAP_EFLAGS_CF 0x001 /* * Instruction prefixes. */ #define FASTTRAP_PREFIX_OPERAND 0x66 #define FASTTRAP_PREFIX_ADDRESS 0x67 #define FASTTRAP_PREFIX_CS 0x2E #define FASTTRAP_PREFIX_DS 0x3E #define FASTTRAP_PREFIX_ES 0x26 #define FASTTRAP_PREFIX_FS 0x64 #define FASTTRAP_PREFIX_GS 0x65 #define FASTTRAP_PREFIX_SS 0x36 #define FASTTRAP_PREFIX_LOCK 0xF0 #define FASTTRAP_PREFIX_REP 0xF3 #define FASTTRAP_PREFIX_REPNE 0xF2 #define FASTTRAP_NOREG 0xff /* * Map between instruction register encodings and the kernel constants which * correspond to indicies into struct regs. */ #ifdef __amd64 static const uint8_t regmap[16] = { REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI, REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15, }; #else static const uint8_t regmap[8] = { EAX, ECX, EDX, EBX, UESP, EBP, ESI, EDI }; #endif static ulong_t fasttrap_getreg(struct reg *, uint_t); static uint64_t fasttrap_anarg(struct reg *rp, int function_entry, int argno) { uint64_t value = 0; int shift = function_entry ? 1 : 0; #ifdef __amd64 if (curproc->p_model == DATAMODEL_LP64) { uintptr_t *stack; /* * In 64-bit mode, the first six arguments are stored in * registers. */ if (argno < 6) switch (argno) { case 0: return (rp->r_rdi); case 1: return (rp->r_rsi); case 2: return (rp->r_rdx); case 3: return (rp->r_rcx); case 4: return (rp->r_r8); case 5: return (rp->r_r9); } stack = (uintptr_t *)rp->r_rsp; DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); value = dtrace_fulword(&stack[argno - 6 + shift]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); } else { #endif uint32_t *stack = (uint32_t *)rp->r_rsp; DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); value = dtrace_fuword32(&stack[argno + shift]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); #ifdef __amd64 } #endif return (value); } /*ARGSUSED*/ int fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc, fasttrap_probe_type_t type) { uint8_t instr[FASTTRAP_MAX_INSTR_SIZE + 10]; size_t len = FASTTRAP_MAX_INSTR_SIZE; size_t first = MIN(len, PAGESIZE - (pc & PAGEOFFSET)); uint_t start = 0; int rmindex, size; uint8_t seg, rex = 0; /* * Read the instruction at the given address out of the process's * address space. We don't have to worry about a debugger * changing this instruction before we overwrite it with our trap * instruction since P_PR_LOCK is set. Since instructions can span * pages, we potentially read the instruction in two parts. If the * second part fails, we just zero out that part of the instruction. */ if (uread(p, &instr[0], first, pc) != 0) return (-1); if (len > first && uread(p, &instr[first], len - first, pc + first) != 0) { bzero(&instr[first], len - first); len = first; } /* * If the disassembly fails, then we have a malformed instruction. */ if ((size = dtrace_instr_size_isa(instr, p->p_model, &rmindex)) <= 0) return (-1); /* * Make sure the disassembler isn't completely broken. */ ASSERT(-1 <= rmindex && rmindex < size); /* * If the computed size is greater than the number of bytes read, * then it was a malformed instruction possibly because it fell on a * page boundary and the subsequent page was missing or because of * some malicious user. */ if (size > len) return (-1); tp->ftt_size = (uint8_t)size; tp->ftt_segment = FASTTRAP_SEG_NONE; /* * Find the start of the instruction's opcode by processing any * legacy prefixes. */ for (;;) { seg = 0; switch (instr[start]) { case FASTTRAP_PREFIX_SS: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_GS: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_FS: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_ES: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_DS: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_CS: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_OPERAND: case FASTTRAP_PREFIX_ADDRESS: case FASTTRAP_PREFIX_LOCK: case FASTTRAP_PREFIX_REP: case FASTTRAP_PREFIX_REPNE: if (seg != 0) { /* * It's illegal for an instruction to specify * two segment prefixes -- give up on this * illegal instruction. */ if (tp->ftt_segment != FASTTRAP_SEG_NONE) return (-1); tp->ftt_segment = seg; } start++; continue; } break; } #ifdef __amd64 /* * Identify the REX prefix on 64-bit processes. */ if (p->p_model == DATAMODEL_LP64 && (instr[start] & 0xf0) == 0x40) rex = instr[start++]; #endif /* * Now that we're pretty sure that the instruction is okay, copy the * valid part to the tracepoint. */ bcopy(instr, tp->ftt_instr, FASTTRAP_MAX_INSTR_SIZE); tp->ftt_type = FASTTRAP_T_COMMON; if (instr[start] == FASTTRAP_2_BYTE_OP) { switch (instr[start + 1]) { case FASTTRAP_0F_JO: case FASTTRAP_0F_JNO: case FASTTRAP_0F_JB: case FASTTRAP_0F_JAE: case FASTTRAP_0F_JE: case FASTTRAP_0F_JNE: case FASTTRAP_0F_JBE: case FASTTRAP_0F_JA: case FASTTRAP_0F_JS: case FASTTRAP_0F_JNS: case FASTTRAP_0F_JP: case FASTTRAP_0F_JNP: case FASTTRAP_0F_JL: case FASTTRAP_0F_JGE: case FASTTRAP_0F_JLE: case FASTTRAP_0F_JG: tp->ftt_type = FASTTRAP_T_JCC; tp->ftt_code = (instr[start + 1] & 0x0f) | FASTTRAP_JO; tp->ftt_dest = pc + tp->ftt_size + /* LINTED - alignment */ *(int32_t *)&instr[start + 2]; break; } } else if (instr[start] == FASTTRAP_GROUP5_OP) { uint_t mod = FASTTRAP_MODRM_MOD(instr[start + 1]); uint_t reg = FASTTRAP_MODRM_REG(instr[start + 1]); uint_t rm = FASTTRAP_MODRM_RM(instr[start + 1]); if (reg == 2 || reg == 4) { uint_t i, sz; if (reg == 2) tp->ftt_type = FASTTRAP_T_CALL; else tp->ftt_type = FASTTRAP_T_JMP; if (mod == 3) tp->ftt_code = 2; else tp->ftt_code = 1; ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0); /* * See AMD x86-64 Architecture Programmer's Manual * Volume 3, Section 1.2.7, Table 1-12, and * Appendix A.3.1, Table A-15. */ if (mod != 3 && rm == 4) { uint8_t sib = instr[start + 2]; uint_t index = FASTTRAP_SIB_INDEX(sib); uint_t base = FASTTRAP_SIB_BASE(sib); tp->ftt_scale = FASTTRAP_SIB_SCALE(sib); tp->ftt_index = (index == 4) ? FASTTRAP_NOREG : regmap[index | (FASTTRAP_REX_X(rex) << 3)]; tp->ftt_base = (mod == 0 && base == 5) ? FASTTRAP_NOREG : regmap[base | (FASTTRAP_REX_B(rex) << 3)]; i = 3; sz = mod == 1 ? 1 : 4; } else { /* * In 64-bit mode, mod == 0 and r/m == 5 * denotes %rip-relative addressing; in 32-bit * mode, the base register isn't used. In both * modes, there is a 32-bit operand. */ if (mod == 0 && rm == 5) { #ifdef __amd64 if (p->p_model == DATAMODEL_LP64) tp->ftt_base = REG_RIP; else #endif tp->ftt_base = FASTTRAP_NOREG; sz = 4; } else { uint8_t base = rm | (FASTTRAP_REX_B(rex) << 3); tp->ftt_base = regmap[base]; sz = mod == 1 ? 1 : mod == 2 ? 4 : 0; } tp->ftt_index = FASTTRAP_NOREG; i = 2; } if (sz == 1) { tp->ftt_dest = *(int8_t *)&instr[start + i]; } else if (sz == 4) { /* LINTED - alignment */ tp->ftt_dest = *(int32_t *)&instr[start + i]; } else { tp->ftt_dest = 0; } } } else { switch (instr[start]) { case FASTTRAP_RET: tp->ftt_type = FASTTRAP_T_RET; break; case FASTTRAP_RET16: tp->ftt_type = FASTTRAP_T_RET16; /* LINTED - alignment */ tp->ftt_dest = *(uint16_t *)&instr[start + 1]; break; case FASTTRAP_JO: case FASTTRAP_JNO: case FASTTRAP_JB: case FASTTRAP_JAE: case FASTTRAP_JE: case FASTTRAP_JNE: case FASTTRAP_JBE: case FASTTRAP_JA: case FASTTRAP_JS: case FASTTRAP_JNS: case FASTTRAP_JP: case FASTTRAP_JNP: case FASTTRAP_JL: case FASTTRAP_JGE: case FASTTRAP_JLE: case FASTTRAP_JG: tp->ftt_type = FASTTRAP_T_JCC; tp->ftt_code = instr[start]; tp->ftt_dest = pc + tp->ftt_size + (int8_t)instr[start + 1]; break; case FASTTRAP_LOOPNZ: case FASTTRAP_LOOPZ: case FASTTRAP_LOOP: tp->ftt_type = FASTTRAP_T_LOOP; tp->ftt_code = instr[start]; tp->ftt_dest = pc + tp->ftt_size + (int8_t)instr[start + 1]; break; case FASTTRAP_JCXZ: tp->ftt_type = FASTTRAP_T_JCXZ; tp->ftt_dest = pc + tp->ftt_size + (int8_t)instr[start + 1]; break; case FASTTRAP_CALL: tp->ftt_type = FASTTRAP_T_CALL; tp->ftt_dest = pc + tp->ftt_size + /* LINTED - alignment */ *(int32_t *)&instr[start + 1]; tp->ftt_code = 0; break; case FASTTRAP_JMP32: tp->ftt_type = FASTTRAP_T_JMP; tp->ftt_dest = pc + tp->ftt_size + /* LINTED - alignment */ *(int32_t *)&instr[start + 1]; break; case FASTTRAP_JMP8: tp->ftt_type = FASTTRAP_T_JMP; tp->ftt_dest = pc + tp->ftt_size + (int8_t)instr[start + 1]; break; case FASTTRAP_PUSHL_EBP: if (start == 0) tp->ftt_type = FASTTRAP_T_PUSHL_EBP; break; case FASTTRAP_NOP: #ifdef __amd64 ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0); /* * On amd64 we have to be careful not to confuse a nop * (actually xchgl %eax, %eax) with an instruction using * the same opcode, but that does something different * (e.g. xchgl %r8d, %eax or xcghq %r8, %rax). */ if (FASTTRAP_REX_B(rex) == 0) #endif tp->ftt_type = FASTTRAP_T_NOP; break; case FASTTRAP_INT3: /* * The pid provider shares the int3 trap with debugger * breakpoints so we can't instrument them. */ ASSERT(instr[start] == FASTTRAP_INSTR); return (-1); case FASTTRAP_INT: /* * Interrupts seem like they could be traced with * no negative implications, but it's possible that * a thread could be redirected by the trap handling * code which would eventually return to the * instruction after the interrupt. If the interrupt * were in our scratch space, the subsequent * instruction might be overwritten before we return. * Accordingly we refuse to instrument any interrupt. */ return (-1); } } #ifdef __amd64 if (p->p_model == DATAMODEL_LP64 && tp->ftt_type == FASTTRAP_T_COMMON) { /* * If the process is 64-bit and the instruction type is still * FASTTRAP_T_COMMON -- meaning we're going to copy it out an * execute it -- we need to watch for %rip-relative * addressing mode. See the portion of fasttrap_pid_probe() * below where we handle tracepoints with type * FASTTRAP_T_COMMON for how we emulate instructions that * employ %rip-relative addressing. */ if (rmindex != -1) { uint_t mod = FASTTRAP_MODRM_MOD(instr[rmindex]); uint_t reg = FASTTRAP_MODRM_REG(instr[rmindex]); uint_t rm = FASTTRAP_MODRM_RM(instr[rmindex]); ASSERT(rmindex > start); if (mod == 0 && rm == 5) { /* * We need to be sure to avoid other * registers used by this instruction. While * the reg field may determine the op code * rather than denoting a register, assuming * that it denotes a register is always safe. * We leave the REX field intact and use * whatever value's there for simplicity. */ if (reg != 0) { tp->ftt_ripmode = FASTTRAP_RIP_1 | (FASTTRAP_RIP_X * FASTTRAP_REX_B(rex)); rm = 0; } else { tp->ftt_ripmode = FASTTRAP_RIP_2 | (FASTTRAP_RIP_X * FASTTRAP_REX_B(rex)); rm = 1; } tp->ftt_modrm = tp->ftt_instr[rmindex]; tp->ftt_instr[rmindex] = FASTTRAP_MODRM(2, reg, rm); } } } #endif return (0); } int fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp) { fasttrap_instr_t instr = FASTTRAP_INSTR; if (uwrite(p, &instr, 1, tp->ftt_pc) != 0) return (-1); return (0); } int fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp) { uint8_t instr; /* * Distinguish between read or write failures and a changed * instruction. */ if (uread(p, &instr, 1, tp->ftt_pc) != 0) return (0); if (instr != FASTTRAP_INSTR) return (0); if (uwrite(p, &tp->ftt_instr[0], 1, tp->ftt_pc) != 0) return (-1); return (0); } #ifdef __amd64 static uintptr_t fasttrap_fulword_noerr(const void *uaddr) { uintptr_t ret; if ((ret = fasttrap_fulword(uaddr)) != -1) return (ret); return (0); } #endif static uint32_t fasttrap_fuword32_noerr(const void *uaddr) { uint32_t ret; if ((ret = fasttrap_fuword32(uaddr)) != -1) return (ret); return (0); } static void fasttrap_return_common(struct reg *rp, uintptr_t pc, pid_t pid, uintptr_t new_pc) { fasttrap_tracepoint_t *tp; fasttrap_bucket_t *bucket; fasttrap_id_t *id; #ifdef illumos kmutex_t *pid_mtx; pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock; mutex_enter(pid_mtx); #else struct rm_priotracker tracker; rm_rlock(&fasttrap_tp_lock, &tracker); #endif bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { if (pid == tp->ftt_pid && pc == tp->ftt_pc && tp->ftt_proc->ftpc_acount != 0) break; } /* * Don't sweat it if we can't find the tracepoint again; unlike * when we're in fasttrap_pid_probe(), finding the tracepoint here * is not essential to the correct execution of the process. */ if (tp == NULL) { #ifdef illumos mutex_exit(pid_mtx); #else rm_runlock(&fasttrap_tp_lock, &tracker); #endif return; } for (id = tp->ftt_retids; id != NULL; id = id->fti_next) { /* * If there's a branch that could act as a return site, we * need to trace it, and check here if the program counter is * external to the function. */ if (tp->ftt_type != FASTTRAP_T_RET && tp->ftt_type != FASTTRAP_T_RET16 && new_pc - id->fti_probe->ftp_faddr < id->fti_probe->ftp_fsize) continue; dtrace_probe(id->fti_probe->ftp_id, pc - id->fti_probe->ftp_faddr, rp->r_rax, rp->r_rbx, 0, 0); } #ifdef illumos mutex_exit(pid_mtx); #else rm_runlock(&fasttrap_tp_lock, &tracker); #endif } static void fasttrap_sigsegv(proc_t *p, kthread_t *t, uintptr_t addr) { #ifdef illumos sigqueue_t *sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); sqp->sq_info.si_signo = SIGSEGV; sqp->sq_info.si_code = SEGV_MAPERR; sqp->sq_info.si_addr = (caddr_t)addr; mutex_enter(&p->p_lock); sigaddqa(p, t, sqp); mutex_exit(&p->p_lock); if (t != NULL) aston(t); #else ksiginfo_t *ksi = kmem_zalloc(sizeof (ksiginfo_t), KM_SLEEP); ksiginfo_init(ksi); ksi->ksi_signo = SIGSEGV; ksi->ksi_code = SEGV_MAPERR; ksi->ksi_addr = (caddr_t)addr; (void) tdksignal(t, SIGSEGV, ksi); #endif } #ifdef __amd64 static void fasttrap_usdt_args64(fasttrap_probe_t *probe, struct reg *rp, int argc, uintptr_t *argv) { int i, x, cap = MIN(argc, probe->ftp_nargs); uintptr_t *stack = (uintptr_t *)rp->r_rsp; for (i = 0; i < cap; i++) { x = probe->ftp_argmap[i]; if (x < 6) argv[i] = (&rp->r_rdi)[x]; else argv[i] = fasttrap_fulword_noerr(&stack[x]); } for (; i < argc; i++) { argv[i] = 0; } } #endif static void fasttrap_usdt_args32(fasttrap_probe_t *probe, struct reg *rp, int argc, uint32_t *argv) { int i, x, cap = MIN(argc, probe->ftp_nargs); uint32_t *stack = (uint32_t *)rp->r_rsp; for (i = 0; i < cap; i++) { x = probe->ftp_argmap[i]; argv[i] = fasttrap_fuword32_noerr(&stack[x]); } for (; i < argc; i++) { argv[i] = 0; } } static int fasttrap_do_seg(fasttrap_tracepoint_t *tp, struct reg *rp, uintptr_t *addr) { proc_t *p = curproc; #ifdef __i386__ struct segment_descriptor *desc; #else struct user_segment_descriptor *desc; #endif uint16_t sel = 0, ndx, type; uintptr_t limit; switch (tp->ftt_segment) { case FASTTRAP_SEG_CS: sel = rp->r_cs; break; case FASTTRAP_SEG_DS: sel = rp->r_ds; break; case FASTTRAP_SEG_ES: sel = rp->r_es; break; case FASTTRAP_SEG_FS: sel = rp->r_fs; break; case FASTTRAP_SEG_GS: sel = rp->r_gs; break; case FASTTRAP_SEG_SS: sel = rp->r_ss; break; } /* * Make sure the given segment register specifies a user priority * selector rather than a kernel selector. */ if (ISPL(sel) != SEL_UPL) return (-1); ndx = IDXSEL(sel); /* * Check the bounds and grab the descriptor out of the specified * descriptor table. */ if (ISLDT(sel)) { #ifdef __i386__ if (ndx > p->p_md.md_ldt->ldt_len) return (-1); desc = (struct segment_descriptor *) p->p_md.md_ldt[ndx].ldt_base; #else if (ndx > max_ldt_segment) return (-1); desc = (struct user_segment_descriptor *) p->p_md.md_ldt[ndx].ldt_base; #endif } else { if (ndx >= NGDT) return (-1); #ifdef __i386__ desc = &gdt[ndx].sd; #else desc = &gdt[ndx]; #endif } /* * The descriptor must have user privilege level and it must be * present in memory. */ if (desc->sd_dpl != SEL_UPL || desc->sd_p != 1) return (-1); type = desc->sd_type; /* * If the S bit in the type field is not set, this descriptor can * only be used in system context. */ if ((type & 0x10) != 0x10) return (-1); limit = USD_GETLIMIT(desc) * (desc->sd_gran ? PAGESIZE : 1); if (tp->ftt_segment == FASTTRAP_SEG_CS) { /* * The code/data bit and readable bit must both be set. */ if ((type & 0xa) != 0xa) return (-1); if (*addr > limit) return (-1); } else { /* * The code/data bit must be clear. */ if ((type & 0x8) != 0) return (-1); /* * If the expand-down bit is clear, we just check the limit as * it would naturally be applied. Otherwise, we need to check * that the address is the range [limit + 1 .. 0xffff] or * [limit + 1 ... 0xffffffff] depending on if the default * operand size bit is set. */ if ((type & 0x4) == 0) { if (*addr > limit) return (-1); } else if (desc->sd_def32) { if (*addr < limit + 1 || 0xffff < *addr) return (-1); } else { if (*addr < limit + 1 || 0xffffffff < *addr) return (-1); } } *addr += USD_GETBASE(desc); return (0); } int fasttrap_pid_probe(struct trapframe *tf) { struct reg reg, *rp; proc_t *p = curproc, *pp; struct rm_priotracker tracker; + uint64_t gen; uintptr_t pc; uintptr_t new_pc = 0; fasttrap_bucket_t *bucket; #ifdef illumos kmutex_t *pid_mtx; #endif fasttrap_tracepoint_t *tp, tp_local; pid_t pid; dtrace_icookie_t cookie; uint_t is_enabled = 0; fill_frame_regs(tf, ®); rp = ® pc = rp->r_rip - 1; /* * It's possible that a user (in a veritable orgy of bad planning) * could redirect this thread's flow of control before it reached the * return probe fasttrap. In this case we need to kill the process * since it's in a unrecoverable state. */ if (curthread->t_dtrace_step) { ASSERT(curthread->t_dtrace_on); fasttrap_sigtrap(p, curthread, pc); return (0); } /* * Clear all user tracing flags. */ curthread->t_dtrace_ft = 0; curthread->t_dtrace_pc = 0; curthread->t_dtrace_npc = 0; curthread->t_dtrace_scrpc = 0; curthread->t_dtrace_astpc = 0; #ifdef __amd64 curthread->t_dtrace_regv = 0; #endif /* * Treat a child created by a call to vfork(2) as if it were its * parent. We know that there's only one thread of control in such a * process: this one. */ #ifdef illumos while (p->p_flag & SVFORK) { p = p->p_parent; } pid = p->p_pid; pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock; mutex_enter(pid_mtx); #else pp = p; sx_slock(&proctree_lock); while (pp->p_vmspace == pp->p_pptr->p_vmspace) pp = pp->p_pptr; pid = pp->p_pid; + if (pp != p) { + PROC_LOCK(pp); + if ((pp->p_flag & P_WEXIT) != 0) { + /* + * This can happen if the child was created with + * rfork(2). Userspace tracing cannot work reliably in + * such a scenario, but we can at least try. + */ + PROC_UNLOCK(pp); + sx_sunlock(&proctree_lock); + return (-1); + } + _PHOLD_LITE(pp); + PROC_UNLOCK(pp); + } sx_sunlock(&proctree_lock); - pp = NULL; rm_rlock(&fasttrap_tp_lock, &tracker); #endif bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; /* * Lookup the tracepoint that the process just hit. */ for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { if (pid == tp->ftt_pid && pc == tp->ftt_pc && tp->ftt_proc->ftpc_acount != 0) break; } /* * If we couldn't find a matching tracepoint, either a tracepoint has * been inserted without using the pid ioctl interface (see * fasttrap_ioctl), or somehow we have mislaid this tracepoint. */ if (tp == NULL) { #ifdef illumos mutex_exit(pid_mtx); + return (-1); #else rm_runlock(&fasttrap_tp_lock, &tracker); -#endif + gen = atomic_load_acq_64(&pp->p_fasttrap_tp_gen); + if (pp != p) + PRELE(pp); + if (curthread->t_fasttrap_tp_gen != gen) { + /* + * At least one tracepoint associated with this PID has + * been removed from the table since #BP was raised. + * Speculate that we hit a tracepoint that has since + * been removed, and retry the instruction. + */ + curthread->t_fasttrap_tp_gen = gen; + tf->tf_rip = pc; + return (0); + } return (-1); +#endif } + if (pp != p) + PRELE(pp); /* * Set the program counter to the address of the traced instruction * so that it looks right in ustack() output. */ rp->r_rip = pc; if (tp->ftt_ids != NULL) { fasttrap_id_t *id; #ifdef __amd64 if (p->p_model == DATAMODEL_LP64) { for (id = tp->ftt_ids; id != NULL; id = id->fti_next) { fasttrap_probe_t *probe = id->fti_probe; if (id->fti_ptype == DTFTP_ENTRY) { /* * We note that this was an entry * probe to help ustack() find the * first caller. */ cookie = dtrace_interrupt_disable(); DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY); dtrace_probe(probe->ftp_id, rp->r_rdi, rp->r_rsi, rp->r_rdx, rp->r_rcx, rp->r_r8); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY); dtrace_interrupt_enable(cookie); } else if (id->fti_ptype == DTFTP_IS_ENABLED) { /* * Note that in this case, we don't * call dtrace_probe() since it's only * an artificial probe meant to change * the flow of control so that it * encounters the true probe. */ is_enabled = 1; } else if (probe->ftp_argmap == NULL) { dtrace_probe(probe->ftp_id, rp->r_rdi, rp->r_rsi, rp->r_rdx, rp->r_rcx, rp->r_r8); } else { uintptr_t t[5]; fasttrap_usdt_args64(probe, rp, sizeof (t) / sizeof (t[0]), t); dtrace_probe(probe->ftp_id, t[0], t[1], t[2], t[3], t[4]); } } } else { #endif uintptr_t s0, s1, s2, s3, s4, s5; uint32_t *stack = (uint32_t *)rp->r_rsp; /* * In 32-bit mode, all arguments are passed on the * stack. If this is a function entry probe, we need * to skip the first entry on the stack as it * represents the return address rather than a * parameter to the function. */ s0 = fasttrap_fuword32_noerr(&stack[0]); s1 = fasttrap_fuword32_noerr(&stack[1]); s2 = fasttrap_fuword32_noerr(&stack[2]); s3 = fasttrap_fuword32_noerr(&stack[3]); s4 = fasttrap_fuword32_noerr(&stack[4]); s5 = fasttrap_fuword32_noerr(&stack[5]); for (id = tp->ftt_ids; id != NULL; id = id->fti_next) { fasttrap_probe_t *probe = id->fti_probe; if (id->fti_ptype == DTFTP_ENTRY) { /* * We note that this was an entry * probe to help ustack() find the * first caller. */ cookie = dtrace_interrupt_disable(); DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY); dtrace_probe(probe->ftp_id, s1, s2, s3, s4, s5); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY); dtrace_interrupt_enable(cookie); } else if (id->fti_ptype == DTFTP_IS_ENABLED) { /* * Note that in this case, we don't * call dtrace_probe() since it's only * an artificial probe meant to change * the flow of control so that it * encounters the true probe. */ is_enabled = 1; } else if (probe->ftp_argmap == NULL) { dtrace_probe(probe->ftp_id, s0, s1, s2, s3, s4); } else { uint32_t t[5]; fasttrap_usdt_args32(probe, rp, sizeof (t) / sizeof (t[0]), t); dtrace_probe(probe->ftp_id, t[0], t[1], t[2], t[3], t[4]); } } #ifdef __amd64 } #endif } /* * We're about to do a bunch of work so we cache a local copy of * the tracepoint to emulate the instruction, and then find the * tracepoint again later if we need to light up any return probes. */ tp_local = *tp; #ifdef illumos mutex_exit(pid_mtx); #else rm_runlock(&fasttrap_tp_lock, &tracker); #endif tp = &tp_local; /* * Set the program counter to appear as though the traced instruction * had completely executed. This ensures that fasttrap_getreg() will * report the expected value for REG_RIP. */ rp->r_rip = pc + tp->ftt_size; /* * If there's an is-enabled probe connected to this tracepoint it * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax' * instruction that was placed there by DTrace when the binary was * linked. As this probe is, in fact, enabled, we need to stuff 1 * into %eax or %rax. Accordingly, we can bypass all the instruction * emulation logic since we know the inevitable result. It's possible * that a user could construct a scenario where the 'is-enabled' * probe was on some other instruction, but that would be a rather * exotic way to shoot oneself in the foot. */ if (is_enabled) { rp->r_rax = 1; new_pc = rp->r_rip; goto done; } /* * We emulate certain types of instructions to ensure correctness * (in the case of position dependent instructions) or optimize * common cases. The rest we have the thread execute back in user- * land. */ switch (tp->ftt_type) { case FASTTRAP_T_RET: case FASTTRAP_T_RET16: { uintptr_t dst = 0; uintptr_t addr = 0; int ret = 0; /* * We have to emulate _every_ facet of the behavior of a ret * instruction including what happens if the load from %esp * fails; in that case, we send a SIGSEGV. */ #ifdef __amd64 if (p->p_model == DATAMODEL_NATIVE) { ret = dst = fasttrap_fulword((void *)rp->r_rsp); addr = rp->r_rsp + sizeof (uintptr_t); } else { #endif uint32_t dst32; ret = dst32 = fasttrap_fuword32((void *)rp->r_rsp); dst = dst32; addr = rp->r_rsp + sizeof (uint32_t); #ifdef __amd64 } #endif if (ret == -1) { fasttrap_sigsegv(p, curthread, rp->r_rsp); new_pc = pc; break; } if (tp->ftt_type == FASTTRAP_T_RET16) addr += tp->ftt_dest; rp->r_rsp = addr; new_pc = dst; break; } case FASTTRAP_T_JCC: { uint_t taken = 0; switch (tp->ftt_code) { case FASTTRAP_JO: taken = (rp->r_rflags & FASTTRAP_EFLAGS_OF) != 0; break; case FASTTRAP_JNO: taken = (rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0; break; case FASTTRAP_JB: taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) != 0; break; case FASTTRAP_JAE: taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) == 0; break; case FASTTRAP_JE: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0; break; case FASTTRAP_JNE: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0; break; case FASTTRAP_JBE: taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) != 0 || (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0; break; case FASTTRAP_JA: taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) == 0 && (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0; break; case FASTTRAP_JS: taken = (rp->r_rflags & FASTTRAP_EFLAGS_SF) != 0; break; case FASTTRAP_JNS: taken = (rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0; break; case FASTTRAP_JP: taken = (rp->r_rflags & FASTTRAP_EFLAGS_PF) != 0; break; case FASTTRAP_JNP: taken = (rp->r_rflags & FASTTRAP_EFLAGS_PF) == 0; break; case FASTTRAP_JL: taken = ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) != ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); break; case FASTTRAP_JGE: taken = ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) == ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); break; case FASTTRAP_JLE: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0 || ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) != ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); break; case FASTTRAP_JG: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0 && ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) == ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); break; } if (taken) new_pc = tp->ftt_dest; else new_pc = pc + tp->ftt_size; break; } case FASTTRAP_T_LOOP: { uint_t taken = 0; #ifdef __amd64 greg_t cx = rp->r_rcx--; #else greg_t cx = rp->r_ecx--; #endif switch (tp->ftt_code) { case FASTTRAP_LOOPNZ: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0 && cx != 0; break; case FASTTRAP_LOOPZ: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0 && cx != 0; break; case FASTTRAP_LOOP: taken = (cx != 0); break; } if (taken) new_pc = tp->ftt_dest; else new_pc = pc + tp->ftt_size; break; } case FASTTRAP_T_JCXZ: { #ifdef __amd64 greg_t cx = rp->r_rcx; #else greg_t cx = rp->r_ecx; #endif if (cx == 0) new_pc = tp->ftt_dest; else new_pc = pc + tp->ftt_size; break; } case FASTTRAP_T_PUSHL_EBP: { int ret = 0; #ifdef __amd64 if (p->p_model == DATAMODEL_NATIVE) { rp->r_rsp -= sizeof (uintptr_t); ret = fasttrap_sulword((void *)rp->r_rsp, rp->r_rbp); } else { #endif rp->r_rsp -= sizeof (uint32_t); ret = fasttrap_suword32((void *)rp->r_rsp, rp->r_rbp); #ifdef __amd64 } #endif if (ret == -1) { fasttrap_sigsegv(p, curthread, rp->r_rsp); new_pc = pc; break; } new_pc = pc + tp->ftt_size; break; } case FASTTRAP_T_NOP: new_pc = pc + tp->ftt_size; break; case FASTTRAP_T_JMP: case FASTTRAP_T_CALL: if (tp->ftt_code == 0) { new_pc = tp->ftt_dest; } else { uintptr_t value, addr = tp->ftt_dest; if (tp->ftt_base != FASTTRAP_NOREG) addr += fasttrap_getreg(rp, tp->ftt_base); if (tp->ftt_index != FASTTRAP_NOREG) addr += fasttrap_getreg(rp, tp->ftt_index) << tp->ftt_scale; if (tp->ftt_code == 1) { /* * If there's a segment prefix for this * instruction, we'll need to check permissions * and bounds on the given selector, and adjust * the address accordingly. */ if (tp->ftt_segment != FASTTRAP_SEG_NONE && fasttrap_do_seg(tp, rp, &addr) != 0) { fasttrap_sigsegv(p, curthread, addr); new_pc = pc; break; } #ifdef __amd64 if (p->p_model == DATAMODEL_NATIVE) { #endif if ((value = fasttrap_fulword((void *)addr)) == -1) { fasttrap_sigsegv(p, curthread, addr); new_pc = pc; break; } new_pc = value; #ifdef __amd64 } else { uint32_t value32; addr = (uintptr_t)(uint32_t)addr; if ((value32 = fasttrap_fuword32((void *)addr)) == -1) { fasttrap_sigsegv(p, curthread, addr); new_pc = pc; break; } new_pc = value32; } #endif } else { new_pc = addr; } } /* * If this is a call instruction, we need to push the return * address onto the stack. If this fails, we send the process * a SIGSEGV and reset the pc to emulate what would happen if * this instruction weren't traced. */ if (tp->ftt_type == FASTTRAP_T_CALL) { int ret = 0; uintptr_t addr = 0, pcps; #ifdef __amd64 if (p->p_model == DATAMODEL_NATIVE) { addr = rp->r_rsp - sizeof (uintptr_t); pcps = pc + tp->ftt_size; ret = fasttrap_sulword((void *)addr, pcps); } else { #endif addr = rp->r_rsp - sizeof (uint32_t); pcps = (uint32_t)(pc + tp->ftt_size); ret = fasttrap_suword32((void *)addr, pcps); #ifdef __amd64 } #endif if (ret == -1) { fasttrap_sigsegv(p, curthread, addr); new_pc = pc; break; } rp->r_rsp = addr; } break; case FASTTRAP_T_COMMON: { uintptr_t addr; #if defined(__amd64) uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 22]; #else uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 7]; #endif uint_t i = 0; #ifdef illumos klwp_t *lwp = ttolwp(curthread); /* * Compute the address of the ulwp_t and step over the * ul_self pointer. The method used to store the user-land * thread pointer is very different on 32- and 64-bit * kernels. */ #if defined(__amd64) if (p->p_model == DATAMODEL_LP64) { addr = lwp->lwp_pcb.pcb_fsbase; addr += sizeof (void *); } else { addr = lwp->lwp_pcb.pcb_gsbase; addr += sizeof (caddr32_t); } #else addr = USD_GETBASE(&lwp->lwp_pcb.pcb_gsdesc); addr += sizeof (void *); #endif #else /* !illumos */ fasttrap_scrspace_t *scrspace; scrspace = fasttrap_scraddr(curthread, tp->ftt_proc); if (scrspace == NULL) { /* * We failed to allocate scratch space for this thread. * Try to write the original instruction back out and * reset the pc. */ if (fasttrap_copyout(tp->ftt_instr, (void *)pc, tp->ftt_size)) fasttrap_sigtrap(p, curthread, pc); new_pc = pc; break; } addr = scrspace->ftss_addr; #endif /* illumos */ /* * Generic Instruction Tracing * --------------------------- * * This is the layout of the scratch space in the user-land * thread structure for our generated instructions. * * 32-bit mode bytes * ------------------------ ----- * a: <= 15 * jmp ftt_size> 5 * b: <= 15 * int T_DTRACE_RET 2 * ----- * <= 37 * * 64-bit mode bytes * ------------------------ ----- * a: <= 15 * jmp 0(%rip) 6 * ftt_size> 8 * b: <= 15 * int T_DTRACE_RET 2 * ----- * <= 46 * * The %pc is set to a, and curthread->t_dtrace_astpc is set * to b. If we encounter a signal on the way out of the * kernel, trap() will set %pc to curthread->t_dtrace_astpc * so that we execute the original instruction and re-enter * the kernel rather than redirecting to the next instruction. * * If there are return probes (so we know that we're going to * need to reenter the kernel after executing the original * instruction), the scratch space will just contain the * original instruction followed by an interrupt -- the same * data as at b. * * %rip-relative Addressing * ------------------------ * * There's a further complication in 64-bit mode due to %rip- * relative addressing. While this is clearly a beneficial * architectural decision for position independent code, it's * hard not to see it as a personal attack against the pid * provider since before there was a relatively small set of * instructions to emulate; with %rip-relative addressing, * almost every instruction can potentially depend on the * address at which it's executed. Rather than emulating * the broad spectrum of instructions that can now be * position dependent, we emulate jumps and others as in * 32-bit mode, and take a different tack for instructions * using %rip-relative addressing. * * For every instruction that uses the ModRM byte, the * in-kernel disassembler reports its location. We use the * ModRM byte to identify that an instruction uses * %rip-relative addressing and to see what other registers * the instruction uses. To emulate those instructions, * we modify the instruction to be %rax-relative rather than * %rip-relative (or %rcx-relative if the instruction uses * %rax; or %r8- or %r9-relative if the REX.B is present so * we don't have to rewrite the REX prefix). We then load * the value that %rip would have been into the scratch * register and generate an instruction to reset the scratch * register back to its original value. The instruction * sequence looks like this: * * 64-mode %rip-relative bytes * ------------------------ ----- * a: <= 15 * movq $, % 6 * jmp 0(%rip) 6 * ftt_size> 8 * b: <= 15 * int T_DTRACE_RET 2 * ----- * 52 * * We set curthread->t_dtrace_regv so that upon receiving * a signal we can reset the value of the scratch register. */ ASSERT(tp->ftt_size <= FASTTRAP_MAX_INSTR_SIZE); curthread->t_dtrace_scrpc = addr; bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size); i += tp->ftt_size; #ifdef __amd64 if (tp->ftt_ripmode != 0) { greg_t *reg = NULL; ASSERT(p->p_model == DATAMODEL_LP64); ASSERT(tp->ftt_ripmode & (FASTTRAP_RIP_1 | FASTTRAP_RIP_2)); /* * If this was a %rip-relative instruction, we change * it to be either a %rax- or %rcx-relative * instruction (depending on whether those registers * are used as another operand; or %r8- or %r9- * relative depending on the value of REX.B). We then * set that register and generate a movq instruction * to reset the value. */ if (tp->ftt_ripmode & FASTTRAP_RIP_X) scratch[i++] = FASTTRAP_REX(1, 0, 0, 1); else scratch[i++] = FASTTRAP_REX(1, 0, 0, 0); if (tp->ftt_ripmode & FASTTRAP_RIP_1) scratch[i++] = FASTTRAP_MOV_EAX; else scratch[i++] = FASTTRAP_MOV_ECX; switch (tp->ftt_ripmode) { case FASTTRAP_RIP_1: reg = &rp->r_rax; curthread->t_dtrace_reg = REG_RAX; break; case FASTTRAP_RIP_2: reg = &rp->r_rcx; curthread->t_dtrace_reg = REG_RCX; break; case FASTTRAP_RIP_1 | FASTTRAP_RIP_X: reg = &rp->r_r8; curthread->t_dtrace_reg = REG_R8; break; case FASTTRAP_RIP_2 | FASTTRAP_RIP_X: reg = &rp->r_r9; curthread->t_dtrace_reg = REG_R9; break; } /* LINTED - alignment */ *(uint64_t *)&scratch[i] = *reg; curthread->t_dtrace_regv = *reg; *reg = pc + tp->ftt_size; i += sizeof (uint64_t); } #endif /* * Generate the branch instruction to what would have * normally been the subsequent instruction. In 32-bit mode, * this is just a relative branch; in 64-bit mode this is a * %rip-relative branch that loads the 64-bit pc value * immediately after the jmp instruction. */ #ifdef __amd64 if (p->p_model == DATAMODEL_LP64) { scratch[i++] = FASTTRAP_GROUP5_OP; scratch[i++] = FASTTRAP_MODRM(0, 4, 5); /* LINTED - alignment */ *(uint32_t *)&scratch[i] = 0; i += sizeof (uint32_t); /* LINTED - alignment */ *(uint64_t *)&scratch[i] = pc + tp->ftt_size; i += sizeof (uint64_t); } else { #endif /* * Set up the jmp to the next instruction; note that * the size of the traced instruction cancels out. */ scratch[i++] = FASTTRAP_JMP32; /* LINTED - alignment */ *(uint32_t *)&scratch[i] = pc - addr - 5; i += sizeof (uint32_t); #ifdef __amd64 } #endif curthread->t_dtrace_astpc = addr + i; bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size); i += tp->ftt_size; scratch[i++] = FASTTRAP_INT; scratch[i++] = T_DTRACE_RET; ASSERT(i <= sizeof (scratch)); if (fasttrap_copyout(scratch, (char *)addr, i)) { fasttrap_sigtrap(p, curthread, pc); new_pc = pc; break; } if (tp->ftt_retids != NULL) { curthread->t_dtrace_step = 1; curthread->t_dtrace_ret = 1; new_pc = curthread->t_dtrace_astpc; } else { new_pc = curthread->t_dtrace_scrpc; } curthread->t_dtrace_pc = pc; curthread->t_dtrace_npc = pc + tp->ftt_size; curthread->t_dtrace_on = 1; break; } default: panic("fasttrap: mishandled an instruction"); } done: /* * If there were no return probes when we first found the tracepoint, * we should feel no obligation to honor any return probes that were * subsequently enabled -- they'll just have to wait until the next * time around. */ if (tp->ftt_retids != NULL) { /* * We need to wait until the results of the instruction are * apparent before invoking any return probes. If this * instruction was emulated we can just call * fasttrap_return_common(); if it needs to be executed, we * need to wait until the user thread returns to the kernel. */ if (tp->ftt_type != FASTTRAP_T_COMMON) { /* * Set the program counter to the address of the traced * instruction so that it looks right in ustack() * output. We had previously set it to the end of the * instruction to simplify %rip-relative addressing. */ rp->r_rip = pc; fasttrap_return_common(rp, pc, pid, new_pc); } else { ASSERT(curthread->t_dtrace_ret != 0); ASSERT(curthread->t_dtrace_pc == pc); ASSERT(curthread->t_dtrace_scrpc != 0); ASSERT(new_pc == curthread->t_dtrace_astpc); } } rp->r_rip = new_pc; #ifndef illumos PROC_LOCK(p); proc_write_regs(curthread, rp); PROC_UNLOCK(p); #endif return (0); } int fasttrap_return_probe(struct trapframe *tf) { struct reg reg, *rp; proc_t *p = curproc; uintptr_t pc = curthread->t_dtrace_pc; uintptr_t npc = curthread->t_dtrace_npc; fill_frame_regs(tf, ®); rp = ® curthread->t_dtrace_pc = 0; curthread->t_dtrace_npc = 0; curthread->t_dtrace_scrpc = 0; curthread->t_dtrace_astpc = 0; #ifdef illumos /* * Treat a child created by a call to vfork(2) as if it were its * parent. We know that there's only one thread of control in such a * process: this one. */ while (p->p_flag & SVFORK) { p = p->p_parent; } #endif /* * We set rp->r_rip to the address of the traced instruction so * that it appears to dtrace_probe() that we're on the original * instruction. */ rp->r_rip = pc; fasttrap_return_common(rp, pc, p->p_pid, npc); return (0); } /*ARGSUSED*/ uint64_t fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) { struct reg r; fill_regs(curthread, &r); return (fasttrap_anarg(&r, 1, argno)); } /*ARGSUSED*/ uint64_t fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) { struct reg r; fill_regs(curthread, &r); return (fasttrap_anarg(&r, 0, argno)); } static ulong_t fasttrap_getreg(struct reg *rp, uint_t reg) { #ifdef __amd64 switch (reg) { case REG_R15: return (rp->r_r15); case REG_R14: return (rp->r_r14); case REG_R13: return (rp->r_r13); case REG_R12: return (rp->r_r12); case REG_R11: return (rp->r_r11); case REG_R10: return (rp->r_r10); case REG_R9: return (rp->r_r9); case REG_R8: return (rp->r_r8); case REG_RDI: return (rp->r_rdi); case REG_RSI: return (rp->r_rsi); case REG_RBP: return (rp->r_rbp); case REG_RBX: return (rp->r_rbx); case REG_RDX: return (rp->r_rdx); case REG_RCX: return (rp->r_rcx); case REG_RAX: return (rp->r_rax); case REG_TRAPNO: return (rp->r_trapno); case REG_ERR: return (rp->r_err); case REG_RIP: return (rp->r_rip); case REG_CS: return (rp->r_cs); case REG_RFL: return (rp->r_rflags); case REG_RSP: return (rp->r_rsp); case REG_SS: return (rp->r_ss); case REG_FS: return (rp->r_fs); case REG_GS: return (rp->r_gs); case REG_DS: return (rp->r_ds); case REG_ES: return (rp->r_es); case REG_FSBASE: return (rdmsr(MSR_FSBASE)); case REG_GSBASE: return (rdmsr(MSR_GSBASE)); } panic("dtrace: illegal register constant"); /*NOTREACHED*/ #else #define _NGREG 19 if (reg >= _NGREG) panic("dtrace: illegal register constant"); return (((greg_t *)&rp->r_gs)[reg]); #endif } Index: head/sys/cddl/dev/dtrace/dtrace_cddl.h =================================================================== --- head/sys/cddl/dev/dtrace/dtrace_cddl.h (revision 344451) +++ head/sys/cddl/dev/dtrace/dtrace_cddl.h (revision 344452) @@ -1,171 +1,174 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * $FreeBSD$ * */ #ifndef _DTRACE_CDDL_H_ #define _DTRACE_CDDL_H_ #include #define LOCK_LEVEL 10 /* * Kernel DTrace extension to 'struct proc' for FreeBSD. */ typedef struct kdtrace_proc { int p_dtrace_probes; /* Are there probes for this proc? */ u_int64_t p_dtrace_count; /* Number of DTrace tracepoints */ void *p_dtrace_helpers; /* DTrace helpers, if any */ int p_dtrace_model; - + uint64_t p_fasttrap_tp_gen; /* Tracepoint hash table gen */ } kdtrace_proc_t; /* * Kernel DTrace extension to 'struct thread' for FreeBSD. */ typedef struct kdtrace_thread { u_int8_t td_dtrace_stop; /* Indicates a DTrace-desired stop */ u_int8_t td_dtrace_sig; /* Signal sent via DTrace's raise() */ u_int8_t td_dtrace_inprobe; /* Are we in a probe? */ u_int td_predcache; /* DTrace predicate cache */ u_int64_t td_dtrace_vtime; /* DTrace virtual time */ u_int64_t td_dtrace_start; /* DTrace slice start time */ union __tdu { struct __tds { u_int8_t _td_dtrace_on; /* Hit a fasttrap tracepoint. */ u_int8_t _td_dtrace_step; /* About to return to kernel. */ u_int8_t _td_dtrace_ret; /* Handling a return probe. */ u_int8_t _td_dtrace_ast; /* Saved ast flag. */ #ifdef __amd64__ u_int8_t _td_dtrace_reg; #endif } _tds; u_long _td_dtrace_ft; /* Bitwise or of these flags. */ } _tdu; #define td_dtrace_ft _tdu._td_dtrace_ft #define td_dtrace_on _tdu._tds._td_dtrace_on #define td_dtrace_step _tdu._tds._td_dtrace_step #define td_dtrace_ret _tdu._tds._td_dtrace_ret #define td_dtrace_ast _tdu._tds._td_dtrace_ast #define td_dtrace_reg _tdu._tds._td_dtrace_reg uintptr_t td_dtrace_pc; /* DTrace saved pc from fasttrap. */ uintptr_t td_dtrace_npc; /* DTrace next pc from fasttrap. */ uintptr_t td_dtrace_scrpc; /* DTrace per-thread scratch location. */ uintptr_t td_dtrace_astpc; /* DTrace return sequence location. */ #ifdef __amd64__ uintptr_t td_dtrace_regv; #endif u_int64_t td_hrtime; /* Last time on cpu. */ void *td_dtrace_sscr; /* Saved scratch space location. */ void *td_systrace_args; /* syscall probe arguments. */ + uint64_t td_fasttrap_tp_gen; /* Tracepoint hash table gen. */ } kdtrace_thread_t; /* * Definitions to reference fields in the FreeBSD DTrace structures defined * above using the names of fields in similar structures in Solaris. Note * that the separation on FreeBSD is a licensing constraint designed to * keep the GENERIC kernel BSD licensed. */ #define t_dtrace_vtime td_dtrace->td_dtrace_vtime #define t_dtrace_start td_dtrace->td_dtrace_start #define t_dtrace_stop td_dtrace->td_dtrace_stop #define t_dtrace_sig td_dtrace->td_dtrace_sig #define t_dtrace_inprobe td_dtrace->td_dtrace_inprobe #define t_predcache td_dtrace->td_predcache #define t_dtrace_ft td_dtrace->td_dtrace_ft #define t_dtrace_on td_dtrace->td_dtrace_on #define t_dtrace_step td_dtrace->td_dtrace_step #define t_dtrace_ret td_dtrace->td_dtrace_ret #define t_dtrace_ast td_dtrace->td_dtrace_ast #define t_dtrace_reg td_dtrace->td_dtrace_reg #define t_dtrace_pc td_dtrace->td_dtrace_pc #define t_dtrace_npc td_dtrace->td_dtrace_npc #define t_dtrace_scrpc td_dtrace->td_dtrace_scrpc #define t_dtrace_astpc td_dtrace->td_dtrace_astpc #define t_dtrace_regv td_dtrace->td_dtrace_regv #define t_dtrace_sscr td_dtrace->td_dtrace_sscr #define t_dtrace_systrace_args td_dtrace->td_systrace_args +#define t_fasttrap_tp_gen td_dtrace->td_fasttrap_tp_gen #define p_dtrace_helpers p_dtrace->p_dtrace_helpers #define p_dtrace_count p_dtrace->p_dtrace_count #define p_dtrace_probes p_dtrace->p_dtrace_probes #define p_model p_dtrace->p_dtrace_model +#define p_fasttrap_tp_gen p_dtrace->p_fasttrap_tp_gen #define DATAMODEL_NATIVE 0 #ifdef __amd64__ #define DATAMODEL_LP64 0 #define DATAMODEL_ILP32 1 #else #define DATAMODEL_LP64 1 #define DATAMODEL_ILP32 0 #endif /* * Definitions for fields in struct proc which are named differently in FreeBSD. */ #define p_cred p_ucred #define p_parent p_pptr /* * Definitions for fields in struct thread which are named differently in FreeBSD. */ #define t_procp td_proc #define t_tid td_tid #define t_did td_tid #define t_cred td_ucred int priv_policy(const cred_t *, int, boolean_t, int, const char *); boolean_t priv_policy_only(const cred_t *, int, boolean_t); boolean_t priv_policy_choice(const cred_t *, int, boolean_t); /* * Test privilege. Audit success or failure, allow privilege debugging. * Returns 0 for success, err for failure. */ #define PRIV_POLICY(cred, priv, all, err, reason) \ priv_policy((cred), (priv), (all), (err), (reason)) /* * Test privilege. Audit success only, no privilege debugging. * Returns 1 for success, and 0 for failure. */ #define PRIV_POLICY_CHOICE(cred, priv, all) \ priv_policy_choice((cred), (priv), (all)) /* * Test privilege. No priv_debugging, no auditing. * Returns 1 for success, and 0 for failure. */ #define PRIV_POLICY_ONLY(cred, priv, all) \ priv_policy_only((cred), (priv), (all)) #endif /* !_DTRACE_CDDL_H_ */