diff --git a/sys/cddl/dev/kinst/kinst.c b/sys/cddl/dev/kinst/kinst.c index 4bd3047f49db..60400a452b95 100644 --- a/sys/cddl/dev/kinst/kinst.c +++ b/sys/cddl/dev/kinst/kinst.c @@ -1,327 +1,330 @@ /* * SPDX-License-Identifier: CDDL 1.0 * * Copyright (c) 2022 Christos Margiolis * Copyright (c) 2023 The FreeBSD Foundation * * Portions of this software were developed by Christos Margiolis * under sponsorship from the FreeBSD Foundation. */ #include #include #include #include #include #include #include #include "kinst.h" MALLOC_DEFINE(M_KINST, "kinst", "Kernel Instruction Tracing"); static d_open_t kinst_open; static d_close_t kinst_close; static d_ioctl_t kinst_ioctl; static void kinst_provide_module(void *, modctl_t *); static void kinst_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *); static void kinst_destroy(void *, dtrace_id_t, void *); static void kinst_enable(void *, dtrace_id_t, void *); static void kinst_disable(void *, dtrace_id_t, void *); static int kinst_load(void *); static int kinst_unload(void *); static int kinst_modevent(module_t, int, void *); static dtrace_pattr_t kinst_attr = { { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, }; static const dtrace_pops_t kinst_pops = { .dtps_provide = NULL, .dtps_provide_module = kinst_provide_module, .dtps_enable = kinst_enable, .dtps_disable = kinst_disable, .dtps_suspend = NULL, .dtps_resume = NULL, .dtps_getargdesc = kinst_getargdesc, .dtps_getargval = NULL, .dtps_usermode = NULL, .dtps_destroy = kinst_destroy }; static struct cdevsw kinst_cdevsw = { .d_name = "kinst", .d_version = D_VERSION, .d_flags = D_TRACKCLOSE, .d_open = kinst_open, .d_close = kinst_close, .d_ioctl 
= kinst_ioctl, }; static dtrace_provider_id_t kinst_id; struct kinst_probe_list *kinst_probetab; static struct cdev *kinst_cdev; /* * Tracing memcpy() will crash the kernel when kinst tries to trace an instance * of the memcpy() calls in kinst_invop(). To fix this, we can use * kinst_memcpy() in those cases, with its arguments marked as 'volatile' to * "outsmart" the compiler and avoid having it replaced by a regular memcpy(). */ volatile void * kinst_memcpy(volatile void *dst, volatile const void *src, size_t len) { volatile const unsigned char *src0; volatile unsigned char *dst0; src0 = src; dst0 = dst; while (len--) *dst0++ = *src0++; return (dst); } bool kinst_excluded(const char *name) { if (kinst_md_excluded(name)) return (true); /* * cpu_switch() can cause a crash if it modifies the value of curthread * while in probe context. */ if (strcmp(name, "cpu_switch") == 0) return (true); /* * Anything beginning with "dtrace_" may be called from probe context * unless it explicitly indicates that it won't be called from probe * context by using the prefix "dtrace_safe_". */ if (strncmp(name, "dtrace_", strlen("dtrace_")) == 0 && strncmp(name, "dtrace_safe_", strlen("dtrace_safe_")) != 0) return (true); /* * Omit instrumentation of functions that are probably in DDB. It * makes it too hard to debug broken kinst. * * NB: kdb_enter() can be excluded, but its call to printf() can't be. * This is generally OK since we're not yet in debugging context. */ if (strncmp(name, "db_", strlen("db_")) == 0 || strncmp(name, "kdb_", strlen("kdb_")) == 0) return (true); /* * Lock owner methods may be called from probe context. */ if (strcmp(name, "owner_mtx") == 0 || strcmp(name, "owner_rm") == 0 || strcmp(name, "owner_rw") == 0 || strcmp(name, "owner_sx") == 0) return (true); /* * When DTrace is built into the kernel we need to exclude the kinst * functions from instrumentation. 
*/ #ifndef _KLD_MODULE if (strncmp(name, "kinst_", strlen("kinst_")) == 0) return (true); #endif if (strcmp(name, "trap_check") == 0) return (true); return (false); } void kinst_probe_create(struct kinst_probe *kp, linker_file_t lf) { kp->kp_id = dtrace_probe_create(kinst_id, lf->filename, kp->kp_func, kp->kp_name, 3, kp); LIST_INSERT_HEAD(KINST_GETPROBE(kp->kp_patchpoint), kp, kp_hashnext); } static int kinst_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused) { return (0); } static int kinst_close(struct cdev *dev __unused, int fflag __unused, int devtype __unused, struct thread *td __unused) { dtrace_condense(kinst_id); return (0); } static int kinst_linker_file_cb(linker_file_t lf, void *arg) { dtrace_kinst_probedesc_t *pd; pd = arg; if (pd->kpd_mod[0] != '\0' && strcmp(pd->kpd_mod, lf->filename) != 0) return (0); /* * Invoke kinst_make_probe() once for each function symbol in * the module "lf". */ return (linker_file_function_listall(lf, kinst_make_probe, arg)); } static int kinst_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t addr, int flags __unused, struct thread *td __unused) { dtrace_kinst_probedesc_t *pd; int error = 0; switch (cmd) { case KINSTIOC_MAKEPROBE: pd = (dtrace_kinst_probedesc_t *)addr; pd->kpd_func[sizeof(pd->kpd_func) - 1] = '\0'; pd->kpd_mod[sizeof(pd->kpd_mod) - 1] = '\0'; /* Loop over all functions in the kernel and loaded modules. 
*/ error = linker_file_foreach(kinst_linker_file_cb, pd); break; default: error = ENOTTY; break; } return (error); } static void kinst_provide_module(void *arg, modctl_t *lf) { } static void kinst_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc) { desc->dtargd_ndx = DTRACE_ARGNONE; } static void kinst_destroy(void *arg, dtrace_id_t id, void *parg) { struct kinst_probe *kp = parg; LIST_REMOVE(kp, kp_hashnext); +#ifndef __amd64__ + kinst_trampoline_dealloc(kp->kp_tramp); +#endif free(kp, M_KINST); } static void kinst_enable(void *arg, dtrace_id_t id, void *parg) { struct kinst_probe *kp = parg; static bool warned = false; if (!warned) { KINST_LOG( "kinst: This provider is experimental, exercise caution"); warned = true; } kinst_patch_tracepoint(kp, kp->kp_patchval); } static void kinst_disable(void *arg, dtrace_id_t id, void *parg) { struct kinst_probe *kp = parg; kinst_patch_tracepoint(kp, kp->kp_savedval); } static int kinst_load(void *dummy) { int error; error = kinst_trampoline_init(); if (error != 0) return (error); error = kinst_md_init(); if (error != 0) { kinst_trampoline_deinit(); return (error); } error = dtrace_register("kinst", &kinst_attr, DTRACE_PRIV_USER, NULL, &kinst_pops, NULL, &kinst_id); if (error != 0) { kinst_md_deinit(); kinst_trampoline_deinit(); return (error); } kinst_probetab = malloc(KINST_PROBETAB_MAX * sizeof(struct kinst_probe_list), M_KINST, M_WAITOK | M_ZERO); for (int i = 0; i < KINST_PROBETAB_MAX; i++) LIST_INIT(&kinst_probetab[i]); kinst_cdev = make_dev(&kinst_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "dtrace/kinst"); dtrace_invop_add(kinst_invop); return (0); } static int kinst_unload(void *dummy) { free(kinst_probetab, M_KINST); kinst_md_deinit(); kinst_trampoline_deinit(); dtrace_invop_remove(kinst_invop); destroy_dev(kinst_cdev); return (dtrace_unregister(kinst_id)); } static int kinst_modevent(module_t mod __unused, int type, void *data __unused) { int error = 0; switch (type) { case MOD_LOAD: break; case 
MOD_UNLOAD: break; case MOD_SHUTDOWN: break; default: error = EOPNOTSUPP; break; } return (error); } SYSINIT(kinst_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, kinst_load, NULL); SYSUNINIT(kinst_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, kinst_unload, NULL); DEV_MODULE(kinst, kinst_modevent, NULL); MODULE_VERSION(kinst, 1); MODULE_DEPEND(kinst, dtrace, 1, 1, 1); MODULE_DEPEND(kinst, opensolaris, 1, 1, 1); diff --git a/sys/cddl/dev/kinst/kinst.h b/sys/cddl/dev/kinst/kinst.h index 0a47eb4f3583..390a2d1c13bf 100644 --- a/sys/cddl/dev/kinst/kinst.h +++ b/sys/cddl/dev/kinst/kinst.h @@ -1,84 +1,112 @@ /* * SPDX-License-Identifier: CDDL 1.0 * * Copyright (c) 2022 Christos Margiolis * Copyright (c) 2023 The FreeBSD Foundation * * Portions of this software were developed by Christos Margiolis * under sponsorship from the FreeBSD Foundation. */ #ifndef _KINST_H_ #define _KINST_H_ #include typedef struct { char kpd_func[DTRACE_FUNCNAMELEN]; char kpd_mod[DTRACE_MODNAMELEN]; int kpd_off; } dtrace_kinst_probedesc_t; #define KINSTIOC_MAKEPROBE _IOW('k', 1, dtrace_kinst_probedesc_t) #ifdef _KERNEL #include #include "kinst_isa.h" struct kinst_probe { LIST_ENTRY(kinst_probe) kp_hashnext; const char *kp_func; char kp_name[16]; dtrace_id_t kp_id; kinst_patchval_t kp_patchval; kinst_patchval_t kp_savedval; kinst_patchval_t *kp_patchpoint; + uint8_t *kp_tramp; struct kinst_probe_md kp_md; }; +struct kinst_cpu_state { + /* + * kinst uses a breakpoint to return from the trampoline and resume + * execution. To do this safely, kinst implements a per-CPU state + * machine; the state is set to KINST_PROBE_FIRED for the duration of + * the trampoline execution (i.e., from the time we transfer execution to + * it, until we return). Upon return, the state is set to + * KINST_PROBE_ARMED to indicate that a probe is not currently firing. + * All CPUs have their state initialized to KINST_PROBE_ARMED when + * kinst is loaded. 
+ */ + enum { + KINST_PROBE_ARMED, + KINST_PROBE_FIRED, + } state; + /* + * Points to the probe whose trampoline we're currently executing. + */ + struct kinst_probe *kp; + /* + * Because we execute trampolines with interrupts disabled, we have to + * cache the CPU's status in order to restore it when we return from + * the trampoline. + */ + uint64_t status; +}; + LIST_HEAD(kinst_probe_list, kinst_probe); extern struct kinst_probe_list *kinst_probetab; #define KINST_PROBETAB_MAX 0x8000 /* 32k */ #define KINST_ADDR2NDX(addr) (((uintptr_t)(addr)) & (KINST_PROBETAB_MAX - 1)) #define KINST_GETPROBE(i) (&kinst_probetab[KINST_ADDR2NDX(i)]) struct linker_file; struct linker_symval; /* kinst.c */ volatile void *kinst_memcpy(volatile void *, volatile const void *, size_t); bool kinst_excluded(const char *); void kinst_probe_create(struct kinst_probe *, struct linker_file *); /* arch/kinst_isa.c */ int kinst_invop(uintptr_t, struct trapframe *, uintptr_t); void kinst_patch_tracepoint(struct kinst_probe *, kinst_patchval_t); int kinst_make_probe(struct linker_file *, int, struct linker_symval *, void *); int kinst_md_init(void); void kinst_md_deinit(void); bool kinst_md_excluded(const char *); /* trampoline.c */ int kinst_trampoline_init(void); int kinst_trampoline_deinit(void); uint8_t *kinst_trampoline_alloc(int); void kinst_trampoline_dealloc(uint8_t *); #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_KINST); #endif /* MALLOC_DECLARE */ #define KINST_LOG_HELPER(fmt, ...) \ printf("%s:%d: " fmt "%s\n", __func__, __LINE__, __VA_ARGS__) #define KINST_LOG(...) 
\ KINST_LOG_HELPER(__VA_ARGS__, "") #endif /* _KERNEL */ #endif /* _KINST_H_ */ diff --git a/sys/cddl/dev/kinst/trampoline.c b/sys/cddl/dev/kinst/trampoline.c index 87c01e39745b..adc4eaa7fceb 100644 --- a/sys/cddl/dev/kinst/trampoline.c +++ b/sys/cddl/dev/kinst/trampoline.c @@ -1,329 +1,354 @@ /* * SPDX-License-Identifier: CDDL 1.0 * * Copyright (c) 2022 Christos Margiolis * Copyright (c) 2022 Mark Johnston * Copyright (c) 2023 The FreeBSD Foundation * * Portions of this software were developed by Christos Margiolis * under sponsorship from the FreeBSD Foundation. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "kinst.h" #include "kinst_isa.h" #define KINST_TRAMP_FILL_PATTERN ((kinst_patchval_t []){KINST_PATCHVAL}) #define KINST_TRAMP_FILL_SIZE sizeof(kinst_patchval_t) #define KINST_TRAMPCHUNK_SIZE PAGE_SIZE #define KINST_TRAMPS_PER_CHUNK (KINST_TRAMPCHUNK_SIZE / KINST_TRAMP_SIZE) struct trampchunk { TAILQ_ENTRY(trampchunk) next; uint8_t *addr; /* 0 -> allocated, 1 -> free */ BITSET_DEFINE(, KINST_TRAMPS_PER_CHUNK) free; }; static TAILQ_HEAD(, trampchunk) kinst_trampchunks = TAILQ_HEAD_INITIALIZER(kinst_trampchunks); static struct sx kinst_tramp_sx; SX_SYSINIT(kinst_tramp_sx, &kinst_tramp_sx, "kinst tramp"); +#ifdef __amd64__ static eventhandler_tag kinst_thread_ctor_handler; static eventhandler_tag kinst_thread_dtor_handler; +#endif /* * Fill the trampolines with KINST_TRAMP_FILL_PATTERN so that the kernel will * crash cleanly if things somehow go wrong. 
*/ static void kinst_trampoline_fill(uint8_t *addr, int size) { int i; for (i = 0; i < size; i += KINST_TRAMP_FILL_SIZE) { memcpy(&addr[i], KINST_TRAMP_FILL_PATTERN, KINST_TRAMP_FILL_SIZE); } } static struct trampchunk * kinst_trampchunk_alloc(void) { struct trampchunk *chunk; vm_offset_t trampaddr; int error __diagused; sx_assert(&kinst_tramp_sx, SX_XLOCKED); #ifdef __amd64__ /* * To simplify population of trampolines, we follow the amd64 kernel's * code model and allocate them above KERNBASE, i.e., in the top 2GB of * the kernel's virtual address space (not the case for other * platforms). */ trampaddr = KERNBASE; #else trampaddr = VM_MIN_KERNEL_ADDRESS; #endif /* * Allocate virtual memory for the trampoline chunk. The returned * address is saved in "trampaddr". Trampolines must be executable so * max_prot must include VM_PROT_EXECUTE. */ error = vm_map_find(kernel_map, NULL, 0, &trampaddr, KINST_TRAMPCHUNK_SIZE, 0, VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error != KERN_SUCCESS) { KINST_LOG("trampoline chunk allocation failed: %d", error); return (NULL); } error = kmem_back(kernel_object, trampaddr, KINST_TRAMPCHUNK_SIZE, M_WAITOK | M_EXEC); KASSERT(error == KERN_SUCCESS, ("kmem_back failed: %d", error)); kinst_trampoline_fill((uint8_t *)trampaddr, KINST_TRAMPCHUNK_SIZE); /* Allocate a tracker for this chunk. 
*/ chunk = malloc(sizeof(*chunk), M_KINST, M_WAITOK); chunk->addr = (void *)trampaddr; BIT_FILL(KINST_TRAMPS_PER_CHUNK, &chunk->free); TAILQ_INSERT_HEAD(&kinst_trampchunks, chunk, next); return (chunk); } static void kinst_trampchunk_free(struct trampchunk *chunk) { sx_assert(&kinst_tramp_sx, SX_XLOCKED); TAILQ_REMOVE(&kinst_trampchunks, chunk, next); kmem_unback(kernel_object, (vm_offset_t)chunk->addr, KINST_TRAMPCHUNK_SIZE); (void)vm_map_remove(kernel_map, (vm_offset_t)chunk->addr, (vm_offset_t)(chunk->addr + KINST_TRAMPCHUNK_SIZE)); free(chunk, M_KINST); } static uint8_t * kinst_trampoline_alloc_locked(int how) { struct trampchunk *chunk; uint8_t *tramp; int off; sx_assert(&kinst_tramp_sx, SX_XLOCKED); TAILQ_FOREACH(chunk, &kinst_trampchunks, next) { /* All trampolines from this chunk are already allocated. */ if ((off = BIT_FFS(KINST_TRAMPS_PER_CHUNK, &chunk->free)) == 0) continue; /* BIT_FFS() returns indices starting at 1 instead of 0. */ off--; break; } if (chunk == NULL) { if ((how & M_NOWAIT) != 0) return (NULL); - /* - * We didn't find any free trampoline in the current list, - * allocate a new one. If that fails the provider will no - * longer be reliable, so try to warn the user. - */ if ((chunk = kinst_trampchunk_alloc()) == NULL) { +#ifdef __amd64__ + /* + * We didn't find any free trampoline in the current + * list, and allocating a new chunk failed. The + * provider will no longer be reliable, so try to warn + * the user. 
+ */ static bool once = true; if (once) { once = false; KINST_LOG( "kinst: failed to allocate trampoline, " "probes may not fire"); } +#endif return (NULL); } off = 0; } BIT_CLR(KINST_TRAMPS_PER_CHUNK, off, &chunk->free); tramp = chunk->addr + off * KINST_TRAMP_SIZE; return (tramp); } uint8_t * kinst_trampoline_alloc(int how) { uint8_t *tramp; sx_xlock(&kinst_tramp_sx); tramp = kinst_trampoline_alloc_locked(how); sx_xunlock(&kinst_tramp_sx); return (tramp); } static void kinst_trampoline_dealloc_locked(uint8_t *tramp, bool freechunks) { struct trampchunk *chunk; int off; sx_assert(&kinst_tramp_sx, SX_XLOCKED); if (tramp == NULL) return; TAILQ_FOREACH(chunk, &kinst_trampchunks, next) { for (off = 0; off < KINST_TRAMPS_PER_CHUNK; off++) { if (chunk->addr + off * KINST_TRAMP_SIZE == tramp) { kinst_trampoline_fill(tramp, KINST_TRAMP_SIZE); BIT_SET(KINST_TRAMPS_PER_CHUNK, off, &chunk->free); if (freechunks && BIT_ISFULLSET(KINST_TRAMPS_PER_CHUNK, &chunk->free)) kinst_trampchunk_free(chunk); return; } } } panic("%s: did not find trampoline chunk for %p", __func__, tramp); } void kinst_trampoline_dealloc(uint8_t *tramp) { sx_xlock(&kinst_tramp_sx); kinst_trampoline_dealloc_locked(tramp, true); sx_xunlock(&kinst_tramp_sx); } +#ifdef __amd64__ static void kinst_thread_ctor(void *arg __unused, struct thread *td) { td->t_kinst_tramp = kinst_trampoline_alloc(M_WAITOK); } static void kinst_thread_dtor(void *arg __unused, struct thread *td) { void *tramp; tramp = td->t_kinst_tramp; td->t_kinst_tramp = NULL; /* * This assumes that the thread_dtor event permits sleeping, which * appears to be true for the time being. 
*/ kinst_trampoline_dealloc(tramp); } +#endif int kinst_trampoline_init(void) { +#ifdef __amd64__ struct proc *p; struct thread *td; void *tramp; int error; kinst_thread_ctor_handler = EVENTHANDLER_REGISTER(thread_ctor, kinst_thread_ctor, NULL, EVENTHANDLER_PRI_ANY); kinst_thread_dtor_handler = EVENTHANDLER_REGISTER(thread_dtor, kinst_thread_dtor, NULL, EVENTHANDLER_PRI_ANY); error = 0; tramp = NULL; sx_slock(&allproc_lock); sx_xlock(&kinst_tramp_sx); FOREACH_PROC_IN_SYSTEM(p) { retry: PROC_LOCK(p); FOREACH_THREAD_IN_PROC(p, td) { if (td->t_kinst_tramp != NULL) continue; if (tramp == NULL) { /* * Try to allocate a trampoline without dropping * the process lock. If all chunks are fully * utilized, we must release the lock and try * again. */ tramp = kinst_trampoline_alloc_locked(M_NOWAIT); if (tramp == NULL) { PROC_UNLOCK(p); tramp = kinst_trampoline_alloc_locked( M_WAITOK); if (tramp == NULL) { /* * Let the unload handler clean * up. */ error = ENOMEM; goto out; } else goto retry; } } td->t_kinst_tramp = tramp; tramp = NULL; } PROC_UNLOCK(p); } out: sx_xunlock(&kinst_tramp_sx); sx_sunlock(&allproc_lock); +#else + int error = 0; + + sx_xlock(&kinst_tramp_sx); + TAILQ_INIT(&kinst_trampchunks); + sx_xunlock(&kinst_tramp_sx); +#endif + return (error); } int kinst_trampoline_deinit(void) { +#ifdef __amd64__ struct trampchunk *chunk, *tmp; struct proc *p; struct thread *td; EVENTHANDLER_DEREGISTER(thread_ctor, kinst_thread_ctor_handler); EVENTHANDLER_DEREGISTER(thread_dtor, kinst_thread_dtor_handler); sx_slock(&allproc_lock); sx_xlock(&kinst_tramp_sx); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); FOREACH_THREAD_IN_PROC(p, td) { kinst_trampoline_dealloc_locked(td->t_kinst_tramp, false); td->t_kinst_tramp = NULL; } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); TAILQ_FOREACH_SAFE(chunk, &kinst_trampchunks, next, tmp) kinst_trampchunk_free(chunk); sx_xunlock(&kinst_tramp_sx); +#else + struct trampchunk *chunk, *tmp; + + sx_xlock(&kinst_tramp_sx); + TAILQ_FOREACH_SAFE(chunk, 
&kinst_trampchunks, next, tmp) + kinst_trampchunk_free(chunk); + sx_xunlock(&kinst_tramp_sx); +#endif return (0); }