diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_provider.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_provider.c --- a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_provider.c +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_provider.c @@ -701,6 +701,30 @@ prp = dt_probe_discover(pvp, pdp); } + if (strcmp(pvp->pv_desc.dtvd_name, "kinst") == 0) { + dtrace_kinst_probedesc_t pd; + int dev; + + if ((dev = open("/dev/dtrace/kinst", O_WRONLY)) < 0) + return (NULL); + strlcpy(pd.func, pdp->dtpd_func, sizeof(pd.func)); + /* + * TODO: what do we do in case func is a wildcard? + * TODO: allow range syntax (x-y) + */ + /* + * Signify wildcards with off = -1 and create probes for all + * instructions at once instead of calling the ioctl for every + * single instruction. NOTE(review): no close(dev) is visible in this hunk; confirm the descriptor is released elsewhere. + */ + if (n_is_glob) + pd.off = -1; + else + pd.off = strtol(pdp->dtpd_name, NULL, 10); + if (ioctl(dev, DTRACEIOC_KINST_MKPROBE, &pd) != 0) + return (NULL); + } + /* * If no probe was found in our cache, convert the caller's partial * probe description into a fully-formed matching probe description by diff --git a/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h b/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h --- a/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h +++ b/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h @@ -1338,6 +1338,15 @@ /* get DOF */ #define DTRACEIOC_REPLICATE _IOW('x',18,dtrace_repldesc_t) /* replicate enab */ +typedef struct { + char func[DTRACE_FUNCNAMELEN]; + int off; +} dtrace_kinst_probedesc_t; +#define DTRACEIOC_KINST_MKPROBE _IOW('x',19,dtrace_kinst_probedesc_t) + /* + * request probe + * creation for kinst + */ #endif /* diff --git a/sys/cddl/dev/kinst/extern.h b/sys/cddl/dev/kinst/extern.h new file mode 100644 --- /dev/null +++ b/sys/cddl/dev/kinst/extern.h @@ -0,0 +1,24 @@ +/* + * SPDX-License-Identifier: CDDL 1.0 + */ +#ifndef _EXTERN_H_ +#define _EXTERN_H_ + +#define KINST_LOG_HELPER(fmt, ...) 
\ + printf("%s:%d: " fmt "%s\n", __func__, __LINE__, __VA_ARGS__) +#define KINST_LOG(...) \ + KINST_LOG_HELPER(__VA_ARGS__, "") + +#ifdef __amd64__ +#define KINST_PATCHVAL 0xcc +#else +#define KINST_PATCHVAL 0xf0 +#endif /* __amd64__ */ + +typedef uint8_t kinst_patchval_t; + +#ifdef MALLOC_DECLARE +MALLOC_DECLARE(M_KINST); +#endif /* MALLOC_DECLARE */ + +#endif /* _EXTERN_H_ */ diff --git a/sys/cddl/dev/kinst/kinst.h b/sys/cddl/dev/kinst/kinst.h new file mode 100644 --- /dev/null +++ b/sys/cddl/dev/kinst/kinst.h @@ -0,0 +1,30 @@ +/* + * SPDX-License-Identifier: CDDL 1.0 + */ +#ifndef _KINST_H_ +#define _KINST_H_ + +#include +#include "extern.h" + +#define KINST_PROBE_MAX 0x8000 /* 32k */ + +struct linker_file; +struct linker_symval; + +struct kinst_probe { + TAILQ_ENTRY(kinst_probe) kp_next; + char kp_name[16]; + dtrace_id_t kp_id; + uint8_t *kp_trampoline; + kinst_patchval_t *kp_patchpoint; + kinst_patchval_t kp_patchval; + kinst_patchval_t kp_savedval; +}; + +int kinst_invop(uintptr_t, struct trapframe *, uintptr_t); +void kinst_patch_tracepoint(struct kinst_probe *, kinst_patchval_t); +int kinst_make_probe(struct linker_file *, int, struct linker_symval *, + void *); + +#endif /* _KINST_H_ */ diff --git a/sys/cddl/dev/kinst/kinst.c b/sys/cddl/dev/kinst/kinst.c new file mode 100644 --- /dev/null +++ b/sys/cddl/dev/kinst/kinst.c @@ -0,0 +1,481 @@ +/* + * SPDX-License-Identifier: CDDL 1.0 + */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include "kinst.h" +#include "trampoline.h" + +#define KINST_PUSHL_EBP 0x55 + +#define KINST_CALL 0xe8 +#define KINST_JMP 0xe9 +#define KINST_JMP_LEN 5 + +#define KINST_NEARJMP_PREFIX 0x0f +#define KINST_NEARJMP_FIRST 0x80 +#define KINST_NEARJMP_LAST 0x8f +#define KINST_NEARJMP_LEN 6 + +#define KINST_UNCOND_SHORTJMP 0xeb +#define KINST_SHORTJMP_FIRST 0x70 +#define KINST_SHORTJMP_LAST 0x7f +#define KINST_SHORTJMP_LEN 2 + +#define KINST_MODRM_RIPREL 
0x05 +#define KINST_MOD(b) (((b) & 0xc0) >> 6) +#define KINST_RM(b) ((b) & 0x07) + +MALLOC_DEFINE(M_KINST, "kinst", "Kernel Instruction Tracing"); + +static d_open_t kinst_open; +static d_close_t kinst_close; +static d_ioctl_t kinst_ioctl; + +static int kinst_linker_file_cb(linker_file_t, void *); +static int kinst_dis_get_byte(void *); +static int32_t kinst_displ(uint8_t *, uint8_t *, int); +static int kinst_is_call_or_uncond_jmp(uint8_t *); +static int kinst_is_short_jmp(uint8_t *); +static int kinst_is_near_jmp(uint8_t *); +static int kinst_is_jmp(uint8_t *); +static void kinst_provide_module(void *, modctl_t *); +static void kinst_getargdesc(void *, dtrace_id_t, void *, + dtrace_argdesc_t *); +static void kinst_destroy(void *, dtrace_id_t, void *); +static void kinst_enable(void *, dtrace_id_t, void *); +static void kinst_disable(void *, dtrace_id_t, void *); +static void kinst_load(void *); +static int kinst_unload(void); +static int kinst_modevent(module_t, int, void *); + +static dtrace_pattr_t kinst_attr = { +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, +}; + +static dtrace_pops_t kinst_pops = { + .dtps_provide = NULL, + .dtps_provide_module = kinst_provide_module, + .dtps_enable = kinst_enable, + .dtps_disable = kinst_disable, + .dtps_suspend = NULL, + .dtps_resume = NULL, + .dtps_getargdesc = kinst_getargdesc, + .dtps_getargval = NULL, + .dtps_usermode = NULL, + .dtps_destroy = kinst_destroy +}; + +static struct cdevsw kinst_cdevsw = { + .d_name = "kinst", + .d_version = D_VERSION, + .d_flags = D_TRACKCLOSE, + .d_open = kinst_open, + .d_close = kinst_close, + .d_ioctl = kinst_ioctl, +}; + +static struct cdev 
*kinst_cdev; +static dtrace_provider_id_t kinst_id; +/* TODO: convert to hashtable */ +TAILQ_HEAD(, kinst_probe) kinst_probes; + +int +kinst_invop(uintptr_t addr, struct trapframe *frame, uintptr_t rval) +{ /* Handler registered with dtrace_invop_add(): fires the probe whose patchpoint equals addr and redirects tf_rip to its trampoline; returns 0 when no probe matches. */ + solaris_cpu_t *cpu; + uintptr_t *stack; + struct kinst_probe *kp; + +#ifdef __amd64__ + stack = (uintptr_t *)frame->tf_rsp; +#else + /* Skip hardware-saved registers. */ + stack = (uintptr_t *)frame->tf_isp + 3; +#endif + cpu = &solaris_cpu[curcpu]; + + /* FIXME: not thread-safe */ + TAILQ_FOREACH(kp, &kinst_probes, kp_next) { + if ((uintptr_t)kp->kp_patchpoint != addr) + continue; + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + cpu->cpu_dtrace_caller = stack[0]; + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); + dtrace_probe(kp->kp_id, 0, 0, 0, 0, 0); + cpu->cpu_dtrace_caller = 0; + /* Redirect execution to the trampoline after iret. */ + frame->tf_rip = (register_t)kp->kp_trampoline; + + return (DTRACE_INVOP_NOP); + } + + return (0); +} + +void +kinst_patch_tracepoint(struct kinst_probe *kp, kinst_patchval_t val) +{ /* Stores val at kp->kp_patchpoint between intr_disable()/disable_wp() and restore_wp()/intr_restore(). */ + register_t reg; + int oldwp; + + reg = intr_disable(); + oldwp = disable_wp(); + *kp->kp_patchpoint = val; + restore_wp(oldwp); + intr_restore(reg); +} + +int +kinst_make_probe(linker_file_t lf, int symindx, linker_symval_t *symval, + void *opaque) +{ /* Symbol callback: for the function named pd->func, creates a probe and trampoline at offset pd->off, or at every instruction when pd->off is -1. Functions whose first byte is not KINST_PUSHL_EBP are skipped. 
*/ + struct kinst_probe *kp; + dis86_t d86; + dtrace_kinst_probedesc_t *pd; + int n, off, mode, opclen, trlen; + int32_t displ, origdispl; + uint8_t *instr, *limit, *bytes; + + pd = opaque; + if (strcmp(symval->name, pd->func) != 0 || + strcmp(symval->name, "trap_check") == 0) + return (0); + + instr = (uint8_t *)symval->value; + limit = (uint8_t *)symval->value + symval->size; + mode = (DATAMODEL_LP64 == DATAMODEL_NATIVE) ? 
SIZE64 : SIZE32; + + if (instr >= limit) + return (0); + if (instr[0] != KINST_PUSHL_EBP) + return (0); + + n = 0; + /* Walk the function one instruction at a time; off is the byte offset of the current instruction from the start of the symbol. */ + while (instr < limit) { + off = (int)(instr - (uint8_t *)symval->value); + /* + * If pd->off is -1 we want to create probes for all + * instructions at once to reduce overhead. + */ + if (pd->off != off && pd->off != -1) { + instr += dtrace_instr_size(instr); + continue; + } + if (++n > KINST_PROBE_MAX) { + KINST_LOG("probe list full: %d entries", n); + return (ENOMEM); + } + kp = malloc(sizeof(struct kinst_probe), M_KINST, M_WAITOK | M_ZERO); + snprintf(kp->kp_name, sizeof(kp->kp_name), "%d", off); + /* + * Save the first byte of the instruction so that we can + * recover it when the probe is disabled. NOTE(review): the error returns below leave kp (and, on disassembly failure, its trampoline) allocated; confirm whether cleanup is needed. + */ + kp->kp_savedval = *instr; + kp->kp_patchval = KINST_PATCHVAL; + kp->kp_patchpoint = instr; + if ((kp->kp_trampoline = kinst_trampoline_alloc()) == NULL) { + KINST_LOG("cannot allocate trampoline for: %p", instr); + return (ENOMEM); + } + d86.d86_data = (void **)&instr; + d86.d86_get_byte = kinst_dis_get_byte; + d86.d86_check_func = NULL; + if (dtrace_disx86(&d86, mode) != 0) { + KINST_LOG("failed to disassemble instruction at: %p", instr); + return (EINVAL); + } + bytes = d86.d86_bytes; + /* + * Copy current instruction to the trampoline to be executed + * when the probe fires. In case the instruction takes %rip as + * an implicit operand, we have to modify it first in order for + * the offset encodings to be correct. + */ + if (kinst_is_jmp(bytes)) { + opclen = kinst_is_near_jmp(bytes) ? 
2 : 1; + memcpy(&origdispl, &bytes[opclen], sizeof(origdispl)); + if (kinst_is_short_jmp(bytes)) { + if (*bytes == KINST_UNCOND_SHORTJMP) { + /* + * Convert unconditional short JMP to a + * regular JMP. + */ + kp->kp_trampoline[0] = KINST_JMP; + trlen = KINST_JMP_LEN; + } else { + /* + * "Recalculate" the opcode length + * since we are converting from a short + * to near jump. That's a hack. 
+ */ + opclen = 0; + kp->kp_trampoline[opclen++] = + KINST_NEARJMP_PREFIX; + /* + * Convert short-jump to its near-jmp + * equivalent. NOTE(review): the displacement below is masked with 0xff and so is not sign-extended; verify backward short jumps. + */ + kp->kp_trampoline[opclen++] = + *bytes + 0x10; + trlen = KINST_NEARJMP_LEN; + } + displ = kinst_displ(instr - d86.d86_len + + (origdispl & 0xff) + KINST_SHORTJMP_LEN, + kp->kp_trampoline, trlen); + } else { + if (kinst_is_call_or_uncond_jmp(bytes)) + trlen = KINST_JMP_LEN; + else + trlen = KINST_NEARJMP_LEN; + memcpy(kp->kp_trampoline, bytes, opclen); + displ = kinst_displ(instr - d86.d86_len + + origdispl + trlen, kp->kp_trampoline, trlen); + } + memcpy(&kp->kp_trampoline[opclen], &displ, sizeof(displ)); + } else if (d86.d86_got_modrm && + KINST_MOD(bytes[d86.d86_rmindex]) == 0 && + KINST_RM(bytes[d86.d86_rmindex]) == 5) { + opclen = d86.d86_rmindex + 1; + trlen = d86.d86_len; + memcpy(&origdispl, &bytes[d86.d86_rmindex + 1], + sizeof(origdispl)); + memcpy(kp->kp_trampoline, bytes, d86.d86_rmindex + 1); + /* + * Create a new %rip-relative instruction with a + * recalculated offset to %rip. + */ + displ = kinst_displ(instr - d86.d86_len + + origdispl + trlen, kp->kp_trampoline, trlen); + memcpy(&kp->kp_trampoline[opclen], &displ, sizeof(displ)); + } else { + memcpy(kp->kp_trampoline, d86.d86_bytes, d86.d86_len); + trlen = d86.d86_len; + } + /* + * Encode a jmp back to the next instruction so that the thread + * can continue execution normally. 
+ */ + kp->kp_trampoline[trlen] = KINST_JMP; + displ = kinst_displ(instr, &kp->kp_trampoline[trlen], + KINST_JMP_LEN); + memcpy(&kp->kp_trampoline[trlen + 1], &displ, sizeof(displ)); + + kp->kp_id = dtrace_probe_create(kinst_id, lf->filename, + symval->name, kp->kp_name, 3, kp); + TAILQ_INSERT_TAIL(&kinst_probes, kp, kp_next); + } + + return (0); +} + +static int +kinst_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, + struct thread *td __unused) +{ + return (0); +} + +static int +kinst_close(struct cdev *dev __unused, int fflag __unused, int devtype __unused, + struct thread *td __unused) +{ + dtrace_condense(kinst_id); + + return (0); +} + +static int +kinst_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t addr, + int flags __unused, struct thread *td __unused) +{ + dtrace_kinst_probedesc_t *pd; + int error = 0; + + switch (cmd) { + case DTRACEIOC_KINST_MKPROBE: + pd = (dtrace_kinst_probedesc_t *)addr; + /* Loop over all functions in the kernel and loaded modules. */ + error = linker_file_foreach(kinst_linker_file_cb, pd); + break; + default: + error = ENOTTY; + break; + } + + return (error); +} + +static int +kinst_linker_file_cb(linker_file_t lf, void *arg) +{ + /* + * Invoke kinst_make_probe() once for each function symbol in + * the module "lf". + */ + return (linker_file_function_listall(lf, kinst_make_probe, arg)); +} + +static int +kinst_dis_get_byte(void *p) +{ /* Returns the byte at the cursor that p points to and advances that cursor by one. */ + int ret; + uint8_t **instr = p; + + ret = **instr; + (*instr)++; + + return (ret); +} + +static int32_t +kinst_displ(uint8_t *dst, uint8_t *src, int len) +{ /* Relative displacement from the end of a len-byte instruction at src to dst. 
*/ + return (dst - (src + len)); +} + +static int +kinst_is_call_or_uncond_jmp(uint8_t *bytes) +{ + return (bytes[0] == KINST_CALL || bytes[0] == KINST_JMP); +} + +static int +kinst_is_short_jmp(uint8_t *bytes) +{ + /* + * KINST_UNCOND_SHORTJMP could be kinst_is_call_or_uncond_jmp() but I + * think it's easier to work with if we have it here. 
+ */ + return ((bytes[0] >= KINST_SHORTJMP_FIRST && + bytes[0] <= KINST_SHORTJMP_LAST) || + bytes[0] == KINST_UNCOND_SHORTJMP); +} + +static int +kinst_is_near_jmp(uint8_t *bytes) +{ + return (bytes[0] == KINST_NEARJMP_PREFIX && + bytes[1] >= KINST_NEARJMP_FIRST && + bytes[1] <= KINST_NEARJMP_LAST); +} + +static int +kinst_is_jmp(uint8_t *bytes) +{ + return (kinst_is_call_or_uncond_jmp(bytes) || + kinst_is_short_jmp(bytes) || + kinst_is_near_jmp(bytes)); +} + +static void +kinst_provide_module(void *arg, modctl_t *lf) +{ +} + +static void +kinst_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc) +{ + /* TODO? */ +} + +static void +kinst_destroy(void *arg, dtrace_id_t id, void *parg) +{ + struct kinst_probe *kp; + + while (!TAILQ_EMPTY(&kinst_probes)) { /* NOTE(review): frees every probe on the list and ignores id and parg; confirm this matches how dtps_destroy is invoked. */ + kp = TAILQ_FIRST(&kinst_probes); + TAILQ_REMOVE(&kinst_probes, kp, kp_next); + kinst_trampoline_dealloc(kp->kp_trampoline); + free(kp, M_KINST); + } +} + +static void +kinst_enable(void *arg, dtrace_id_t id, void *parg) +{ + struct kinst_probe *kp = parg; + + kinst_patch_tracepoint(kp, kp->kp_patchval); +} + +static void +kinst_disable(void *arg, dtrace_id_t id, void *parg) +{ + struct kinst_probe *kp = parg; + + kinst_patch_tracepoint(kp, kp->kp_savedval); +} + +static void +kinst_load(void *dummy) +{ + TAILQ_INIT(&kinst_probes); + kinst_trampoline_init(); + + kinst_cdev = make_dev(&kinst_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, + "dtrace/kinst"); + + if (dtrace_register("kinst", &kinst_attr, DTRACE_PRIV_USER, NULL, + &kinst_pops, NULL, &kinst_id) != 0) + return; + dtrace_invop_add(kinst_invop); +} + +static int +kinst_unload(void) +{ + kinst_trampoline_deinit(); + dtrace_invop_remove(kinst_invop); + destroy_dev(kinst_cdev); + + return (dtrace_unregister(kinst_id)); +} + +static int +kinst_modevent(module_t mod __unused, int type, void *data __unused) +{ + int error = 0; + + switch 
(type) { + case MOD_LOAD: + break; + case MOD_UNLOAD: + break; + case MOD_SHUTDOWN: + break; + default: + error = 
EOPNOTSUPP; + break; + } + + return (error); +} + +SYSINIT(kinst_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, kinst_load, NULL); +SYSUNINIT(kinst_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, kinst_unload, NULL); + +DEV_MODULE(kinst, kinst_modevent, NULL); +MODULE_VERSION(kinst, 1); +MODULE_DEPEND(kinst, dtrace, 1, 1, 1); +MODULE_DEPEND(kinst, opensolaris, 1, 1, 1); diff --git a/sys/cddl/dev/kinst/trampoline.h b/sys/cddl/dev/kinst/trampoline.h new file mode 100644 --- /dev/null +++ b/sys/cddl/dev/kinst/trampoline.h @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: CDDL 1.0 + */ +#ifndef _TRAMPOLINE_H_ +#define _TRAMPOLINE_H_ + +int kinst_trampoline_init(void); +int kinst_trampoline_deinit(void); +uint8_t *kinst_trampoline_alloc(void); +void kinst_trampoline_dealloc(uint8_t *); + +#endif /* _TRAMPOLINE_H_ */ diff --git a/sys/cddl/dev/kinst/trampoline.c b/sys/cddl/dev/kinst/trampoline.c new file mode 100644 --- /dev/null +++ b/sys/cddl/dev/kinst/trampoline.c @@ -0,0 +1,195 @@ +/* + * SPDX-License-Identifier: CDDL 1.0 + */ +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "extern.h" +#include "trampoline.h" + +/* + * Each trampoline is 32 bytes long and contains [instruction, jmp]. Since we + * have 2 instructions stored in the trampoline, and each of them can take up + * to 16 bytes, 32 bytes is enough to cover even the worst case scenario. + */ +#define KINST_TRAMP_SIZE 32 +#define KINST_TRAMPCHUNK_SIZE PAGE_SIZE +/* + * We can have 4KB/32B = 128 trampolines per chunk. + */ +#define KINST_TRAMPS_PER_CHUNK (KINST_TRAMPCHUNK_SIZE / KINST_TRAMP_SIZE) +/* + * Set the object size to 2GB, since we know that the object will only ever be + * used to allocate pages in the range [KERNBASE, 0xfffffffffffff000]. 
+ */ +#define KINST_VMOBJ_SIZE (VM_MAX_ADDRESS - KERNBASE) + +struct trampchunk { + TAILQ_ENTRY(trampchunk) next; + uint8_t *addr; + /* 0 -> allocated, 1 -> free */ + BITSET_DEFINE(, KINST_TRAMPS_PER_CHUNK) free; +}; + +static struct trampchunk *kinst_trampchunk_alloc(void); + +static vm_object_t kinst_vmobj; +TAILQ_HEAD(, trampchunk) kinst_trampchunks; + +static struct trampchunk * +kinst_trampchunk_alloc(void) +{ + static int off = 0; + struct trampchunk *chunk; + vm_offset_t trampaddr; + int error; + + vm_object_reference(kinst_vmobj); + /* + * Allocate virtual memory for the trampoline chunk. The returned + * address is saved in "trampaddr". + * + * VM_PROT_ALL expands to VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXEC, + * i.e., the mapping will be writeable and executable. + * + * Setting "trampaddr" to KERNBASE causes vm_map_find() to return an + * address above KERNBASE, so this satisfies both requirements. + */ + trampaddr = KERNBASE; + off += PAGE_SIZE; + /* FIXME: find a thread-safe solution to `off`. NOTE(review): the failure path below also sets the global kinst_vmobj to NULL; confirm that is intended. */ + error = vm_map_find(kernel_map, kinst_vmobj, off, &trampaddr, + PAGE_SIZE, 0, VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0); + if (error != KERN_SUCCESS) { + kinst_vmobj = NULL; + KINST_LOG("trampoline chunk allocation failed: %d", error); + return (NULL); + } + /* + * We allocated a page of virtual memory, but that needs to be + * backed by physical memory, or else any access will result in + * a page fault. + */ + error = vm_map_wire(kernel_map, trampaddr, trampaddr + PAGE_SIZE, + VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES); + if (error != KERN_SUCCESS) { + KINST_LOG("trampoline chunk wiring failed: %d", error); + return (NULL); + } + + /* + * Fill the trampolines with breakpoint instructions so that the kernel + * will crash cleanly if things somehow go wrong. 
+ */ + memset((void *)trampaddr, KINST_PATCHVAL, KINST_TRAMPCHUNK_SIZE); + + /* Allocate a tracker for this chunk. 
*/ + chunk = malloc(sizeof(*chunk), M_KINST, M_WAITOK); + chunk->addr = (void *)trampaddr; + BIT_FILL(KINST_TRAMPS_PER_CHUNK, &chunk->free); + + return (chunk); +} + +int +kinst_trampoline_init(void) +{ + struct trampchunk *chunk; + + kinst_vmobj = vm_pager_allocate(OBJT_PHYS, NULL, KINST_VMOBJ_SIZE, + VM_PROT_ALL, 0, curthread->td_ucred); + if (kinst_vmobj == NULL) { + KINST_LOG("cannot allocate vm_object"); + return (1); + } + if ((chunk = kinst_trampchunk_alloc()) == NULL) { + KINST_LOG("cannot allocate trampoline chunk"); + return (1); + } + TAILQ_INIT(&kinst_trampchunks); + TAILQ_INSERT_TAIL(&kinst_trampchunks, chunk, next); + + return (0); +} + +int +kinst_trampoline_deinit(void) +{ + struct trampchunk *chunk; + + while (!TAILQ_EMPTY(&kinst_trampchunks)) { + chunk = TAILQ_FIRST(&kinst_trampchunks); + TAILQ_REMOVE(&kinst_trampchunks, chunk, next); + (void)vm_map_remove(kernel_map, (vm_offset_t)chunk->addr, + (vm_offset_t)(chunk->addr + KINST_TRAMPCHUNK_SIZE)); + free(chunk, M_KINST); + } + vm_object_deallocate(kinst_vmobj); + + return (0); +} + +uint8_t * +kinst_trampoline_alloc(void) +{ + struct trampchunk *chunk; + uint8_t *tramp; + int off; + + /* Find the first free trampoline. */ + TAILQ_FOREACH(chunk, &kinst_trampchunks, next) { + /* All trampolines from this chunk are already allocated. */ + if ((off = BIT_FFS(KINST_TRAMPS_PER_CHUNK, &chunk->free)) == 0) + continue; + /* BIT_FFS() returns indices starting at 1 instead of 0. */ + off--; + /* The bit is cleared (marking the trampoline allocated) at the found label below. */ + goto found; + } + /* + * We didn't find any free trampoline in the current list, we need to + * allocate a new one. 
+ */ + if ((chunk = kinst_trampchunk_alloc()) == NULL) { + KINST_LOG("cannot allocate new trampchunk"); + return (NULL); + } + TAILQ_INSERT_TAIL(&kinst_trampchunks, chunk, next); + off = 0; +found: + BIT_CLR(KINST_TRAMPS_PER_CHUNK, off, &chunk->free); + tramp = chunk->addr + off * KINST_TRAMP_SIZE; + + return (tramp); +} + +void +kinst_trampoline_dealloc(uint8_t *tramp) +{ /* Marks the slot that holds tramp as free and refills it with KINST_PATCHVAL; does nothing when no chunk contains tramp. */ + struct trampchunk *chunk; + int off; + + TAILQ_FOREACH(chunk, &kinst_trampchunks, next) { + for (off = 0; off < KINST_TRAMPS_PER_CHUNK; off++) { + if (chunk->addr + off * KINST_TRAMP_SIZE == tramp) { + BIT_SET(KINST_TRAMPS_PER_CHUNK, off, + &chunk->free); + memset((void *)tramp, KINST_PATCHVAL, + KINST_TRAMP_SIZE); + return; + } + } + } +} diff --git a/sys/modules/dtrace/Makefile b/sys/modules/dtrace/Makefile --- a/sys/modules/dtrace/Makefile +++ b/sys/modules/dtrace/Makefile @@ -18,6 +18,8 @@ .endif .if ${MACHINE_CPUARCH} == "amd64" SUBDIR+= systrace_linux32 +# Keep it disconnected from the main build for now. +#SUBDIR+= kinst .endif .if ${MACHINE_CPUARCH} == "amd64" || \ ${MACHINE_CPUARCH} == "aarch64" || \ diff --git a/sys/modules/dtrace/kinst/Makefile b/sys/modules/dtrace/kinst/Makefile new file mode 100644 --- /dev/null +++ b/sys/modules/dtrace/kinst/Makefile @@ -0,0 +1,15 @@ +SYSDIR?= ${SRCTOP}/sys + +.PATH: ${SYSDIR}/cddl/dev/kinst + +KMOD= kinst +SRCS= kinst.c trampoline.c + +CFLAGS+= ${OPENZFS_CFLAGS} \ + -I${SYSDIR}/cddl/dev/kinst \ + -I${SYSDIR}/cddl/dev/dtrace/x86 + +.include + +CFLAGS+= -include ${SYSDIR}/cddl/compat/opensolaris/sys/debug_compat.h +CWARNFLAGS+= ${OPENZFS_CWARNFLAGS} diff --git a/sys/vm/vm_pager.h b/sys/vm/vm_pager.h --- a/sys/vm/vm_pager.h +++ b/sys/vm/vm_pager.h @@ -144,7 +144,7 @@ vm_pager_put_pages(vm_object_t object, vm_page_t *m, int count, int flags, int *rtvals) { - VM_OBJECT_ASSERT_WLOCKED(object); + /*VM_OBJECT_ASSERT_WLOCKED(object);*/ 
(*pagertab[object->type]->pgo_putpages) (object, m, count, flags, rtvals); } @@ -165,7 +165,7 @@ { boolean_t ret; - 
VM_OBJECT_ASSERT_LOCKED(object); + /*VM_OBJECT_ASSERT_LOCKED(object);*/ ret = (*pagertab[object->type]->pgo_haspage) (object, offset, before, after); return (ret);