diff --git a/cddl/contrib/opensolaris/cmd/dtrace/test/tst/amd64/kinst/tst.basic.ksh b/cddl/contrib/opensolaris/cmd/dtrace/test/tst/amd64/kinst/tst.basic.ksh
index e3585ca70793..3005da74c895 100644
--- a/cddl/contrib/opensolaris/cmd/dtrace/test/tst/amd64/kinst/tst.basic.ksh
+++ b/cddl/contrib/opensolaris/cmd/dtrace/test/tst/amd64/kinst/tst.basic.ksh
@@ -1,47 +1,48 @@
#!/usr/bin/ksh
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2022 Mark Johnston
#

#
# Basic kinst smoke test: enable every instruction probe in a handful of
# frequently executed kernel functions for ten seconds while a background
# loop keeps the system busy.  The test's exit status is dtrace's.
#

# Run dtrace with the probe script on stdin; the tick-10s clause ends the run.
script()
{
	$dtrace -q -s /dev/stdin <<__EOF__
kinst::vm_fault: {}
kinst::amd64_syscall: {}
kinst::exit1: {}
kinst::spinlock_enter: {}
+kinst::memcpy: {}
tick-10s {exit(0);}
__EOF__
}

# Generate a steady stream of system calls so that the probes above fire.
spin()
{
	while true; do
		ls -la / >/dev/null 2>&1
	done
}

if [ $# != 1 ]; then
	echo expected one argument: '<'dtrace-path'>'
	exit 2
fi

dtrace=$1

spin &
child=$!

script
status=$?

# Stop the load generator; without this it keeps looping after the test ends.
kill $child >/dev/null 2>&1
exit $status
diff --git a/sys/cddl/dev/kinst/amd64/kinst_isa.c b/sys/cddl/dev/kinst/amd64/kinst_isa.c index 7aba79c1d481..d29f1cd4181f 100644 --- a/sys/cddl/dev/kinst/amd64/kinst_isa.c +++ b/sys/cddl/dev/kinst/amd64/kinst_isa.c @@ -1,618 +1,618 @@ /* * SPDX-License-Identifier: CDDL 1.0 * * Copyright 2022 Christos Margiolis * Copyright 2022 Mark Johnston */ #include #include #include #include #include #include #include #include "kinst.h" #define KINST_PUSHL_RBP 0x55 #define KINST_STI 0xfb #define KINST_POPF 0x9d #define KINST_MODRM_MOD(b) (((b) & 0xc0) >> 6) #define KINST_MODRM_REG(b) (((b) & 0x38) >> 3) #define KINST_MODRM_RM(b) ((b) & 0x07) #define KINST_SIB_SCALE(s) (((s) & 0xc0) >> 6) #define KINST_SIB_INDEX(s) (((s) & 0x38) >> 3) #define KINST_SIB_BASE(s) (((s) & 0x07) >> 0) #define KINST_REX_W(r) (((r) & 0x08) >> 3) #define KINST_REX_R(r) (((r) & 0x04) >> 2) #define KINST_REX_X(r) (((r) & 0x02) >> 1) #define KINST_REX_B(r) (((r) & 0x01) >> 0) #define KINST_F_CALL 0x0001 /* instruction is a "call" */ #define KINST_F_DIRECT_CALL 0x0002 /* instruction is a direct call */ #define KINST_F_RIPREL 0x0004 /* instruction is position-dependent */ #define KINST_F_JMP 0x0008 /* instruction is a %rip-relative jmp */ #define KINST_F_MOD_DIRECT 0x0010 /* operand is not a memory address */ /* * Per-CPU trampolines used when the interrupted thread is executing with * interrupts disabled. If an interrupt is raised while executing a trampoline, * the interrupt thread cannot safely overwrite its trampoline if it hits a * kinst probe while executing the interrupt handler. */ DPCPU_DEFINE_STATIC(uint8_t *, intr_tramp); /* * Map ModR/M register bits to a trapframe offset. 
*/
/*
 * The value returned is an index in units of register_t from the start of
 * struct trapframe, so that a frame can be indexed as an array of registers
 * (see kinst_regval()).  Any index outside 0-15 panics.
 */
static int
kinst_regoff(int reg)
{
#define	_MATCH_REG(i, reg)			\
	case i:					\
		return (offsetof(struct trapframe, tf_ ## reg) /	\
		    sizeof(register_t))
	switch (reg) {
	_MATCH_REG( 0, rax);
	_MATCH_REG( 1, rcx);
	_MATCH_REG( 2, rdx);
	_MATCH_REG( 3, rbx);
	_MATCH_REG( 4, rsp); /* SIB when mod != 3 */
	_MATCH_REG( 5, rbp);
	_MATCH_REG( 6, rsi);
	_MATCH_REG( 7, rdi);
	_MATCH_REG( 8, r8); /* REX.R is set */
	_MATCH_REG( 9, r9);
	_MATCH_REG(10, r10);
	_MATCH_REG(11, r11);
	_MATCH_REG(12, r12);
	_MATCH_REG(13, r13);
	_MATCH_REG(14, r14);
	_MATCH_REG(15, r15);
	}
#undef _MATCH_REG
	/* Only reached when no case above returned. */
	panic("%s: unhandled register index %d", __func__, reg);
}

/*
 * Obtain the specified register's value.
 *
 * A register index of -1 stands for "no register" (the value the decoder
 * leaves in reg1/reg2 when an operand has no base or index) and yields 0, so
 * callers can add the result unconditionally.
 */
static uint64_t
kinst_regval(struct trapframe *frame, int reg)
{
	if (reg == -1)
		return (0);
	return (((register_t *)frame)[kinst_regoff(reg)]);
}

/*
 * Compute the displacement to store in a copy of the probed instruction
 * located at "dst": the probe's patch point plus its recorded displacement,
 * expressed relative to "dst" and truncated to 32 bits.
 */
static uint32_t
kinst_riprel_disp(struct kinst_probe *kp, void *dst)
{
	return ((uint32_t)((intptr_t)kp->kp_patchpoint + kp->kp_md.disp -
	    (intptr_t)dst));
}

/*
 * Fill the trampoline "tramp" for probe "kp": copy the instruction template,
 * patch in a displacement relative to the trampoline if the instruction is
 * position-dependent, and append a jump back to the instruction that follows
 * the patch point.
 *
 * This runs from kinst_invop(), which is why the copies go through
 * kinst_memcpy() rather than memcpy(): memcpy() may itself be traced.
 */
static void
kinst_trampoline_populate(struct kinst_probe *kp, uint8_t *tramp)
{
	uint8_t *instr;
	uint32_t disp;
	int ilen;

	/* Length of the template, which may differ from the original's. */
	ilen = kp->kp_md.tinstlen;

-	memcpy(tramp, kp->kp_md.template, ilen);
+	kinst_memcpy(tramp, kp->kp_md.template, ilen);
	if ((kp->kp_md.flags & KINST_F_RIPREL) != 0) {
		disp = kinst_riprel_disp(kp, tramp);
-		memcpy(&tramp[kp->kp_md.dispoff], &disp, sizeof(uint32_t));
+		kinst_memcpy(&tramp[kp->kp_md.dispoff], &disp, sizeof(uint32_t));
	}

	/*
	 * The following position-independent jmp takes us back to the
	 * original code. It is encoded as "jmp *0(%rip)" (six bytes),
	 * followed by the absolute address of the instruction following
	 * the one that was traced (eight bytes).
	 */
	tramp[ilen + 0] = 0xff;
	tramp[ilen + 1] = 0x25;
	tramp[ilen + 2] = 0x00;
	tramp[ilen + 3] = 0x00;
	tramp[ilen + 4] = 0x00;
	tramp[ilen + 5] = 0x00;
	instr = kp->kp_patchpoint + kp->kp_md.instlen;
-	memcpy(&tramp[ilen + 6], &instr, sizeof(uintptr_t));
+	kinst_memcpy(&tramp[ilen + 6], &instr, sizeof(uintptr_t));
}

/*
 * DTrace invop hook for kinst; installed with dtrace_invop_add() in
 * kinst_load().
 *
 * Looks up the probe whose patch point is "addr" and returns 0 if there is
 * none.  Otherwise the probe is fired and the traced instruction is handled
 * in one of two ways:
 *
 * - Call instructions (KINST_F_CALL) are emulated: the return address is
 *   stored through "scratch", frame->tf_rip is set to the call target and
 *   DTRACE_INVOP_CALL is returned.
 * - Any other instruction is run from a trampoline: frame->tf_rip is pointed
 *   at a freshly populated trampoline and DTRACE_INVOP_NOP is returned.  If
 *   no trampoline is available, the original instruction is written back to
 *   the patch point and frame->tf_rip is pointed there instead.
 */
int
kinst_invop(uintptr_t addr, struct trapframe *frame, uintptr_t scratch)
{
	solaris_cpu_t *cpu;
	uintptr_t *stack, retaddr;
	struct kinst_probe *kp;
	struct kinst_probe_md *kpmd;
	uint8_t *tramp;

	stack = (uintptr_t *)frame->tf_rsp;
	cpu = &solaris_cpu[curcpu];

	/* Walk the hash chain for "addr"; kp is NULL if nothing matches. */
	LIST_FOREACH(kp, KINST_GETPROBE(addr), kp_hashnext) {
		if ((uintptr_t)kp->kp_patchpoint == addr)
			break;
	}
	if (kp == NULL)
		return (0);

	/*
	 * Report the address of the breakpoint for the benefit of consumers
	 * fetching register values with regs[].
	 */
	frame->tf_rip--;

	/*
	 * The word at the top of the interrupted stack is read with
	 * CPU_DTRACE_NOFAULT set; both that flag and CPU_DTRACE_BADADDR are
	 * cleared again before the probe fires.
	 */
	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	cpu->cpu_dtrace_caller = stack[0];
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
	dtrace_probe(kp->kp_id, 0, 0, 0, 0, 0);
	cpu->cpu_dtrace_caller = 0;

	kpmd = &kp->kp_md;
	if ((kpmd->flags & KINST_F_CALL) != 0) {
		/*
		 * dtrace_invop_start() reserves space on the stack to
		 * store the return address of the call instruction.
		 */
		retaddr = (uintptr_t)(kp->kp_patchpoint + kpmd->instlen);
		*(uintptr_t *)scratch = retaddr;

		if ((kpmd->flags & KINST_F_DIRECT_CALL) != 0) {
			/* Target is relative to the end of the instruction. */
			frame->tf_rip = (uintptr_t)(kp->kp_patchpoint +
			    kpmd->disp + kpmd->instlen);
		} else {
			register_t rval;

			if (kpmd->reg1 == -1 && kpmd->reg2 == -1) {
				/* rip-relative */
				rval = frame->tf_rip + kpmd->instlen;
			} else {
				/* indirect */
				rval = kinst_regval(frame, kpmd->reg1) +
				    (kinst_regval(frame, kpmd->reg2) <<
				    kpmd->scale);
			}

			/*
			 * With KINST_F_MOD_DIRECT the operand is the target
			 * itself; otherwise it is the address of a pointer to
			 * the target.
			 */
			if ((kpmd->flags & KINST_F_MOD_DIRECT) != 0) {
				frame->tf_rip = rval + kpmd->disp;
			} else {
				frame->tf_rip =
				    *(uintptr_t *)(rval + kpmd->disp);
			}
		}
		return (DTRACE_INVOP_CALL);
	} else {
		/*
		 * Interrupts were disabled in the interrupted context: use
		 * this CPU's trampoline rather than the thread's.
		 */
		if ((frame->tf_rflags & PSL_I) == 0)
			tramp = DPCPU_GET(intr_tramp);
		else
			tramp = curthread->t_kinst;
		if (tramp == NULL) {
			/*
			 * A trampoline allocation failed, so this probe is
			 * effectively disabled. Restore the original
			 * instruction.
			 *
			 * We can't safely print anything here, but the
			 * trampoline allocator should have left a breadcrumb in
			 * the dmesg.
			 */
			kinst_patch_tracepoint(kp, kp->kp_savedval);
			frame->tf_rip = (register_t)kp->kp_patchpoint;
		} else {
			kinst_trampoline_populate(kp, tramp);
			frame->tf_rip = (register_t)tramp;
		}
		return (DTRACE_INVOP_NOP);
	}
}

/*
 * Store "val" at the probe's patch point.  Used both to arm a probe
 * (kp_patchval) and to restore the original contents (kp_savedval).
 *
 * The store is bracketed by intr_disable()/intr_restore() and
 * disable_wp()/restore_wp(); presumably the latter pair lifts write
 * protection of kernel text for the duration -- confirm against their
 * definitions.
 */
void
kinst_patch_tracepoint(struct kinst_probe *kp, kinst_patchval_t val)
{
	register_t reg;
	int oldwp;

	reg = intr_disable();
	oldwp = disable_wp();
	*kp->kp_patchpoint = val;
	restore_wp(oldwp);
	intr_restore(reg);
}

/*
 * Record an 8-bit displacement, sign-extended to 64 bits.
 */
static void
kinst_set_disp8(struct kinst_probe *kp, uint8_t byte)
{
	kp->kp_md.disp = (int64_t)(int8_t)byte;
}

/*
 * Record the 32-bit displacement stored at "bytes", sign-extended to 64 bits.
 * The value is copied out with memcpy() instead of being read through a cast
 * pointer.
 */
static void
kinst_set_disp32(struct kinst_probe *kp, uint8_t *bytes)
{
	int32_t disp32;

	memcpy(&disp32, bytes, sizeof(disp32));
	kp->kp_md.disp = (int64_t)disp32;
}

/*
 * Set up all of the state needed to faithfully execute a probed instruction.
 *
 * In the simple case, we copy the instruction unmodified to a per-thread
 * trampoline, wherein it is followed by a jump back to the original code.
* - Instructions can have %rip as an operand: * - with %rip-relative addressing encoded in ModR/M, or * - implicitly as a part of the instruction definition (jmp, call). * - Call instructions (which may be %rip-relative) need to push the correct * return address onto the stack. * * Call instructions are simple enough to be emulated in software, so we simply * do not use the trampoline mechanism in that case. kinst_invop() will compute * the branch target using the address info computed here (register operands and * displacement). * * %rip-relative operands encoded using the ModR/M byte always use a 32-bit * displacement; when populating the trampoline the displacement is adjusted to * be relative to the trampoline address. Trampolines are always allocated * above KERNBASE for this reason. * * For other %rip-relative operands (just jumps) we take the same approach. * Instructions which specify an 8-bit displacement must be rewritten to use a * 32-bit displacement. */ static int kinst_instr_dissect(struct kinst_probe *kp, uint8_t **instr) { struct kinst_probe_md *kpmd; dis86_t d86; uint8_t *bytes, modrm, rex; int dispoff, i, ilen, opcidx; kpmd = &kp->kp_md; d86.d86_data = instr; d86.d86_get_byte = dtrace_dis_get_byte; d86.d86_check_func = NULL; if (dtrace_disx86(&d86, SIZE64) != 0) { KINST_LOG("failed to disassemble instruction at: %p", *instr); return (EINVAL); } bytes = d86.d86_bytes; kpmd->instlen = kpmd->tinstlen = d86.d86_len; /* * Skip over prefixes, save REX. */ rex = 0; for (i = 0; i < kpmd->instlen; i++) { switch (bytes[i]) { case 0xf0 ... 0xf3: /* group 1 */ continue; case 0x26: case 0x2e: case 0x36: case 0x3e: case 0x64: case 0x65: /* group 2 */ continue; case 0x66: /* group 3 */ continue; case 0x67: /* group 4 */ continue; case 0x40 ... 
0x4f: /* REX */ rex = bytes[i]; continue; } break; } KASSERT(i < kpmd->instlen, ("%s: failed to disassemble instruction at %p", __func__, bytes)); opcidx = i; /* * Identify instructions of interest by opcode: calls and jumps. * Extract displacements. */ dispoff = -1; switch (bytes[opcidx]) { case 0x0f: switch (bytes[opcidx + 1]) { case 0x80 ... 0x8f: /* conditional jmp near */ kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL; dispoff = opcidx + 2; kinst_set_disp32(kp, &bytes[dispoff]); break; } break; case 0xe3: /* * There is no straightforward way to translate this instruction * to use a 32-bit displacement. Fortunately, it is rarely * used. */ return (EINVAL); case 0x70 ... 0x7f: /* conditional jmp short */ kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL; dispoff = opcidx + 1; kinst_set_disp8(kp, bytes[dispoff]); break; case 0xe9: /* unconditional jmp near */ kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL; dispoff = opcidx + 1; kinst_set_disp32(kp, &bytes[dispoff]); break; case 0xeb: /* unconditional jmp short */ kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL; dispoff = opcidx + 1; kinst_set_disp8(kp, bytes[dispoff]); break; case 0xe8: case 0x9a: /* direct call */ kpmd->flags |= KINST_F_CALL | KINST_F_DIRECT_CALL; dispoff = opcidx + 1; kinst_set_disp32(kp, &bytes[dispoff]); break; case 0xff: KASSERT(d86.d86_got_modrm, ("no ModR/M byte for instr at %p", *instr - kpmd->instlen)); switch (KINST_MODRM_REG(bytes[d86.d86_rmindex])) { case 0x02: case 0x03: /* indirect call */ kpmd->flags |= KINST_F_CALL; break; case 0x04: case 0x05: /* indirect jump */ kpmd->flags |= KINST_F_JMP; break; } } /* * If there's a ModR/M byte, we need to check it to see if the operand * is %rip-relative, and rewrite the displacement if so. If not, we * might still have to extract operand info if this is a call * instruction. 
*/ if (d86.d86_got_modrm) { uint8_t mod, rm, sib; kpmd->reg1 = kpmd->reg2 = -1; modrm = bytes[d86.d86_rmindex]; mod = KINST_MODRM_MOD(modrm); rm = KINST_MODRM_RM(modrm); if (mod == 0 && rm == 5) { kpmd->flags |= KINST_F_RIPREL; dispoff = d86.d86_rmindex + 1; kinst_set_disp32(kp, &bytes[dispoff]); } else if ((kpmd->flags & KINST_F_CALL) != 0) { bool havesib; havesib = (mod != 3 && rm == 4); dispoff = d86.d86_rmindex + (havesib ? 2 : 1); if (mod == 1) kinst_set_disp8(kp, bytes[dispoff]); else if (mod == 2) kinst_set_disp32(kp, &bytes[dispoff]); else if (mod == 3) kpmd->flags |= KINST_F_MOD_DIRECT; if (havesib) { sib = bytes[d86.d86_rmindex + 1]; if (KINST_SIB_BASE(sib) != 5) { kpmd->reg1 = KINST_SIB_BASE(sib) | (KINST_REX_B(rex) << 3); } kpmd->scale = KINST_SIB_SCALE(sib); kpmd->reg2 = KINST_SIB_INDEX(sib) | (KINST_REX_X(rex) << 3); } else { kpmd->reg1 = rm | (KINST_REX_B(rex) << 3); } } } /* * Calls are emulated in software; once operands are decoded we have * nothing else to do. */ if ((kpmd->flags & KINST_F_CALL) != 0) return (0); /* * Allocate and populate an instruction trampoline template. * * Position-independent instructions can simply be copied, but * position-dependent instructions require some surgery: jump * instructions with an 8-bit displacement need to be converted to use a * 32-bit displacement, and the adjusted displacement needs to be * computed. */ ilen = kpmd->instlen; if ((kpmd->flags & KINST_F_RIPREL) != 0) { if ((kpmd->flags & KINST_F_JMP) == 0 || bytes[opcidx] == 0x0f || bytes[opcidx] == 0xe9 || bytes[opcidx] == 0xff) { memcpy(kpmd->template, bytes, dispoff); memcpy(&kpmd->template[dispoff + 4], &bytes[dispoff + 4], ilen - (dispoff + 4)); kpmd->dispoff = dispoff; } else if (bytes[opcidx] == 0xeb) { memcpy(kpmd->template, bytes, opcidx); kpmd->template[opcidx] = 0xe9; kpmd->dispoff = opcidx + 1; /* Instruction length changes from 2 to 5. 
*/ kpmd->tinstlen = 5; kpmd->disp -= 3; } else if (bytes[opcidx] >= 0x70 && bytes[opcidx] <= 0x7f) { memcpy(kpmd->template, bytes, opcidx); kpmd->template[opcidx] = 0x0f; kpmd->template[opcidx + 1] = bytes[opcidx] + 0x10; kpmd->dispoff = opcidx + 2; /* Instruction length changes from 2 to 6. */ kpmd->tinstlen = 6; kpmd->disp -= 4; } else { panic("unhandled opcode %#x", bytes[opcidx]); } } else { memcpy(kpmd->template, bytes, ilen); } return (0); } int kinst_make_probe(linker_file_t lf, int symindx, linker_symval_t *symval, void *opaque) { struct kinst_probe *kp; dtrace_kinst_probedesc_t *pd; const char *func; int error, instrsize, n, off; uint8_t *instr, *limit; pd = opaque; func = symval->name; if (kinst_excluded(func)) return (0); if (strcmp(func, pd->kpd_func) != 0) return (0); instr = (uint8_t *)symval->value; limit = (uint8_t *)symval->value + symval->size; if (instr >= limit) return (0); /* * Ignore functions not beginning with the usual function prologue. * These might correspond to exception handlers with which we should not * meddle. This does however exclude functions which can be safely * traced, such as cpu_switch(). */ if (*instr != KINST_PUSHL_RBP) return (0); n = 0; while (instr < limit) { instrsize = dtrace_instr_size(instr); off = (int)(instr - (uint8_t *)symval->value); if (pd->kpd_off != -1 && off != pd->kpd_off) { instr += instrsize; continue; } /* * Check for instructions which may enable interrupts. Such * instructions are tricky to trace since it is unclear whether * to use the per-thread or per-CPU trampolines. Since they are * rare, we don't bother to implement special handling for them. * * If the caller specified an offset, return an error, otherwise * silently ignore the instruction so that it remains possible * to enable all instructions in a function. 
*/ if (instrsize == 1 && (instr[0] == KINST_POPF || instr[0] == KINST_STI)) { if (pd->kpd_off != -1) return (EINVAL); instr += instrsize; continue; } /* * Prevent separate dtrace(1) instances from creating copies of * the same probe. */ LIST_FOREACH(kp, KINST_GETPROBE(instr), kp_hashnext) { if (strcmp(kp->kp_func, func) == 0 && strtol(kp->kp_name, NULL, 10) == off) return (0); } if (++n > KINST_PROBETAB_MAX) { KINST_LOG("probe list full: %d entries", n); return (ENOMEM); } kp = malloc(sizeof(struct kinst_probe), M_KINST, M_WAITOK | M_ZERO); kp->kp_func = func; snprintf(kp->kp_name, sizeof(kp->kp_name), "%d", off); kp->kp_savedval = *instr; kp->kp_patchval = KINST_PATCHVAL; kp->kp_patchpoint = instr; error = kinst_instr_dissect(kp, &instr); if (error != 0) return (error); kinst_probe_create(kp, lf); } return (0); } int kinst_md_init(void) { uint8_t *tramp; int cpu; CPU_FOREACH(cpu) { tramp = kinst_trampoline_alloc(M_WAITOK); if (tramp == NULL) return (ENOMEM); DPCPU_ID_SET(cpu, intr_tramp, tramp); } return (0); } void kinst_md_deinit(void) { uint8_t *tramp; int cpu; CPU_FOREACH(cpu) { tramp = DPCPU_ID_GET(cpu, intr_tramp); if (tramp != NULL) { kinst_trampoline_dealloc(tramp); DPCPU_ID_SET(cpu, intr_tramp, NULL); } } } /* * Exclude machine-dependent functions that are not safe-to-trace. 
*/ int kinst_md_excluded(const char *name) { return (0); } diff --git a/sys/cddl/dev/kinst/kinst.c b/sys/cddl/dev/kinst/kinst.c index 46b9bf2f41e8..e30b813e0400 100644 --- a/sys/cddl/dev/kinst/kinst.c +++ b/sys/cddl/dev/kinst/kinst.c @@ -1,295 +1,316 @@ /* * SPDX-License-Identifier: CDDL 1.0 * * Copyright 2022 Christos Margiolis */ #include #include #include #include #include #include #include #include "kinst.h" MALLOC_DEFINE(M_KINST, "kinst", "Kernel Instruction Tracing"); static d_open_t kinst_open; static d_close_t kinst_close; static d_ioctl_t kinst_ioctl; static void kinst_provide_module(void *, modctl_t *); static void kinst_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *); static void kinst_destroy(void *, dtrace_id_t, void *); static void kinst_enable(void *, dtrace_id_t, void *); static void kinst_disable(void *, dtrace_id_t, void *); static int kinst_load(void *); static int kinst_unload(void *); static int kinst_modevent(module_t, int, void *); static dtrace_pattr_t kinst_attr = { { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, }; static const dtrace_pops_t kinst_pops = { .dtps_provide = NULL, .dtps_provide_module = kinst_provide_module, .dtps_enable = kinst_enable, .dtps_disable = kinst_disable, .dtps_suspend = NULL, .dtps_resume = NULL, .dtps_getargdesc = kinst_getargdesc, .dtps_getargval = NULL, .dtps_usermode = NULL, .dtps_destroy = kinst_destroy }; static struct cdevsw kinst_cdevsw = { .d_name = "kinst", .d_version = D_VERSION, .d_flags = D_TRACKCLOSE, .d_open = kinst_open, .d_close = kinst_close, .d_ioctl = kinst_ioctl, }; static dtrace_provider_id_t kinst_id; struct kinst_probe_list *kinst_probetab; static 
struct cdev *kinst_cdev; +/* + * Tracing memcpy() will crash the kernel when kinst tries to trace an instance + * of the memcpy() calls in kinst_invop(). To fix this, we can use + * kinst_memcpy() in those cases, with its arguments marked as 'volatile' to + * "outsmart" the compiler and avoid having it replaced by a regular memcpy(). + */ +volatile void * +kinst_memcpy(volatile void *dst, volatile const void *src, size_t len) +{ + volatile const unsigned char *src0; + volatile unsigned char *dst0; + + src0 = src; + dst0 = dst; + + while (len--) + *dst0++ = *src0++; + + return (dst); +} + int kinst_excluded(const char *name) { if (kinst_md_excluded(name)) return (1); /* * Anything beginning with "dtrace_" may be called from probe context * unless it explicitly indicates that it won't be called from probe * context by using the prefix "dtrace_safe_". */ if (strncmp(name, "dtrace_", strlen("dtrace_")) == 0 && strncmp(name, "dtrace_safe_", strlen("dtrace_safe_")) != 0) return (1); /* * Omit instrumentation of functions that are probably in DDB. It * makes it too hard to debug broken kinst. * * NB: kdb_enter() can be excluded, but its call to printf() can't be. * This is generally OK since we're not yet in debugging context. */ if (strncmp(name, "db_", strlen("db_")) == 0 || strncmp(name, "kdb_", strlen("kdb_")) == 0) return (1); /* * Lock owner methods may be called from probe context. */ if (strcmp(name, "owner_mtx") == 0 || strcmp(name, "owner_rm") == 0 || strcmp(name, "owner_rw") == 0 || strcmp(name, "owner_sx") == 0) return (1); /* * When DTrace is built into the kernel we need to exclude the kinst * functions from instrumentation. 
*/ #ifndef _KLD_MODULE if (strncmp(name, "kinst_", strlen("kinst_")) == 0) return (1); #endif if (strcmp(name, "trap_check") == 0) return (1); return (0); } void kinst_probe_create(struct kinst_probe *kp, linker_file_t lf) { kp->kp_id = dtrace_probe_create(kinst_id, lf->filename, kp->kp_func, kp->kp_name, 3, kp); LIST_INSERT_HEAD(KINST_GETPROBE(kp->kp_patchpoint), kp, kp_hashnext); } static int kinst_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused) { return (0); } static int kinst_close(struct cdev *dev __unused, int fflag __unused, int devtype __unused, struct thread *td __unused) { dtrace_condense(kinst_id); return (0); } static int kinst_linker_file_cb(linker_file_t lf, void *arg) { dtrace_kinst_probedesc_t *pd; pd = arg; if (pd->kpd_mod[0] != '\0' && strcmp(pd->kpd_mod, lf->filename) != 0) return (0); /* * Invoke kinst_make_probe_function() once for each function symbol in * the module "lf". */ return (linker_file_function_listall(lf, kinst_make_probe, arg)); } static int kinst_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t addr, int flags __unused, struct thread *td __unused) { dtrace_kinst_probedesc_t *pd; int error = 0; switch (cmd) { case KINSTIOC_MAKEPROBE: pd = (dtrace_kinst_probedesc_t *)addr; pd->kpd_func[sizeof(pd->kpd_func) - 1] = '\0'; pd->kpd_mod[sizeof(pd->kpd_mod) - 1] = '\0'; /* Loop over all functions in the kernel and loaded modules. 
*/ error = linker_file_foreach(kinst_linker_file_cb, pd); break; default: error = ENOTTY; break; } return (error); } static void kinst_provide_module(void *arg, modctl_t *lf) { } static void kinst_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc) { desc->dtargd_ndx = DTRACE_ARGNONE; } static void kinst_destroy(void *arg, dtrace_id_t id, void *parg) { struct kinst_probe *kp = parg; LIST_REMOVE(kp, kp_hashnext); free(kp, M_KINST); } static void kinst_enable(void *arg, dtrace_id_t id, void *parg) { struct kinst_probe *kp = parg; static bool warned = false; if (!warned) { KINST_LOG( "kinst: This provider is experimental, exercise caution"); warned = true; } kinst_patch_tracepoint(kp, kp->kp_patchval); } static void kinst_disable(void *arg, dtrace_id_t id, void *parg) { struct kinst_probe *kp = parg; kinst_patch_tracepoint(kp, kp->kp_savedval); } static int kinst_load(void *dummy) { int error; error = kinst_trampoline_init(); if (error != 0) return (error); error = kinst_md_init(); if (error != 0) { kinst_trampoline_deinit(); return (error); } error = dtrace_register("kinst", &kinst_attr, DTRACE_PRIV_USER, NULL, &kinst_pops, NULL, &kinst_id); if (error != 0) { kinst_md_deinit(); kinst_trampoline_deinit(); return (error); } kinst_probetab = malloc(KINST_PROBETAB_MAX * sizeof(struct kinst_probe_list), M_KINST, M_WAITOK | M_ZERO); for (int i = 0; i < KINST_PROBETAB_MAX; i++) LIST_INIT(&kinst_probetab[i]); kinst_cdev = make_dev(&kinst_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "dtrace/kinst"); dtrace_invop_add(kinst_invop); return (0); } static int kinst_unload(void *dummy) { free(kinst_probetab, M_KINST); kinst_md_deinit(); kinst_trampoline_deinit(); dtrace_invop_remove(kinst_invop); destroy_dev(kinst_cdev); return (dtrace_unregister(kinst_id)); } static int kinst_modevent(module_t mod __unused, int type, void *data __unused) { int error = 0; switch (type) { case MOD_LOAD: break; case MOD_UNLOAD: break; case MOD_SHUTDOWN: break; default: error = 
EOPNOTSUPP; break; } return (error); } SYSINIT(kinst_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, kinst_load, NULL); SYSUNINIT(kinst_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, kinst_unload, NULL); DEV_MODULE(kinst, kinst_modevent, NULL); MODULE_VERSION(kinst, 1); MODULE_DEPEND(kinst, dtrace, 1, 1, 1); MODULE_DEPEND(kinst, opensolaris, 1, 1, 1); diff --git a/sys/cddl/dev/kinst/kinst.h b/sys/cddl/dev/kinst/kinst.h index ee756dc87d09..1107a274333f 100644 --- a/sys/cddl/dev/kinst/kinst.h +++ b/sys/cddl/dev/kinst/kinst.h @@ -1,76 +1,77 @@ /* * SPDX-License-Identifier: CDDL 1.0 * * Copyright 2022 Christos Margiolis */ #ifndef _KINST_H_ #define _KINST_H_ #include typedef struct { char kpd_func[DTRACE_FUNCNAMELEN]; char kpd_mod[DTRACE_MODNAMELEN]; int kpd_off; } dtrace_kinst_probedesc_t; #define KINSTIOC_MAKEPROBE _IOW('k', 1, dtrace_kinst_probedesc_t) #ifdef _KERNEL #include #include "kinst_isa.h" struct kinst_probe { LIST_ENTRY(kinst_probe) kp_hashnext; const char *kp_func; char kp_name[16]; dtrace_id_t kp_id; kinst_patchval_t kp_patchval; kinst_patchval_t kp_savedval; kinst_patchval_t *kp_patchpoint; struct kinst_probe_md kp_md; }; LIST_HEAD(kinst_probe_list, kinst_probe); extern struct kinst_probe_list *kinst_probetab; #define KINST_PROBETAB_MAX 0x8000 /* 32k */ #define KINST_ADDR2NDX(addr) (((uintptr_t)(addr)) & (KINST_PROBETAB_MAX - 1)) #define KINST_GETPROBE(i) (&kinst_probetab[KINST_ADDR2NDX(i)]) struct linker_file; struct linker_symval; +volatile void *kinst_memcpy(volatile void *, volatile const void *, size_t); int kinst_excluded(const char *); int kinst_md_excluded(const char *); int kinst_invop(uintptr_t, struct trapframe *, uintptr_t); int kinst_make_probe(struct linker_file *, int, struct linker_symval *, void *); void kinst_patch_tracepoint(struct kinst_probe *, kinst_patchval_t); void kinst_probe_create(struct kinst_probe *, struct linker_file *); int kinst_trampoline_init(void); int kinst_trampoline_deinit(void); uint8_t *kinst_trampoline_alloc(int); 
void	kinst_trampoline_dealloc(uint8_t *);

/*
 * Machine-dependent setup and teardown, called from kinst_load() and
 * kinst_unload().
 */
int	kinst_md_init(void);
void	kinst_md_deinit(void);

#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_KINST);
#endif /* MALLOC_DECLARE */

/*
 * Log a printf-style message prefixed with the calling function and line
 * number, followed by a newline.
 *
 * KINST_LOG() appends an empty string argument that is consumed by the extra
 * "%s" in KINST_LOG_HELPER(), so __VA_ARGS__ is never empty even when the
 * caller passes only a format string.  The format must be a string literal
 * because it is concatenated with the prefix and suffix.
 */
#define KINST_LOG_HELPER(fmt, ...)	\
	printf("%s:%d: " fmt "%s\n", __func__, __LINE__, __VA_ARGS__)
#define KINST_LOG(...)			\
	KINST_LOG_HELPER(__VA_ARGS__, "")

#endif /* _KERNEL */

#endif /* _KINST_H_ */