diff --git a/sys/cddl/dev/dtrace/dtrace_cddl.h b/sys/cddl/dev/dtrace/dtrace_cddl.h
index b2397d621355..720c231bfca8 100644
--- a/sys/cddl/dev/dtrace/dtrace_cddl.h
+++ b/sys/cddl/dev/dtrace/dtrace_cddl.h
@@ -1,178 +1,178 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  *
  * $FreeBSD$
  *
  */
 #ifndef _DTRACE_CDDL_H_
 #define	_DTRACE_CDDL_H_
 
 #include <sys/proc.h>
 
 #define	LOCK_LEVEL	10
 
 /*
  * Kernel DTrace extension to 'struct proc' for FreeBSD.
  */
 typedef struct kdtrace_proc {
 	int		p_dtrace_probes; /* Are there probes for this proc? */
 	uint64_t	p_dtrace_count;	/* Number of DTrace tracepoints */
 	void		*p_dtrace_helpers; /* DTrace helpers, if any */
 	int		p_dtrace_model;
 	uint64_t	p_fasttrap_tp_gen; /* Tracepoint hash table gen */
 } kdtrace_proc_t;
 
 /*
  * Kernel DTrace extension to 'struct thread' for FreeBSD.
  */
 typedef struct kdtrace_thread {
 	uint8_t		td_dtrace_stop;	/* Indicates a DTrace-desired stop */
 	uint8_t		td_dtrace_sig;	/* Signal sent via DTrace's raise() */
 	uint8_t		td_dtrace_inprobe; /* Are we in a probe? */
 	u_int		td_predcache;	/* DTrace predicate cache */
 	uint64_t	td_dtrace_vtime; /* DTrace virtual time */
 	uint64_t	td_dtrace_start; /* DTrace slice start time */
 
 	union __tdu {
 		struct __tds {
 			uint8_t	_td_dtrace_on; /* Hit a fasttrap tracepoint. */
 			uint8_t	_td_dtrace_step; /* About to return to kernel. */
 			uint8_t	_td_dtrace_ret;	/* Handling a return probe. */
 			uint8_t	_td_dtrace_ast;	/* Saved ast flag. */
 #ifdef __amd64__
 			uint8_t	_td_dtrace_reg;
 #endif
 		} _tds;
 		u_long	_td_dtrace_ft;	/* Bitwise or of these flags. */
 	} _tdu;
 #define	td_dtrace_ft	_tdu._td_dtrace_ft
 #define	td_dtrace_on	_tdu._tds._td_dtrace_on
 #define	td_dtrace_step	_tdu._tds._td_dtrace_step
 #define	td_dtrace_ret	_tdu._tds._td_dtrace_ret
 #define	td_dtrace_ast	_tdu._tds._td_dtrace_ast
 #define	td_dtrace_reg	_tdu._tds._td_dtrace_reg
 
 	uintptr_t	td_dtrace_pc;	/* DTrace saved pc from fasttrap. */
 	uintptr_t	td_dtrace_npc;	/* DTrace next pc from fasttrap. */
 	uintptr_t	td_dtrace_scrpc; /* DTrace per-thread scratch location. */
 	uintptr_t	td_dtrace_astpc; /* DTrace return sequence location. */
 #ifdef __amd64__
 	uintptr_t	td_dtrace_regv;
 #endif
 	uint64_t	td_hrtime;	/* Last time on cpu. */
 	void		*td_dtrace_sscr; /* Saved scratch space location. */
 	void		*td_systrace_args; /* syscall probe arguments. */
 	uint64_t	td_fasttrap_tp_gen; /* Tracepoint hash table gen. */
 	struct trapframe *td_dtrace_trapframe; /* Trap frame from invop. */
-	void		*td_kinst;
+	void		*td_kinst_tramp;
 } kdtrace_thread_t;
 
 /*
  * Definitions to reference fields in the FreeBSD DTrace structures defined
  * above using the names of fields in similar structures in Solaris.  Note
  * that the separation on FreeBSD is a licensing constraint designed to
  * keep the GENERIC kernel BSD licensed.
  */
 #define	t_dtrace_vtime	td_dtrace->td_dtrace_vtime
 #define	t_dtrace_start	td_dtrace->td_dtrace_start
 #define	t_dtrace_stop	td_dtrace->td_dtrace_stop
 #define	t_dtrace_sig	td_dtrace->td_dtrace_sig
 #define	t_dtrace_inprobe	td_dtrace->td_dtrace_inprobe
 #define	t_predcache	td_dtrace->td_predcache
 #define	t_dtrace_ft	td_dtrace->td_dtrace_ft
 #define	t_dtrace_on	td_dtrace->td_dtrace_on
 #define	t_dtrace_step	td_dtrace->td_dtrace_step
 #define	t_dtrace_ret	td_dtrace->td_dtrace_ret
 #define	t_dtrace_ast	td_dtrace->td_dtrace_ast
 #define	t_dtrace_reg	td_dtrace->td_dtrace_reg
 #define	t_dtrace_pc	td_dtrace->td_dtrace_pc
 #define	t_dtrace_npc	td_dtrace->td_dtrace_npc
 #define	t_dtrace_scrpc	td_dtrace->td_dtrace_scrpc
 #define	t_dtrace_astpc	td_dtrace->td_dtrace_astpc
 #define	t_dtrace_regv	td_dtrace->td_dtrace_regv
 #define	t_dtrace_sscr	td_dtrace->td_dtrace_sscr
 #define	t_dtrace_systrace_args	td_dtrace->td_systrace_args
 #define	t_fasttrap_tp_gen	td_dtrace->td_fasttrap_tp_gen
 #define	t_dtrace_trapframe	td_dtrace->td_dtrace_trapframe
-#define	t_kinst		td_dtrace->td_kinst
+#define	t_kinst_tramp	td_dtrace->td_kinst_tramp
 #define	p_dtrace_helpers	p_dtrace->p_dtrace_helpers
 #define	p_dtrace_count	p_dtrace->p_dtrace_count
 #define	p_dtrace_probes	p_dtrace->p_dtrace_probes
 #define	p_model		p_dtrace->p_dtrace_model
 #define	p_fasttrap_tp_gen	p_dtrace->p_fasttrap_tp_gen
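These Solaris-compatibility names work only because every access goes through the td_dtrace pointer that struct thread carries: an expression such as curthread->t_kinst_tramp expands to curthread->td_dtrace->td_kinst_tramp. A minimal userland sketch of the same macro trick (the struct layouts here are illustrative stand-ins, not the kernel's definitions):

#include <stdio.h>

struct kdtrace_thread { void *td_kinst_tramp; };
struct thread { struct kdtrace_thread *td_dtrace; };

/* As in dtrace_cddl.h: the macro hides one level of indirection. */
#define t_kinst_tramp	td_dtrace->td_kinst_tramp

int
main(void)
{
	struct kdtrace_thread kt = { .td_kinst_tramp = (void *)0x1234 };
	struct thread td = { .td_dtrace = &kt };

	/* Expands to td.td_dtrace->td_kinst_tramp. */
	printf("%p\n", td.t_kinst_tramp);
	return (0);
}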
 
 #define	DATAMODEL_NATIVE	0
 #ifdef __amd64__
 #define	DATAMODEL_LP64		0
 #define	DATAMODEL_ILP32		1
 #else
 #define	DATAMODEL_LP64		1
 #define	DATAMODEL_ILP32		0
 #endif
 
 /*
  * Definitions for fields in struct proc which are named differently in
  * FreeBSD.
  */
 #define	p_cred		p_ucred
 #define	p_parent	p_pptr
 
 /*
  * Definitions for fields in struct thread which are named differently in
  * FreeBSD.
  */
 #define	t_procp		td_proc
 #define	t_tid		td_tid
 #define	t_did		td_tid
 #define	t_cred		td_ucred
 
 int priv_policy(const cred_t *, int, boolean_t, int, const char *);
 boolean_t priv_policy_only(const cred_t *, int, boolean_t);
 boolean_t priv_policy_choice(const cred_t *, int, boolean_t);
 
 /*
  * Test privilege.  Audit success or failure, allow privilege debugging.
  * Returns 0 for success, err for failure.
  */
 #define	PRIV_POLICY(cred, priv, all, err, reason)			\
 	priv_policy((cred), (priv), (all), (err), (reason))
 
 /*
  * Test privilege.  Audit success only, no privilege debugging.
  * Returns 1 for success, and 0 for failure.
  */
 #define	PRIV_POLICY_CHOICE(cred, priv, all)				\
 	priv_policy_choice((cred), (priv), (all))
 
 /*
  * Test privilege.  No priv_debugging, no auditing.
  * Returns 1 for success, and 0 for failure.
  */
 #define	PRIV_POLICY_ONLY(cred, priv, all)				\
 	priv_policy_only((cred), (priv), (all))
 
 #endif	/* !_DTRACE_CDDL_H_ */
diff --git a/sys/cddl/dev/kinst/amd64/kinst_isa.c b/sys/cddl/dev/kinst/amd64/kinst_isa.c
index f8bfad8fae60..76b590f271bf 100644
--- a/sys/cddl/dev/kinst/amd64/kinst_isa.c
+++ b/sys/cddl/dev/kinst/amd64/kinst_isa.c
@@ -1,618 +1,618 @@
 /*
  * SPDX-License-Identifier: CDDL 1.0
  *
  * Copyright 2022 Christos Margiolis
  * Copyright 2022 Mark Johnston
  */
 
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 
 #include "kinst.h"
 
 #define	KINST_PUSHL_RBP		0x55
 #define	KINST_STI		0xfb
 #define	KINST_POPF		0x9d
 
 #define	KINST_MODRM_MOD(b)	(((b) & 0xc0) >> 6)
 #define	KINST_MODRM_REG(b)	(((b) & 0x38) >> 3)
 #define	KINST_MODRM_RM(b)	((b) & 0x07)
 
 #define	KINST_SIB_SCALE(s)	(((s) & 0xc0) >> 6)
 #define	KINST_SIB_INDEX(s)	(((s) & 0x38) >> 3)
 #define	KINST_SIB_BASE(s)	(((s) & 0x07) >> 0)
 
 #define	KINST_REX_W(r)		(((r) & 0x08) >> 3)
 #define	KINST_REX_R(r)		(((r) & 0x04) >> 2)
 #define	KINST_REX_X(r)		(((r) & 0x02) >> 1)
 #define	KINST_REX_B(r)		(((r) & 0x01) >> 0)
 
 #define	KINST_F_CALL		0x0001	/* instruction is a "call" */
 #define	KINST_F_DIRECT_CALL	0x0002	/* instruction is a direct call */
 #define	KINST_F_RIPREL		0x0004	/* instruction is position-dependent */
 #define	KINST_F_JMP		0x0008	/* instruction is a %rip-relative jmp */
 #define	KINST_F_MOD_DIRECT	0x0010	/* operand is not a memory address */
 
 /*
  * Per-CPU trampolines used when the probed instruction is executed with
  * interrupts disabled.  In that case the per-thread trampoline cannot be
  * used: the probe may fire in an interrupt handler that interrupted the
  * thread mid-trampoline, and overwriting the thread's trampoline would
  * corrupt it while it is still in use.
  */
 DPCPU_DEFINE_STATIC(uint8_t *, intr_tramp);
 
 /*
  * Map ModR/M register bits to a trapframe offset.
  */
 static int
 kinst_regoff(int reg)
 {
 #define	_MATCH_REG(i, reg)			\
 	case i:					\
 		return (offsetof(struct trapframe, tf_ ## reg) / \
 		    sizeof(register_t))
 	switch (reg) {
 	_MATCH_REG( 0, rax);
 	_MATCH_REG( 1, rcx);
 	_MATCH_REG( 2, rdx);
 	_MATCH_REG( 3, rbx);
 	_MATCH_REG( 4, rsp);	/* SIB when mod != 3 */
 	_MATCH_REG( 5, rbp);
 	_MATCH_REG( 6, rsi);
 	_MATCH_REG( 7, rdi);
 	_MATCH_REG( 8, r8);	/* REX.R is set */
 	_MATCH_REG( 9, r9);
 	_MATCH_REG(10, r10);
 	_MATCH_REG(11, r11);
 	_MATCH_REG(12, r12);
 	_MATCH_REG(13, r13);
 	_MATCH_REG(14, r14);
 	_MATCH_REG(15, r15);
 	}
 #undef _MATCH_REG
 	panic("%s: unhandled register index %d", __func__, reg);
 }
 
 /*
  * Obtain the specified register's value.
  */
 static uint64_t
 kinst_regval(struct trapframe *frame, int reg)
 {
 	if (reg == -1)
 		return (0);
 	return (((register_t *)frame)[kinst_regoff(reg)]);
 }
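To see how the ModR/M, SIB, and REX accessors above combine, consider a hypothetical indirect call, call *0x8(%rax,%rbx,4), encoded as ff 54 98 08. The sketch below decodes it the same way kinst does; kinst_invop() would then compute the branch target as *(%rax + (%rbx << 2) + 0x8):

#include <stdio.h>

#define KINST_MODRM_MOD(b)	(((b) & 0xc0) >> 6)
#define KINST_MODRM_REG(b)	(((b) & 0x38) >> 3)
#define KINST_MODRM_RM(b)	((b) & 0x07)
#define KINST_SIB_SCALE(s)	(((s) & 0xc0) >> 6)
#define KINST_SIB_INDEX(s)	(((s) & 0x38) >> 3)
#define KINST_SIB_BASE(s)	(((s) & 0x07) >> 0)

int
main(void)
{
	unsigned char modrm = 0x54, sib = 0x98, disp8 = 0x08;

	/* mod=1 (8-bit displacement), reg=2 (FF /2, indirect call),
	 * rm=4 (a SIB byte follows). */
	printf("mod=%d reg=%d rm=%d\n", KINST_MODRM_MOD(modrm),
	    KINST_MODRM_REG(modrm), KINST_MODRM_RM(modrm));

	/* scale=2, index=3 (%rbx), base=0 (%rax): kinst records
	 * reg1=0, reg2=3, scale=2, disp=8, matching the
	 * kinst_regval() arithmetic in kinst_invop(). */
	printf("scale=%d index=%d base=%d disp=%d\n", KINST_SIB_SCALE(sib),
	    KINST_SIB_INDEX(sib), KINST_SIB_BASE(sib), disp8);
	return (0);
}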
 
 static uint32_t
 kinst_riprel_disp(struct kinst_probe *kp, void *dst)
 {
 	return ((uint32_t)((intptr_t)kp->kp_patchpoint + kp->kp_md.disp -
 	    (intptr_t)dst));
 }
 
 static void
 kinst_trampoline_populate(struct kinst_probe *kp, uint8_t *tramp)
 {
 	uint8_t *instr;
 	uint32_t disp;
 	int ilen;
 
 	ilen = kp->kp_md.tinstlen;
 
 	kinst_memcpy(tramp, kp->kp_md.template, ilen);
 	if ((kp->kp_md.flags & KINST_F_RIPREL) != 0) {
 		disp = kinst_riprel_disp(kp, tramp);
 		kinst_memcpy(&tramp[kp->kp_md.dispoff], &disp,
 		    sizeof(uint32_t));
 	}
 
 	/*
 	 * The following position-independent jmp takes us back to the
 	 * original code.  It is encoded as "jmp *0(%rip)" (six bytes),
 	 * followed by the absolute address of the instruction following
 	 * the one that was traced (eight bytes).
 	 */
 	tramp[ilen + 0] = 0xff;
 	tramp[ilen + 1] = 0x25;
 	tramp[ilen + 2] = 0x00;
 	tramp[ilen + 3] = 0x00;
 	tramp[ilen + 4] = 0x00;
 	tramp[ilen + 5] = 0x00;
 	instr = kp->kp_patchpoint + kp->kp_md.instlen;
 	kinst_memcpy(&tramp[ilen + 6], &instr, sizeof(uintptr_t));
 }
 
 int
 kinst_invop(uintptr_t addr, struct trapframe *frame, uintptr_t scratch)
 {
 	solaris_cpu_t *cpu;
 	uintptr_t *stack, retaddr;
 	struct kinst_probe *kp;
 	struct kinst_probe_md *kpmd;
 	uint8_t *tramp;
 
 	stack = (uintptr_t *)frame->tf_rsp;
 	cpu = &solaris_cpu[curcpu];
 
 	LIST_FOREACH(kp, KINST_GETPROBE(addr), kp_hashnext) {
 		if ((uintptr_t)kp->kp_patchpoint == addr)
 			break;
 	}
 	if (kp == NULL)
 		return (0);
 
 	/*
 	 * Report the address of the breakpoint for the benefit of consumers
 	 * fetching register values with regs[].
 	 */
 	frame->tf_rip--;
 
 	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
 	cpu->cpu_dtrace_caller = stack[0];
 	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
 
 	dtrace_probe(kp->kp_id, 0, 0, 0, 0, 0);
 
 	cpu->cpu_dtrace_caller = 0;
 
 	kpmd = &kp->kp_md;
 	if ((kpmd->flags & KINST_F_CALL) != 0) {
 		/*
 		 * dtrace_invop_start() reserves space on the stack to
 		 * store the return address of the call instruction.
 		 */
 		retaddr = (uintptr_t)(kp->kp_patchpoint + kpmd->instlen);
 		*(uintptr_t *)scratch = retaddr;
 
 		if ((kpmd->flags & KINST_F_DIRECT_CALL) != 0) {
 			frame->tf_rip = (uintptr_t)(kp->kp_patchpoint +
 			    kpmd->disp + kpmd->instlen);
 		} else {
 			register_t rval;
 
 			if (kpmd->reg1 == -1 && kpmd->reg2 == -1) {
 				/* rip-relative */
 				rval = frame->tf_rip + kpmd->instlen;
 			} else {
 				/* indirect */
 				rval = kinst_regval(frame, kpmd->reg1) +
 				    (kinst_regval(frame, kpmd->reg2) <<
 				    kpmd->scale);
 			}
 
 			if ((kpmd->flags & KINST_F_MOD_DIRECT) != 0) {
 				frame->tf_rip = rval + kpmd->disp;
 			} else {
 				frame->tf_rip =
 				    *(uintptr_t *)(rval + kpmd->disp);
 			}
 		}
 
 		return (DTRACE_INVOP_CALL);
 	} else {
 		if ((frame->tf_rflags & PSL_I) == 0)
 			tramp = DPCPU_GET(intr_tramp);
 		else
-			tramp = curthread->t_kinst;
+			tramp = curthread->t_kinst_tramp;
 		if (tramp == NULL) {
 			/*
 			 * A trampoline allocation failed, so this probe is
 			 * effectively disabled.  Restore the original
 			 * instruction.
 			 *
 			 * We can't safely print anything here, but the
 			 * trampoline allocator should have left a breadcrumb
 			 * in the dmesg.
 			 */
 			kinst_patch_tracepoint(kp, kp->kp_savedval);
 			frame->tf_rip = (register_t)kp->kp_patchpoint;
 		} else {
 			kinst_trampoline_populate(kp, tramp);
 			frame->tf_rip = (register_t)tramp;
 		}
 
 		return (DTRACE_INVOP_NOP);
 	}
 }
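For concreteness, this is the trampoline layout that kinst_trampoline_populate() builds, reconstructed as a userland sketch; the patch point address and the three-byte instruction are made up:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Build the return jump the same way kinst_trampoline_populate() does:
 * "jmp *0(%rip)" (ff 25 00 00 00 00) followed by the absolute address of
 * the instruction after the traced one.  The resulting layout is:
 *
 *   tramp[0 .. ilen-1]       copied instruction (displacement rewritten
 *                            if it was %rip-relative)
 *   tramp[ilen .. ilen+5]    ff 25 00 00 00 00    jmp *0(%rip)
 *   tramp[ilen+6 .. ilen+13] little-endian address of patchpoint + instlen
 *
 * "jmp *0(%rip)" loads its target from the eight bytes that follow it, so
 * control returns to the original code without touching any register.
 */
static void
fill_return_jmp(uint8_t *tramp, int ilen, uintptr_t patchpoint, int instlen)
{
	uintptr_t next = patchpoint + instlen;

	tramp[ilen + 0] = 0xff;
	tramp[ilen + 1] = 0x25;
	memset(&tramp[ilen + 2], 0, 4);	/* disp32 = 0: target follows */
	memcpy(&tramp[ilen + 6], &next, sizeof(next));
}

int
main(void)
{
	uint8_t tramp[32] = { 0x90, 0x90, 0x90 };	/* pretend instruction */

	fill_return_jmp(tramp, 3, 0xffffffff81234567UL, 3);
	for (int i = 0; i < 17; i++)
		printf("%02x ", tramp[i]);
	printf("\n");
	return (0);
}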
 
 void
 kinst_patch_tracepoint(struct kinst_probe *kp, kinst_patchval_t val)
 {
 	register_t reg;
 	int oldwp;
 
 	reg = intr_disable();
 	oldwp = disable_wp();
 	*kp->kp_patchpoint = val;
 	restore_wp(oldwp);
 	intr_restore(reg);
 }
 
 static void
 kinst_set_disp8(struct kinst_probe *kp, uint8_t byte)
 {
 	kp->kp_md.disp = (int64_t)(int8_t)byte;
 }
 
 static void
 kinst_set_disp32(struct kinst_probe *kp, uint8_t *bytes)
 {
 	int32_t disp32;
 
 	memcpy(&disp32, bytes, sizeof(disp32));
 	kp->kp_md.disp = (int64_t)disp32;
 }
 
 /*
  * Set up all of the state needed to faithfully execute a probed instruction.
  *
  * In the simple case, we copy the instruction unmodified to a per-thread
  * trampoline, wherein it is followed by a jump back to the original code.
  * - Instructions can have %rip as an operand:
  *   - with %rip-relative addressing encoded in ModR/M, or
  *   - implicitly as a part of the instruction definition (jmp, call).
  * - Call instructions (which may be %rip-relative) need to push the correct
  *   return address onto the stack.
  *
  * Call instructions are simple enough to be emulated in software, so we do
  * not use the trampoline mechanism in that case.  kinst_invop() will compute
  * the branch target using the address info computed here (register operands
  * and displacement).
  *
  * %rip-relative operands encoded using the ModR/M byte always use a 32-bit
  * displacement; when populating the trampoline the displacement is adjusted
  * to be relative to the trampoline address.  Trampolines are always allocated
  * above KERNBASE for this reason.
  *
  * For other %rip-relative operands (just jumps) we take the same approach.
  * Instructions which specify an 8-bit displacement must be rewritten to use
  * a 32-bit displacement.
  */
 static int
 kinst_instr_dissect(struct kinst_probe *kp, uint8_t **instr)
 {
 	struct kinst_probe_md *kpmd;
 	dis86_t d86;
 	uint8_t *bytes, modrm, rex;
 	int dispoff, i, ilen, opcidx;
 
 	kpmd = &kp->kp_md;
 
 	d86.d86_data = instr;
 	d86.d86_get_byte = dtrace_dis_get_byte;
 	d86.d86_check_func = NULL;
 	if (dtrace_disx86(&d86, SIZE64) != 0) {
 		KINST_LOG("failed to disassemble instruction at: %p", *instr);
 		return (EINVAL);
 	}
 	bytes = d86.d86_bytes;
 	kpmd->instlen = kpmd->tinstlen = d86.d86_len;
 
 	/*
 	 * Skip over prefixes, save REX.
 	 */
 	rex = 0;
 	for (i = 0; i < kpmd->instlen; i++) {
 		switch (bytes[i]) {
 		case 0xf0 ... 0xf3:
 			/* group 1 */
 			continue;
 		case 0x26:
 		case 0x2e:
 		case 0x36:
 		case 0x3e:
 		case 0x64:
 		case 0x65:
 			/* group 2 */
 			continue;
 		case 0x66:
 			/* group 3 */
 			continue;
 		case 0x67:
 			/* group 4 */
 			continue;
 		case 0x40 ... 0x4f:
 			/* REX */
 			rex = bytes[i];
 			continue;
 		}
 		break;
 	}
 	KASSERT(i < kpmd->instlen,
 	    ("%s: failed to disassemble instruction at %p", __func__, bytes));
 	opcidx = i;
 
 	/*
 	 * Identify instructions of interest by opcode: calls and jumps.
 	 * Extract displacements.
 	 */
 	dispoff = -1;
 	switch (bytes[opcidx]) {
 	case 0x0f:
 		switch (bytes[opcidx + 1]) {
 		case 0x80 ... 0x8f:
 			/* conditional jmp near */
 			kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;
 			dispoff = opcidx + 2;
 			kinst_set_disp32(kp, &bytes[dispoff]);
 			break;
 		}
 		break;
 	case 0xe3:
 		/*
 		 * There is no straightforward way to translate this
 		 * instruction to use a 32-bit displacement.  Fortunately,
 		 * it is rarely used.
 		 */
 		return (EINVAL);
 	case 0x70 ... 0x7f:
 		/* conditional jmp short */
 		kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;
 		dispoff = opcidx + 1;
 		kinst_set_disp8(kp, bytes[dispoff]);
 		break;
 	case 0xe9:
 		/* unconditional jmp near */
 		kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;
 		dispoff = opcidx + 1;
 		kinst_set_disp32(kp, &bytes[dispoff]);
 		break;
 	case 0xeb:
 		/* unconditional jmp short */
 		kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;
 		dispoff = opcidx + 1;
 		kinst_set_disp8(kp, bytes[dispoff]);
 		break;
 	case 0xe8:
 	case 0x9a:
 		/* direct call */
 		kpmd->flags |= KINST_F_CALL | KINST_F_DIRECT_CALL;
 		dispoff = opcidx + 1;
 		kinst_set_disp32(kp, &bytes[dispoff]);
 		break;
 	case 0xff:
 		KASSERT(d86.d86_got_modrm,
 		    ("no ModR/M byte for instr at %p",
 		    *instr - kpmd->instlen));
 		switch (KINST_MODRM_REG(bytes[d86.d86_rmindex])) {
 		case 0x02:
 		case 0x03:
 			/* indirect call */
 			kpmd->flags |= KINST_F_CALL;
 			break;
 		case 0x04:
 		case 0x05:
 			/* indirect jump */
 			kpmd->flags |= KINST_F_JMP;
 			break;
 		}
 	}
 
 	/*
 	 * If there's a ModR/M byte, we need to check it to see if the operand
 	 * is %rip-relative, and rewrite the displacement if so.  If not, we
 	 * might still have to extract operand info if this is a call
 	 * instruction.
 	 */
 	if (d86.d86_got_modrm) {
 		uint8_t mod, rm, sib;
 
 		kpmd->reg1 = kpmd->reg2 = -1;
 
 		modrm = bytes[d86.d86_rmindex];
 		mod = KINST_MODRM_MOD(modrm);
 		rm = KINST_MODRM_RM(modrm);
 		if (mod == 0 && rm == 5) {
 			kpmd->flags |= KINST_F_RIPREL;
 			dispoff = d86.d86_rmindex + 1;
 			kinst_set_disp32(kp, &bytes[dispoff]);
 		} else if ((kpmd->flags & KINST_F_CALL) != 0) {
 			bool havesib;
 
 			havesib = (mod != 3 && rm == 4);
 			dispoff = d86.d86_rmindex + (havesib ? 2 : 1);
 			if (mod == 1)
 				kinst_set_disp8(kp, bytes[dispoff]);
 			else if (mod == 2)
 				kinst_set_disp32(kp, &bytes[dispoff]);
 			else if (mod == 3)
 				kpmd->flags |= KINST_F_MOD_DIRECT;
 
 			if (havesib) {
 				sib = bytes[d86.d86_rmindex + 1];
 				if (KINST_SIB_BASE(sib) != 5) {
 					kpmd->reg1 = KINST_SIB_BASE(sib) |
 					    (KINST_REX_B(rex) << 3);
 				}
 				kpmd->scale = KINST_SIB_SCALE(sib);
 				kpmd->reg2 = KINST_SIB_INDEX(sib) |
 				    (KINST_REX_X(rex) << 3);
 			} else {
 				kpmd->reg1 = rm | (KINST_REX_B(rex) << 3);
 			}
 		}
 	}
 
 	/*
 	 * Calls are emulated in software; once operands are decoded we have
 	 * nothing else to do.
 	 */
 	if ((kpmd->flags & KINST_F_CALL) != 0)
 		return (0);
 
 	/*
 	 * Allocate and populate an instruction trampoline template.
 	 *
 	 * Position-independent instructions can simply be copied, but
 	 * position-dependent instructions require some surgery: jump
 	 * instructions with an 8-bit displacement need to be converted to
 	 * use a 32-bit displacement, and the adjusted displacement needs to
 	 * be computed.
 	 */
 	ilen = kpmd->instlen;
 	if ((kpmd->flags & KINST_F_RIPREL) != 0) {
 		if ((kpmd->flags & KINST_F_JMP) == 0 ||
 		    bytes[opcidx] == 0x0f || bytes[opcidx] == 0xe9 ||
 		    bytes[opcidx] == 0xff) {
 			memcpy(kpmd->template, bytes, dispoff);
 			memcpy(&kpmd->template[dispoff + 4],
 			    &bytes[dispoff + 4], ilen - (dispoff + 4));
 			kpmd->dispoff = dispoff;
 		} else if (bytes[opcidx] == 0xeb) {
 			memcpy(kpmd->template, bytes, opcidx);
 			kpmd->template[opcidx] = 0xe9;
 			kpmd->dispoff = opcidx + 1;
 
 			/* Instruction length changes from 2 to 5. */
 			kpmd->tinstlen = 5;
 			kpmd->disp -= 3;
 		} else if (bytes[opcidx] >= 0x70 && bytes[opcidx] <= 0x7f) {
 			memcpy(kpmd->template, bytes, opcidx);
 			kpmd->template[opcidx] = 0x0f;
 			kpmd->template[opcidx + 1] = bytes[opcidx] + 0x10;
 			kpmd->dispoff = opcidx + 2;
 
 			/* Instruction length changes from 2 to 6. */
 			kpmd->tinstlen = 6;
 			kpmd->disp -= 4;
 		} else {
 			panic("unhandled opcode %#x", bytes[opcidx]);
 		}
 	} else {
 		memcpy(kpmd->template, bytes, ilen);
 	}
 
 	return (0);
 }
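The displacement arithmetic in the 8-bit-to-32-bit jump rewrites above can be checked independently. Take je with an 8-bit displacement, 74 0e: its target is pc + 2 + 0x0e. The rewritten form is 0f 84 followed by a 32-bit displacement, and since the instruction grows from 2 to 6 bytes, the displacement must shrink by 4 for the target to stay put (hence disp -= 4; the unconditional 0xeb case grows from 2 to 5 bytes, hence disp -= 3). A quick sanity check with a made-up address:

#include <assert.h>
#include <stdio.h>

int
main(void)
{
	long pc = 0x1000;	/* hypothetical address of the jcc */
	int disp8 = 0x0e;

	long target_short = pc + 2 + disp8;		/* 74 0e */
	long target_near = pc + 6 + (disp8 - 4);	/* 0f 84 0a 00 00 00 */

	/* Both encodings reach the same target from the same pc. */
	assert(target_short == target_near);
	printf("target %#lx\n", target_short);
	return (0);
}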
 
 int
 kinst_make_probe(linker_file_t lf, int symindx, linker_symval_t *symval,
     void *opaque)
 {
 	struct kinst_probe *kp;
 	dtrace_kinst_probedesc_t *pd;
 	const char *func;
 	int error, instrsize, n, off;
 	uint8_t *instr, *limit;
 
 	pd = opaque;
 	func = symval->name;
 	if (kinst_excluded(func))
 		return (0);
 	if (strcmp(func, pd->kpd_func) != 0)
 		return (0);
 
 	instr = (uint8_t *)symval->value;
 	limit = (uint8_t *)symval->value + symval->size;
 	if (instr >= limit)
 		return (0);
 
 	/*
 	 * Ignore functions not beginning with the usual function prologue.
 	 * These might correspond to exception handlers with which we should
 	 * not meddle.  This does, however, exclude functions which could be
 	 * traced safely, such as cpu_switch().
 	 */
 	if (*instr != KINST_PUSHL_RBP)
 		return (0);
 
 	n = 0;
 	while (instr < limit) {
 		instrsize = dtrace_instr_size(instr);
 		off = (int)(instr - (uint8_t *)symval->value);
 		if (pd->kpd_off != -1 && off != pd->kpd_off) {
 			instr += instrsize;
 			continue;
 		}
 
 		/*
 		 * Check for instructions which may enable interrupts.  Such
 		 * instructions are tricky to trace since it is unclear
 		 * whether to use the per-thread or per-CPU trampolines.
 		 * Since they are rare, we don't bother to implement special
 		 * handling for them.
 		 *
 		 * If the caller specified an offset, return an error,
 		 * otherwise silently ignore the instruction so that it
 		 * remains possible to enable all instructions in a function.
 		 */
 		if (instrsize == 1 &&
 		    (instr[0] == KINST_POPF || instr[0] == KINST_STI)) {
 			if (pd->kpd_off != -1)
 				return (EINVAL);
 			instr += instrsize;
 			continue;
 		}
 
 		/*
 		 * Prevent separate dtrace(1) instances from creating copies
 		 * of the same probe.
 		 */
 		LIST_FOREACH(kp, KINST_GETPROBE(instr), kp_hashnext) {
 			if (strcmp(kp->kp_func, func) == 0 &&
 			    strtol(kp->kp_name, NULL, 10) == off)
 				return (0);
 		}
 		if (++n > KINST_PROBETAB_MAX) {
 			KINST_LOG("probe list full: %d entries", n);
 			return (ENOMEM);
 		}
 
 		kp = malloc(sizeof(struct kinst_probe), M_KINST,
 		    M_WAITOK | M_ZERO);
 		kp->kp_func = func;
 		snprintf(kp->kp_name, sizeof(kp->kp_name), "%d", off);
 		kp->kp_savedval = *instr;
 		kp->kp_patchval = KINST_PATCHVAL;
 		kp->kp_patchpoint = instr;
 
 		error = kinst_instr_dissect(kp, &instr);
 		if (error != 0)
 			return (error);
 
 		kinst_probe_create(kp, lf);
 	}
 
 	return (0);
 }
 
 int
 kinst_md_init(void)
 {
 	uint8_t *tramp;
 	int cpu;
 
 	CPU_FOREACH(cpu) {
 		tramp = kinst_trampoline_alloc(M_WAITOK);
 		if (tramp == NULL)
 			return (ENOMEM);
 		DPCPU_ID_SET(cpu, intr_tramp, tramp);
 	}
 
 	return (0);
 }
 
 void
 kinst_md_deinit(void)
 {
 	uint8_t *tramp;
 	int cpu;
 
 	CPU_FOREACH(cpu) {
 		tramp = DPCPU_ID_GET(cpu, intr_tramp);
 		if (tramp != NULL) {
 			kinst_trampoline_dealloc(tramp);
 			DPCPU_ID_SET(cpu, intr_tramp, NULL);
 		}
 	}
 }
 
 /*
  * Exclude machine-dependent functions that are not safe to trace.
  */
 bool
 kinst_md_excluded(const char *name)
 {
 	return (false);
 }
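For readers coming at this from the consumer side: kinst_make_probe() names each probe after the decimal byte offset of the instruction within the traced function, and kpd_off == -1 requests all of them. Assuming the kinst module is loaded, one-liners along the lines of dtrace -n 'kinst::amd64_syscall:' (every instruction in the function) or dtrace -n 'kinst::vm_fault:23' (a single offset) would exercise these paths; the function names and the offset here are only illustrative.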
diff --git a/sys/cddl/dev/kinst/trampoline.c b/sys/cddl/dev/kinst/trampoline.c
index 93eca6b7e6c8..babbf84f1d88 100644
--- a/sys/cddl/dev/kinst/trampoline.c
+++ b/sys/cddl/dev/kinst/trampoline.c
@@ -1,324 +1,325 @@
 /*
  * SPDX-License-Identifier: CDDL 1.0
  *
  * Copyright 2022 Christos Margiolis
  * Copyright 2022 Mark Johnston
  */
 
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 
 #include "kinst.h"
 #include "kinst_isa.h"
 
 #define	KINST_TRAMP_FILL_PATTERN	((kinst_patchval_t []){KINST_PATCHVAL})
 #define	KINST_TRAMP_FILL_SIZE		sizeof(kinst_patchval_t)
 
 #define	KINST_TRAMPCHUNK_SIZE		PAGE_SIZE
 #define	KINST_TRAMPS_PER_CHUNK		(KINST_TRAMPCHUNK_SIZE / KINST_TRAMP_SIZE)
 
 struct trampchunk {
 	TAILQ_ENTRY(trampchunk) next;
 	uint8_t *addr;
 	/* 0 -> allocated, 1 -> free */
 	BITSET_DEFINE(, KINST_TRAMPS_PER_CHUNK) free;
 };
 
 static TAILQ_HEAD(, trampchunk) kinst_trampchunks =
     TAILQ_HEAD_INITIALIZER(kinst_trampchunks);
 static struct sx kinst_tramp_sx;
 SX_SYSINIT(kinst_tramp_sx, &kinst_tramp_sx, "kinst tramp");
 static eventhandler_tag kinst_thread_ctor_handler;
 static eventhandler_tag kinst_thread_dtor_handler;
 
 /*
  * Fill the trampolines with KINST_TRAMP_FILL_PATTERN so that the kernel will
  * crash cleanly if things somehow go wrong.
  */
 static void
 kinst_trampoline_fill(uint8_t *addr, int size)
 {
 	int i;
 
 	for (i = 0; i < size; i += KINST_TRAMP_FILL_SIZE) {
 		memcpy(&addr[i], KINST_TRAMP_FILL_PATTERN,
 		    KINST_TRAMP_FILL_SIZE);
 	}
 }
 
 static struct trampchunk *
 kinst_trampchunk_alloc(void)
 {
 	struct trampchunk *chunk;
 	vm_offset_t trampaddr;
 	int error __diagused;
 
 	sx_assert(&kinst_tramp_sx, SX_XLOCKED);
 
 #ifdef __amd64__
 	/*
 	 * To simplify population of trampolines, we follow the amd64
 	 * kernel's code model and allocate them above KERNBASE, i.e., in
 	 * the top 2GB of the kernel's virtual address space; other
 	 * platforms do not have this constraint.
 	 */
 	trampaddr = KERNBASE;
 #else
 	trampaddr = VM_MIN_KERNEL_ADDRESS;
 #endif
 	/*
 	 * Allocate virtual memory for the trampoline chunk.  The returned
 	 * address is saved in "trampaddr".  Trampolines must be executable,
 	 * so max_prot must include VM_PROT_EXECUTE.
 	 */
 	error = vm_map_find(kernel_map, NULL, 0, &trampaddr,
 	    KINST_TRAMPCHUNK_SIZE, 0, VMFS_ANY_SPACE, VM_PROT_ALL,
 	    VM_PROT_ALL, 0);
 	if (error != KERN_SUCCESS) {
 		KINST_LOG("trampoline chunk allocation failed: %d", error);
 		return (NULL);
 	}
 
 	error = kmem_back(kernel_object, trampaddr, KINST_TRAMPCHUNK_SIZE,
 	    M_WAITOK | M_EXEC);
 	KASSERT(error == KERN_SUCCESS, ("kmem_back failed: %d", error));
 
 	kinst_trampoline_fill((uint8_t *)trampaddr, KINST_TRAMPCHUNK_SIZE);
 
 	/* Allocate a tracker for this chunk. */
 	chunk = malloc(sizeof(*chunk), M_KINST, M_WAITOK);
 	chunk->addr = (void *)trampaddr;
 	BIT_FILL(KINST_TRAMPS_PER_CHUNK, &chunk->free);
 
 	TAILQ_INSERT_HEAD(&kinst_trampchunks, chunk, next);
 
 	return (chunk);
 }
 
 static void
 kinst_trampchunk_free(struct trampchunk *chunk)
 {
 	sx_assert(&kinst_tramp_sx, SX_XLOCKED);
 
 	TAILQ_REMOVE(&kinst_trampchunks, chunk, next);
 	kmem_unback(kernel_object, (vm_offset_t)chunk->addr,
 	    KINST_TRAMPCHUNK_SIZE);
 	(void)vm_map_remove(kernel_map, (vm_offset_t)chunk->addr,
 	    (vm_offset_t)(chunk->addr + KINST_TRAMPCHUNK_SIZE));
 	free(chunk, M_KINST);
 }
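The allocator below leans on the bitset(9) macros already used in kinst_trampchunk_alloc(): BIT_FFS() returns a 1-based index of the first set ("free") bit, or 0 when the chunk is exhausted. A userland approximation of that bookkeeping, substituting ffsl() for BIT_FFS():

#include <stdio.h>
#include <strings.h>	/* ffsl(), available on FreeBSD and glibc */

int
main(void)
{
	unsigned long freemask = ~0UL;	/* BIT_FILL(): every slot free */
	int off;

	off = ffsl((long)freemask);	/* like BIT_FFS(): 1-based, 0 if none */
	if (off != 0) {
		off--;				/* convert to a 0-based slot */
		freemask &= ~(1UL << off);	/* BIT_CLR(): mark allocated */
		printf("allocated slot %d\n", off);

		freemask |= 1UL << off;		/* BIT_SET(): free it again */
	}
	return (0);
}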
 
 static uint8_t *
 kinst_trampoline_alloc_locked(int how)
 {
 	struct trampchunk *chunk;
 	uint8_t *tramp;
 	int off;
 
 	sx_assert(&kinst_tramp_sx, SX_XLOCKED);
 
 	TAILQ_FOREACH(chunk, &kinst_trampchunks, next) {
 		/* All trampolines from this chunk are already allocated. */
 		if ((off = BIT_FFS(KINST_TRAMPS_PER_CHUNK, &chunk->free)) == 0)
 			continue;
 		/* BIT_FFS() returns indices starting at 1 instead of 0. */
 		off--;
 		break;
 	}
 	if (chunk == NULL) {
 		if ((how & M_NOWAIT) != 0)
 			return (NULL);
 
 		/*
 		 * We didn't find a free trampoline in the current list, so
 		 * allocate a new chunk.  If that fails, the provider will no
 		 * longer be reliable, so try to warn the user.
 		 */
 		if ((chunk = kinst_trampchunk_alloc()) == NULL) {
 			static bool once = true;
 
 			if (once) {
 				once = false;
 				KINST_LOG(
 				    "kinst: failed to allocate trampoline, "
 				    "probes may not fire");
 			}
 			return (NULL);
 		}
 		off = 0;
 	}
 	BIT_CLR(KINST_TRAMPS_PER_CHUNK, off, &chunk->free);
 	tramp = chunk->addr + off * KINST_TRAMP_SIZE;
 	return (tramp);
 }
 
 uint8_t *
 kinst_trampoline_alloc(int how)
 {
 	uint8_t *tramp;
 
 	sx_xlock(&kinst_tramp_sx);
 	tramp = kinst_trampoline_alloc_locked(how);
 	sx_xunlock(&kinst_tramp_sx);
 	return (tramp);
 }
 
 static void
 kinst_trampoline_dealloc_locked(uint8_t *tramp, bool freechunks)
 {
 	struct trampchunk *chunk;
 	int off;
 
 	sx_assert(&kinst_tramp_sx, SX_XLOCKED);
 
 	if (tramp == NULL)
 		return;
 
 	TAILQ_FOREACH(chunk, &kinst_trampchunks, next) {
 		for (off = 0; off < KINST_TRAMPS_PER_CHUNK; off++) {
 			if (chunk->addr + off * KINST_TRAMP_SIZE == tramp) {
 				kinst_trampoline_fill(tramp,
 				    KINST_TRAMP_SIZE);
 				BIT_SET(KINST_TRAMPS_PER_CHUNK, off,
 				    &chunk->free);
 				if (freechunks &&
 				    BIT_ISFULLSET(KINST_TRAMPS_PER_CHUNK,
 				    &chunk->free))
 					kinst_trampchunk_free(chunk);
 				return;
 			}
 		}
 	}
 	panic("%s: did not find trampoline chunk for %p", __func__, tramp);
 }
 
 void
 kinst_trampoline_dealloc(uint8_t *tramp)
 {
 	sx_xlock(&kinst_tramp_sx);
 	kinst_trampoline_dealloc_locked(tramp, true);
 	sx_xunlock(&kinst_tramp_sx);
 }
 
 static void
 kinst_thread_ctor(void *arg __unused, struct thread *td)
 {
-	td->t_kinst = kinst_trampoline_alloc(M_WAITOK);
+	td->t_kinst_tramp = kinst_trampoline_alloc(M_WAITOK);
 }
 
 static void
 kinst_thread_dtor(void *arg __unused, struct thread *td)
 {
 	void *tramp;
 
-	tramp = td->t_kinst;
-	td->t_kinst = NULL;
+	tramp = td->t_kinst_tramp;
+	td->t_kinst_tramp = NULL;
 
 	/*
 	 * This assumes that the thread_dtor event permits sleeping, which
 	 * appears to be true for the time being.
 	 */
 	kinst_trampoline_dealloc(tramp);
 }
 
 int
 kinst_trampoline_init(void)
 {
 	struct proc *p;
 	struct thread *td;
 	void *tramp;
 	int error;
 
 	kinst_thread_ctor_handler = EVENTHANDLER_REGISTER(thread_ctor,
 	    kinst_thread_ctor, NULL, EVENTHANDLER_PRI_ANY);
 	kinst_thread_dtor_handler = EVENTHANDLER_REGISTER(thread_dtor,
 	    kinst_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
 
 	error = 0;
 	tramp = NULL;
 
 	sx_slock(&allproc_lock);
 	sx_xlock(&kinst_tramp_sx);
 	FOREACH_PROC_IN_SYSTEM(p) {
 retry:
 		PROC_LOCK(p);
 		FOREACH_THREAD_IN_PROC(p, td) {
-			if (td->t_kinst != NULL)
+			if (td->t_kinst_tramp != NULL)
 				continue;
 			if (tramp == NULL) {
 				/*
 				 * Try to allocate a trampoline without
 				 * dropping the process lock.  If all chunks
 				 * are fully utilized, we must release the
 				 * lock and try again.
 				 */
 				tramp = kinst_trampoline_alloc_locked(
 				    M_NOWAIT);
 				if (tramp == NULL) {
 					PROC_UNLOCK(p);
 					tramp = kinst_trampoline_alloc_locked(
 					    M_WAITOK);
 					if (tramp == NULL) {
 						/*
 						 * Let the unload handler
 						 * clean up.
 						 */
 						error = ENOMEM;
 						goto out;
 					} else
 						goto retry;
 				}
 			}
-			td->t_kinst = tramp;
+			td->t_kinst_tramp = tramp;
 			tramp = NULL;
 		}
 		PROC_UNLOCK(p);
 	}
 out:
 	sx_xunlock(&kinst_tramp_sx);
 	sx_sunlock(&allproc_lock);
 	return (error);
 }
 
 int
 kinst_trampoline_deinit(void)
 {
 	struct trampchunk *chunk, *tmp;
 	struct proc *p;
 	struct thread *td;
 
 	EVENTHANDLER_DEREGISTER(thread_ctor, kinst_thread_ctor_handler);
 	EVENTHANDLER_DEREGISTER(thread_dtor, kinst_thread_dtor_handler);
 
 	sx_slock(&allproc_lock);
 	sx_xlock(&kinst_tramp_sx);
 	FOREACH_PROC_IN_SYSTEM(p) {
 		PROC_LOCK(p);
 		FOREACH_THREAD_IN_PROC(p, td) {
-			kinst_trampoline_dealloc_locked(td->t_kinst, false);
-			td->t_kinst = NULL;
+			kinst_trampoline_dealloc_locked(td->t_kinst_tramp,
+			    false);
+			td->t_kinst_tramp = NULL;
 		}
 		PROC_UNLOCK(p);
 	}
 	sx_sunlock(&allproc_lock);
 	TAILQ_FOREACH_SAFE(chunk, &kinst_trampchunks, next, tmp)
 		kinst_trampchunk_free(chunk);
 	sx_xunlock(&kinst_tramp_sx);
 
 	return (0);
 }