diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index 8d8bdd6fbb6e..19b0c5065e68 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -1,1231 +1,1231 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 1994, David Greenman * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 */ #include __FBSDID("$FreeBSD$"); /* * AMD64 Trap and System call handling */ #include "opt_clock.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_hwpmc_hooks.h" #include "opt_isa.h" #include "opt_kdb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include PMC_SOFT_DEFINE( , , page_fault, all); PMC_SOFT_DEFINE( , , page_fault, read); PMC_SOFT_DEFINE( , , page_fault, write); #endif #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #ifdef KDTRACE_HOOKS #include #endif extern inthand_t IDTVEC(bpt), IDTVEC(bpt_pti), IDTVEC(dbg), IDTVEC(fast_syscall), IDTVEC(fast_syscall_pti), IDTVEC(fast_syscall32), IDTVEC(int0x80_syscall_pti), IDTVEC(int0x80_syscall); void __noinline trap(struct trapframe *frame); void trap_check(struct trapframe *frame); void dblfault_handler(struct trapframe *frame); static int trap_pfault(struct trapframe *, bool, int *, int *); static void trap_fatal(struct trapframe *, vm_offset_t); #ifdef KDTRACE_HOOKS static bool trap_user_dtrace(struct trapframe *, int (**hook)(struct trapframe *)); #endif static const char UNKNOWN[] = "unknown"; static const char *const trap_msg[] = { [0] = UNKNOWN, /* unused */ [T_PRIVINFLT] = "privileged instruction fault", [2] = UNKNOWN, /* unused */ [T_BPTFLT] = "breakpoint instruction fault", [4] = UNKNOWN, /* unused */ [5] = UNKNOWN, /* unused */ [T_ARITHTRAP] = "arithmetic trap", [7] = UNKNOWN, /* unused */ [8] = UNKNOWN, /* unused */ [T_PROTFLT] = "general protection fault", [T_TRCTRAP] = "debug exception", [11] = UNKNOWN, /* unused */ [T_PAGEFLT] = "page fault", [13] = UNKNOWN, /* unused */ [T_ALIGNFLT] = "alignment fault", [15] = UNKNOWN, /* unused */ [16] = UNKNOWN, /* unused */ [17] = UNKNOWN, /* unused */ [T_DIVIDE] = "integer divide fault", [T_NMI] = "non-maskable interrupt trap", [T_OFLOW] = "overflow trap", [T_BOUND] = "FPU bounds check fault", [T_DNA] = "FPU device not available", [T_DOUBLEFLT] = "double fault", [T_FPOPFLT] = "FPU operand fetch fault", [T_TSSFLT] = "invalid TSS fault", [T_SEGNPFLT] = "segment not present fault", [T_STKFLT] = "stack fault", [T_MCHK] = "machine check trap", [T_XMMFLT] = "SIMD floating-point exception", [T_RESERVED] = "reserved (unknown) fault", [31] = UNKNOWN, /* reserved */ [T_DTRACE_RET] = "DTrace pid return trap", }; static int uprintf_signal; SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RWTUN, &uprintf_signal, 0, "Print debugging information on trap signal to ctty"); /* * Control L1D flush on return from NMI. * * Tunable can be set to the following values: * 0 - only enable flush on return from NMI if required by vmm.ko (default) * >1 - always flush on return from NMI. * * Post-boot, the sysctl indicates if flushing is currently enabled. */ int nmi_flush_l1d_sw; SYSCTL_INT(_machdep, OID_AUTO, nmi_flush_l1d_sw, CTLFLAG_RWTUN, &nmi_flush_l1d_sw, 0, "Flush L1 Data Cache on NMI exit, software bhyve L1TF mitigation assist"); /* * Table of handlers for various segment load faults. */ static const struct { uintptr_t faddr; uintptr_t fhandler; } sfhandlers[] = { { .faddr = (uintptr_t)ld_ds, .fhandler = (uintptr_t)ds_load_fault, }, { .faddr = (uintptr_t)ld_es, .fhandler = (uintptr_t)es_load_fault, }, { .faddr = (uintptr_t)ld_fs, .fhandler = (uintptr_t)fs_load_fault, }, { .faddr = (uintptr_t)ld_gs, .fhandler = (uintptr_t)gs_load_fault, }, { .faddr = (uintptr_t)ld_gsbase, .fhandler = (uintptr_t)gsbase_load_fault }, { .faddr = (uintptr_t)ld_fsbase, .fhandler = (uintptr_t)fsbase_load_fault, }, }; /* * Exception, fault, and trap interface to the FreeBSD kernel. * This common code is called from assembly language IDT gate entry * routines that prepare a suitable stack frame, and restore this * frame after the exception has been processed. */ void trap(struct trapframe *frame) { ksiginfo_t ksi; struct thread *td; struct proc *p; register_t addr, dr6; size_t i; int pf, signo, ucode; u_int type; td = curthread; p = td->td_proc; dr6 = 0; kasan_mark(frame, sizeof(*frame), sizeof(*frame), 0); kmsan_mark(frame, sizeof(*frame), KMSAN_STATE_INITED); VM_CNT_INC(v_trap); type = frame->tf_trapno; #ifdef SMP /* Handler for NMI IPIs used for stopping CPUs. */ if (type == T_NMI && ipi_nmi_handler() == 0) return; #endif #ifdef KDB if (kdb_active) { kdb_reenter(); return; } #endif if (type == T_RESERVED) { trap_fatal(frame, 0); return; } if (type == T_NMI) { #ifdef HWPMC_HOOKS /* * CPU PMCs interrupt using an NMI. If the PMC module is * active, pass the 'rip' value to the PMC module's interrupt * handler. A non-zero return value from the handler means that * the NMI was consumed by it and we can return immediately. */ if (pmc_intr != NULL && (*pmc_intr)(frame) != 0) return; #endif } if ((frame->tf_rflags & PSL_I) == 0) { /* * Buggy application or kernel code has disabled * interrupts and then trapped. Enabling interrupts * now is wrong, but it is better than running with * interrupts disabled until they are accidentally * enabled later. */ if (TRAPF_USERMODE(frame)) { uprintf( "pid %ld (%s): trap %d with interrupts disabled\n", (long)curproc->p_pid, curthread->td_name, type); } else { switch (type) { case T_NMI: case T_BPTFLT: case T_TRCTRAP: case T_PROTFLT: case T_SEGNPFLT: case T_STKFLT: break; default: printf( "kernel trap %d with interrupts disabled\n", type); /* * We shouldn't enable interrupts while holding a * spin lock. */ if (td->td_md.md_spinlock_count == 0) enable_intr(); } } } if (TRAPF_USERMODE(frame)) { /* user trap */ td->td_pticks = 0; td->td_frame = frame; addr = frame->tf_rip; if (td->td_cowgen != atomic_load_int(&p->p_cowgen)) thread_cow_update(td); switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ signo = SIGILL; ucode = ILL_PRVOPC; break; case T_BPTFLT: /* bpt instruction fault */ #ifdef KDTRACE_HOOKS if (trap_user_dtrace(frame, &dtrace_pid_probe_ptr)) return; #else enable_intr(); #endif signo = SIGTRAP; ucode = TRAP_BRKPT; break; case T_TRCTRAP: /* debug exception */ enable_intr(); signo = SIGTRAP; ucode = TRAP_TRACE; dr6 = rdr6(); if ((dr6 & DBREG_DR6_BS) != 0) { PROC_LOCK(td->td_proc); if ((td->td_dbgflags & TDB_STEP) != 0) { td->td_frame->tf_rflags &= ~PSL_T; td->td_dbgflags &= ~TDB_STEP; } PROC_UNLOCK(td->td_proc); } break; case T_ARITHTRAP: /* arithmetic trap */ ucode = fputrap_x87(); if (ucode == -1) return; signo = SIGFPE; break; case T_PROTFLT: /* general protection fault */ signo = SIGBUS; ucode = BUS_OBJERR; break; case T_STKFLT: /* stack fault */ case T_SEGNPFLT: /* segment not present fault */ signo = SIGBUS; ucode = BUS_ADRERR; break; case T_TSSFLT: /* invalid TSS fault */ signo = SIGBUS; ucode = BUS_OBJERR; break; case T_ALIGNFLT: signo = SIGBUS; ucode = BUS_ADRALN; break; case T_DOUBLEFLT: /* double fault */ default: signo = SIGBUS; ucode = BUS_OBJERR; break; case T_PAGEFLT: /* page fault */ /* * Can emulator handle this trap? */ if (*p->p_sysent->sv_trap != NULL && (*p->p_sysent->sv_trap)(td) == 0) return; pf = trap_pfault(frame, true, &signo, &ucode); if (pf == -1) return; if (pf == 0) goto userret; addr = frame->tf_addr; break; case T_DIVIDE: /* integer divide fault */ ucode = FPE_INTDIV; signo = SIGFPE; break; case T_NMI: nmi_handle_intr(type, frame); return; case T_OFLOW: /* integer overflow fault */ ucode = FPE_INTOVF; signo = SIGFPE; break; case T_BOUND: /* bounds check fault */ ucode = FPE_FLTSUB; signo = SIGFPE; break; case T_DNA: /* transparent fault (due to context switch "late") */ KASSERT(PCB_USER_FPU(td->td_pcb), ("kernel FPU ctx has leaked")); fpudna(); return; case T_FPOPFLT: /* FPU operand fetch fault */ ucode = ILL_COPROC; signo = SIGILL; break; case T_XMMFLT: /* SIMD floating-point exception */ ucode = fputrap_sse(); if (ucode == -1) return; signo = SIGFPE; break; #ifdef KDTRACE_HOOKS case T_DTRACE_RET: (void)trap_user_dtrace(frame, &dtrace_return_probe_ptr); return; #endif } } else { /* kernel trap */ KASSERT(cold || td->td_ucred != NULL, ("kernel trap doesn't have ucred")); switch (type) { case T_PAGEFLT: /* page fault */ (void)trap_pfault(frame, false, NULL, NULL); return; case T_DNA: if (PCB_USER_FPU(td->td_pcb)) panic("Unregistered use of FPU in kernel"); fpudna(); return; case T_ARITHTRAP: /* arithmetic trap */ case T_XMMFLT: /* SIMD floating-point exception */ case T_FPOPFLT: /* FPU operand fetch fault */ /* * For now, supporting kernel handler * registration for FPU traps is overkill. */ trap_fatal(frame, 0); return; case T_STKFLT: /* stack fault */ case T_PROTFLT: /* general protection fault */ case T_SEGNPFLT: /* segment not present fault */ if (td->td_intr_nesting_level != 0) break; /* * Invalid segment selectors and out of bounds * %rip's and %rsp's can be set up in user mode. * This causes a fault in kernel mode when the * kernel tries to return to user mode. We want * to get this fault so that we can fix the * problem here and not have to check all the * selectors and pointers when the user changes * them. * * In case of PTI, the IRETQ faulted while the * kernel used the pti stack, and exception * frame records %rsp value pointing to that * stack. If we return normally to * doreti_iret_fault, the trapframe is * reconstructed on pti stack, and calltrap() * called on it as well. Due to the very * limited pti stack size, kernel does not * survive for too long. Switch to the normal * thread stack for the trap handling. * * Magic '5' is the number of qwords occupied by * the hardware trap frame. */ if (frame->tf_rip == (long)doreti_iret) { KASSERT((read_rflags() & PSL_I) == 0, ("interrupts enabled")); frame->tf_rip = (long)doreti_iret_fault; if ((PCPU_GET(curpmap)->pm_ucr3 != PMAP_NO_CR3) && (frame->tf_rsp == (uintptr_t)PCPU_GET( pti_rsp0) - 5 * sizeof(register_t))) { frame->tf_rsp = PCPU_GET(rsp0) - 5 * sizeof(register_t); } return; } for (i = 0; i < nitems(sfhandlers); i++) { if (frame->tf_rip == sfhandlers[i].faddr) { KASSERT((read_rflags() & PSL_I) == 0, ("interrupts enabled")); frame->tf_rip = sfhandlers[i].fhandler; return; } } if (curpcb->pcb_onfault != NULL) { frame->tf_rip = (long)curpcb->pcb_onfault; return; } break; case T_TSSFLT: /* * PSL_NT can be set in user mode and isn't cleared * automatically when the kernel is entered. This * causes a TSS fault when the kernel attempts to * `iret' because the TSS link is uninitialized. We * want to get this fault so that we can fix the * problem here and not every time the kernel is * entered. */ if (frame->tf_rflags & PSL_NT) { frame->tf_rflags &= ~PSL_NT; return; } break; case T_TRCTRAP: /* debug exception */ /* Clear any pending debug events. */ dr6 = rdr6(); load_dr6(0); /* * Ignore debug register exceptions due to * accesses in the user's address space, which * can happen under several conditions such as * if a user sets a watchpoint on a buffer and * then passes that buffer to a system call. * We still want to get TRCTRAPS for addresses * in kernel space because that is useful when * debugging the kernel. */ if (user_dbreg_trap(dr6)) return; /* * Malicious user code can configure a debug * register watchpoint to trap on data access * to the top of stack and then execute 'pop * %ss; int 3'. Due to exception deferral for * 'pop %ss', the CPU will not interrupt 'int * 3' to raise the DB# exception for the debug * register but will postpone the DB# until * execution of the first instruction of the * BP# handler (in kernel mode). Normally the * previous check would ignore DB# exceptions * for watchpoints on user addresses raised in * kernel mode. However, some CPU errata * include cases where DB# exceptions do not * properly set bits in %dr6, e.g. Haswell * HSD23 and Skylake-X SKZ24. * * A deferred DB# can also be raised on the * first instructions of system call entry * points or single-step traps via similar use * of 'pop %ss' or 'mov xxx, %ss'. */ if (pti) { if (frame->tf_rip == (uintptr_t)IDTVEC(fast_syscall_pti) || #ifdef COMPAT_FREEBSD32 frame->tf_rip == (uintptr_t)IDTVEC(int0x80_syscall_pti) || #endif frame->tf_rip == (uintptr_t)IDTVEC(bpt_pti)) return; } else { if (frame->tf_rip == (uintptr_t)IDTVEC(fast_syscall) || #ifdef COMPAT_FREEBSD32 frame->tf_rip == (uintptr_t)IDTVEC(int0x80_syscall) || #endif frame->tf_rip == (uintptr_t)IDTVEC(bpt)) return; } if (frame->tf_rip == (uintptr_t)IDTVEC(dbg) || /* Needed for AMD. */ frame->tf_rip == (uintptr_t)IDTVEC(fast_syscall32)) return; /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) */ case T_BPTFLT: /* * If KDB is enabled, let it handle the debugger trap. * Otherwise, debugger traps "can't happen". */ #ifdef KDB if (kdb_trap(type, dr6, frame)) return; #endif break; case T_NMI: nmi_handle_intr(type, frame); return; } trap_fatal(frame, 0); return; } /* Translate fault for emulators (e.g. Linux) */ if (*p->p_sysent->sv_transtrap != NULL) signo = (*p->p_sysent->sv_transtrap)(signo, type); ksiginfo_init_trap(&ksi); ksi.ksi_signo = signo; ksi.ksi_code = ucode; ksi.ksi_trapno = type; ksi.ksi_addr = (void *)addr; if (uprintf_signal) { uprintf("pid %d comm %s: signal %d err %lx code %d type %d " "addr 0x%lx rsp 0x%lx rip 0x%lx " "<%02x %02x %02x %02x %02x %02x %02x %02x>\n", p->p_pid, p->p_comm, signo, frame->tf_err, ucode, type, addr, frame->tf_rsp, frame->tf_rip, fubyte((void *)(frame->tf_rip + 0)), fubyte((void *)(frame->tf_rip + 1)), fubyte((void *)(frame->tf_rip + 2)), fubyte((void *)(frame->tf_rip + 3)), fubyte((void *)(frame->tf_rip + 4)), fubyte((void *)(frame->tf_rip + 5)), fubyte((void *)(frame->tf_rip + 6)), fubyte((void *)(frame->tf_rip + 7))); } KASSERT((read_rflags() & PSL_I) != 0, ("interrupts disabled")); trapsignal(td, &ksi); userret: userret(td, frame); KASSERT(PCB_USER_FPU(td->td_pcb), ("Return from trap with kernel FPU ctx leaked")); } /* * Ensure that we ignore any DTrace-induced faults. This function cannot * be instrumented, so it cannot generate such faults itself. */ void trap_check(struct trapframe *frame) { #ifdef KDTRACE_HOOKS if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, frame->tf_trapno) != 0) return; #endif trap(frame); } static bool trap_is_smap(struct trapframe *frame) { /* * A page fault on a userspace address is classified as * SMAP-induced if: * - SMAP is supported; * - kernel mode accessed present data page; * - rflags.AC was cleared. * Kernel must never access user space with rflags.AC cleared * if SMAP is enabled. */ return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 && (frame->tf_err & (PGEX_P | PGEX_U | PGEX_I | PGEX_RSV)) == PGEX_P && (frame->tf_rflags & PSL_AC) == 0); } static bool trap_is_pti(struct trapframe *frame) { return (PCPU_GET(curpmap)->pm_ucr3 != PMAP_NO_CR3 && pg_nx != 0 && (frame->tf_err & (PGEX_P | PGEX_W | PGEX_U | PGEX_I)) == (PGEX_P | PGEX_U | PGEX_I) && (curpcb->pcb_saved_ucr3 & ~CR3_PCID_MASK) == (PCPU_GET(curpmap)->pm_cr3 & ~CR3_PCID_MASK)); } /* * Handle all details of a page fault. * Returns: * -1 if this fault was fatal, typically from kernel mode * (cannot happen, but we need to return something). * 0 if this fault was handled by updating either the user or kernel * page table, execution can continue. * 1 if this fault was from usermode and it was not handled, a synchronous * signal should be delivered to the thread. *signo returns the signal * number, *ucode gives si_code. */ static int trap_pfault(struct trapframe *frame, bool usermode, int *signo, int *ucode) { struct thread *td; struct proc *p; vm_map_t map; vm_offset_t eva; int rv; vm_prot_t ftype; MPASS(!usermode || (signo != NULL && ucode != NULL)); td = curthread; p = td->td_proc; eva = frame->tf_addr; if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) { /* * Due to both processor errata and lazy TLB invalidation when * access restrictions are removed from virtual pages, memory * accesses that are allowed by the physical mapping layer may * nonetheless cause one spurious page fault per virtual page. * When the thread is executing a "no faulting" section that * is bracketed by vm_fault_{disable,enable}_pagefaults(), * every page fault is treated as a spurious page fault, * unless it accesses the same virtual address as the most * recent page fault within the same "no faulting" section. */ if (td->td_md.md_spurflt_addr != eva || (td->td_pflags & TDP_RESETSPUR) != 0) { /* * Do nothing to the TLB. A stale TLB entry is * flushed automatically by a page fault. */ td->td_md.md_spurflt_addr = eva; td->td_pflags &= ~TDP_RESETSPUR; return (0); } } else { /* * If we get a page fault while in a critical section, then * it is most likely a fatal kernel page fault. The kernel * is already going to panic trying to get a sleep lock to * do the VM lookup, so just consider it a fatal trap so the * kernel can print out a useful trap message and even get * to the debugger. * * If we get a page fault while holding a non-sleepable * lock, then it is most likely a fatal kernel page fault. * If WITNESS is enabled, then it's going to whine about * bogus LORs with various VM locks, so just skip to the * fatal trap handling directly. */ if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) { trap_fatal(frame, eva); return (-1); } } if (eva >= VM_MIN_KERNEL_ADDRESS) { /* * Don't allow user-mode faults in kernel address space. */ if (usermode) { *signo = SIGSEGV; *ucode = SEGV_MAPERR; return (1); } map = kernel_map; } else { map = &p->p_vmspace->vm_map; /* * When accessing a usermode address, kernel must be * ready to accept the page fault, and provide a * handling routine. Since accessing the address * without the handler is a bug, do not try to handle * it normally, and panic immediately. * * If SMAP is enabled, filter SMAP faults also, * because illegal access might occur to the mapped * user address, causing infinite loop. */ if (!usermode && (td->td_intr_nesting_level != 0 || trap_is_smap(frame) || curpcb->pcb_onfault == NULL)) { trap_fatal(frame, eva); return (-1); } } /* * If the trap was caused by errant bits in the PTE then panic. */ if (frame->tf_err & PGEX_RSV) { trap_fatal(frame, eva); return (-1); } /* * User-mode protection key violation (PKU). May happen * either from usermode or from kernel if copyin accessed * key-protected mapping. */ if ((frame->tf_err & PGEX_PK) != 0) { if (eva > VM_MAXUSER_ADDRESS) { trap_fatal(frame, eva); return (-1); } if (usermode) { *signo = SIGSEGV; *ucode = SEGV_PKUERR; return (1); } goto after_vmfault; } /* * If nx protection of the usermode portion of kernel page * tables caused trap, panic. */ if (usermode && trap_is_pti(frame)) panic("PTI: pid %d comm %s tf_err %#lx", p->p_pid, p->p_comm, frame->tf_err); /* * PGEX_I is defined only if the execute disable bit capability is * supported and enabled. */ if (frame->tf_err & PGEX_W) ftype = VM_PROT_WRITE; else if ((frame->tf_err & PGEX_I) && pg_nx != 0) ftype = VM_PROT_EXECUTE; else ftype = VM_PROT_READ; /* Fault in the page. */ rv = vm_fault_trap(map, eva, ftype, VM_FAULT_NORMAL, signo, ucode); if (rv == KERN_SUCCESS) { #ifdef HWPMC_HOOKS if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { PMC_SOFT_CALL_TF( , , page_fault, all, frame); if (ftype == VM_PROT_READ) PMC_SOFT_CALL_TF( , , page_fault, read, frame); else PMC_SOFT_CALL_TF( , , page_fault, write, frame); } #endif return (0); } if (usermode) return (1); after_vmfault: if (td->td_intr_nesting_level == 0 && curpcb->pcb_onfault != NULL) { frame->tf_rip = (long)curpcb->pcb_onfault; return (0); } trap_fatal(frame, eva); return (-1); } static void trap_fatal(struct trapframe *frame, vm_offset_t eva) { int code, ss; u_int type; struct soft_segment_descriptor softseg; struct user_segment_descriptor *gdt; #ifdef KDB bool handled; #endif code = frame->tf_err; type = frame->tf_trapno; gdt = *PCPU_PTR(gdt); sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)], &softseg); printf("\n\nFatal trap %d: %s while in %s mode\n", type, type < nitems(trap_msg) ? trap_msg[type] : UNKNOWN, TRAPF_USERMODE(frame) ? "user" : "kernel"); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif if (type == T_PAGEFLT) { printf("fault virtual address = 0x%lx\n", eva); printf("fault code = %s %s %s%s%s, %s\n", code & PGEX_U ? "user" : "supervisor", code & PGEX_W ? "write" : "read", code & PGEX_I ? "instruction" : "data", code & PGEX_PK ? " prot key" : "", code & PGEX_SGX ? " SGX" : "", code & PGEX_RSV ? "reserved bits in PTE" : code & PGEX_P ? "protection violation" : "page not present"); } printf("instruction pointer = 0x%lx:0x%lx\n", frame->tf_cs & 0xffff, frame->tf_rip); ss = frame->tf_ss & 0xffff; printf("stack pointer = 0x%x:0x%lx\n", ss, frame->tf_rsp); printf("frame pointer = 0x%x:0x%lx\n", ss, frame->tf_rbp); printf("code segment = base 0x%lx, limit 0x%lx, type 0x%x\n", softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); printf(" = DPL %d, pres %d, long %d, def32 %d, gran %d\n", softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_long, softseg.ssd_def32, softseg.ssd_gran); printf("processor eflags = "); if (frame->tf_rflags & PSL_T) printf("trace trap, "); if (frame->tf_rflags & PSL_I) printf("interrupt enabled, "); if (frame->tf_rflags & PSL_NT) printf("nested task, "); if (frame->tf_rflags & PSL_RF) printf("resume, "); printf("IOPL = %ld\n", (frame->tf_rflags & PSL_IOPL) >> 12); printf("current process = %d (%s)\n", curproc->p_pid, curthread->td_name); #ifdef KDB if (debugger_on_trap) { kdb_why = KDB_WHY_TRAP; handled = kdb_trap(type, 0, frame); kdb_why = KDB_WHY_UNSET; if (handled) return; } #endif printf("trap number = %d\n", type); panic("%s", type < nitems(trap_msg) ? trap_msg[type] : "unknown/reserved trap"); } #ifdef KDTRACE_HOOKS /* * Invoke a userspace DTrace hook. The hook pointer is cleared when no * userspace probes are enabled, so we must synchronize with DTrace to ensure * that a trapping thread is able to call the hook before it is cleared. */ static bool trap_user_dtrace(struct trapframe *frame, int (**hookp)(struct trapframe *)) { int (*hook)(struct trapframe *); hook = atomic_load_ptr(hookp); enable_intr(); if (hook != NULL) return ((hook)(frame) == 0); return (false); } #endif /* * Double fault handler. Called when a fault occurs while writing * a frame for a trap/exception onto the stack. This usually occurs * when the stack overflows (such is the case with infinite recursion, * for example). */ void dblfault_handler(struct trapframe *frame) { kmsan_mark(frame, sizeof(*frame), KMSAN_STATE_INITED); #ifdef KDTRACE_HOOKS if (dtrace_doubletrap_func != NULL) (*dtrace_doubletrap_func)(); #endif printf("\nFatal double fault\n" "rip %#lx rsp %#lx rbp %#lx\n" "rax %#lx rdx %#lx rbx %#lx\n" "rcx %#lx rsi %#lx rdi %#lx\n" "r8 %#lx r9 %#lx r10 %#lx\n" "r11 %#lx r12 %#lx r13 %#lx\n" "r14 %#lx r15 %#lx rflags %#lx\n" "cs %#lx ss %#lx ds %#hx es %#hx fs %#hx gs %#hx\n" "fsbase %#lx gsbase %#lx kgsbase %#lx\n", frame->tf_rip, frame->tf_rsp, frame->tf_rbp, frame->tf_rax, frame->tf_rdx, frame->tf_rbx, frame->tf_rcx, frame->tf_rdi, frame->tf_rsi, frame->tf_r8, frame->tf_r9, frame->tf_r10, frame->tf_r11, frame->tf_r12, frame->tf_r13, frame->tf_r14, frame->tf_r15, frame->tf_rflags, frame->tf_cs, frame->tf_ss, frame->tf_ds, frame->tf_es, frame->tf_fs, frame->tf_gs, rdmsr(MSR_FSBASE), rdmsr(MSR_GSBASE), rdmsr(MSR_KGSBASE)); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif panic("double fault"); } static int __noinline cpu_fetch_syscall_args_fallback(struct thread *td, struct syscall_args *sa) { struct proc *p; struct trapframe *frame; - register_t *argp; + syscallarg_t *argp; caddr_t params; int reg, regcnt, error; p = td->td_proc; frame = td->td_frame; reg = 0; regcnt = NARGREGS; if (sa->code == SYS_syscall || sa->code == SYS___syscall) { sa->code = frame->tf_rdi; reg++; regcnt--; } if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; KASSERT(sa->callp->sy_narg <= nitems(sa->args), ("Too many syscall arguments!")); argp = &frame->tf_rdi; argp += reg; memcpy(sa->args, argp, sizeof(sa->args[0]) * NARGREGS); if (sa->callp->sy_narg > regcnt) { params = (caddr_t)frame->tf_rsp + sizeof(register_t); error = copyin(params, &sa->args[regcnt], (sa->callp->sy_narg - regcnt) * sizeof(sa->args[0])); if (__predict_false(error != 0)) return (error); } td->td_retval[0] = 0; td->td_retval[1] = frame->tf_rdx; return (0); } int cpu_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; struct syscall_args *sa; p = td->td_proc; frame = td->td_frame; sa = &td->td_sa; sa->code = frame->tf_rax; sa->original_code = sa->code; if (__predict_false(sa->code == SYS_syscall || sa->code == SYS___syscall || sa->code >= p->p_sysent->sv_size)) return (cpu_fetch_syscall_args_fallback(td, sa)); sa->callp = &p->p_sysent->sv_table[sa->code]; KASSERT(sa->callp->sy_narg <= nitems(sa->args), ("Too many syscall arguments!")); if (__predict_false(sa->callp->sy_narg > NARGREGS)) return (cpu_fetch_syscall_args_fallback(td, sa)); memcpy(sa->args, &frame->tf_rdi, sizeof(sa->args[0]) * NARGREGS); td->td_retval[0] = 0; td->td_retval[1] = frame->tf_rdx; return (0); } #include "../../kern/subr_syscall.c" static void (*syscall_ret_l1d_flush)(void); int syscall_ret_l1d_flush_mode; static void flush_l1d_hw(void) { wrmsr(MSR_IA32_FLUSH_CMD, IA32_FLUSH_CMD_L1D); } static void __noinline amd64_syscall_ret_flush_l1d_check(int error) { void (*p)(void); if (error != EEXIST && error != EAGAIN && error != EXDEV && error != ENOENT && error != ENOTCONN && error != EINPROGRESS) { p = atomic_load_ptr(&syscall_ret_l1d_flush); if (p != NULL) p(); } } static void __inline amd64_syscall_ret_flush_l1d_check_inline(int error) { if (__predict_false(error != 0)) amd64_syscall_ret_flush_l1d_check(error); } void amd64_syscall_ret_flush_l1d(int error) { amd64_syscall_ret_flush_l1d_check_inline(error); } void amd64_syscall_ret_flush_l1d_recalc(void) { bool l1d_hw; l1d_hw = (cpu_stdext_feature3 & CPUID_STDEXT3_L1D_FLUSH) != 0; again: switch (syscall_ret_l1d_flush_mode) { case 0: syscall_ret_l1d_flush = NULL; break; case 1: syscall_ret_l1d_flush = l1d_hw ? flush_l1d_hw : flush_l1d_sw_abi; break; case 2: syscall_ret_l1d_flush = l1d_hw ? flush_l1d_hw : NULL; break; case 3: syscall_ret_l1d_flush = flush_l1d_sw_abi; break; default: syscall_ret_l1d_flush_mode = 1; goto again; } } static int machdep_syscall_ret_flush_l1d(SYSCTL_HANDLER_ARGS) { int error, val; val = syscall_ret_l1d_flush_mode; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); syscall_ret_l1d_flush_mode = val; amd64_syscall_ret_flush_l1d_recalc(); return (0); } SYSCTL_PROC(_machdep, OID_AUTO, syscall_ret_flush_l1d, CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, NULL, 0, machdep_syscall_ret_flush_l1d, "I", "Flush L1D on syscall return with error (0 - off, 1 - on, " "2 - use hw only, 3 - use sw only)"); /* * System call handler for native binaries. The trap frame is already * set up by the assembler trampoline and a pointer to it is saved in * td_frame. */ void amd64_syscall(struct thread *td, int traced) { ksiginfo_t ksi; kmsan_mark(td->td_frame, sizeof(*td->td_frame), KMSAN_STATE_INITED); #ifdef DIAGNOSTIC if (!TRAPF_USERMODE(td->td_frame)) { panic("syscall"); /* NOT REACHED */ } #endif syscallenter(td); /* * Traced syscall. */ if (__predict_false(traced)) { td->td_frame->tf_rflags &= ~PSL_T; ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGTRAP; ksi.ksi_code = TRAP_TRACE; ksi.ksi_addr = (void *)td->td_frame->tf_rip; trapsignal(td, &ksi); } KASSERT(PCB_USER_FPU(td->td_pcb), ("System call %s returning with kernel FPU ctx leaked", syscallname(td->td_proc, td->td_sa.code))); KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td), ("System call %s returning with mangled pcb_save", syscallname(td->td_proc, td->td_sa.code))); KASSERT(pmap_not_in_di(), ("System call %s returning with leaked invl_gen %lu", syscallname(td->td_proc, td->td_sa.code), td->td_md.md_invl_gen.gen)); syscallret(td); /* * If the user-supplied value of %rip is not a canonical * address, then some CPUs will trigger a ring 0 #GP during * the sysret instruction. However, the fault handler would * execute in ring 0 with the user's %gs and %rsp which would * not be safe. Instead, use the full return path which * catches the problem safely. */ if (__predict_false(td->td_frame->tf_rip >= (la57 ? VM_MAXUSER_ADDRESS_LA57 : VM_MAXUSER_ADDRESS_LA48))) set_pcb_flags(td->td_pcb, PCB_FULL_IRET); amd64_syscall_ret_flush_l1d_check_inline(td->td_errno); } diff --git a/sys/arm/arm/syscall.c b/sys/arm/arm/syscall.c index a635de0ec716..c083bf552eb5 100644 --- a/sys/arm/arm/syscall.c +++ b/sys/arm/arm/syscall.c @@ -1,171 +1,171 @@ /* $NetBSD: fault.c,v 1.45 2003/11/20 14:44:36 scw Exp $ */ /*- * Copyright 2004 Olivier Houchard * Copyright 2003 Wasabi Systems, Inc. * All rights reserved. * * Written by Steve C. Woodford for Wasabi Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project by * Wasabi Systems, Inc. * 4. The name of Wasabi Systems, Inc. may not be used to endorse * or promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 1994-1997 Mark Brinicombe. * Copyright (c) 1994 Brini. * All rights reserved. * * This code is derived from software written for Brini by Mark Brinicombe * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Brini. * 4. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * RiscBSD kernel project * * fault.c * * Fault handlers * * Created : 28/11/94 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include void swi_handler(struct trapframe *); int cpu_fetch_syscall_args(struct thread *td) { struct proc *p; - register_t *ap; + syscallarg_t *ap; struct syscall_args *sa; u_int nap; int error; nap = 4; sa = &td->td_sa; sa->code = td->td_frame->tf_r7; sa->original_code = sa->code; ap = &td->td_frame->tf_r0; if (sa->code == SYS_syscall) { sa->code = *ap++; nap--; } else if (sa->code == SYS___syscall) { sa->code = ap[_QUAD_LOWWORD]; nap -= 2; ap += 2; } p = td->td_proc; if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; error = 0; - memcpy(sa->args, ap, nap * sizeof(register_t)); + memcpy(sa->args, ap, nap * sizeof(*sa->args)); if (sa->callp->sy_narg > nap) { error = copyin((void *)td->td_frame->tf_usr_sp, sa->args + - nap, (sa->callp->sy_narg - nap) * sizeof(register_t)); + nap, (sa->callp->sy_narg - nap) * sizeof(*sa->args)); } if (error == 0) { td->td_retval[0] = 0; td->td_retval[1] = 0; } return (error); } #include "../../kern/subr_syscall.c" static void syscall(struct thread *td, struct trapframe *frame) { syscallenter(td); syscallret(td); } void swi_handler(struct trapframe *frame) { struct thread *td = curthread; td->td_frame = frame; td->td_pticks = 0; /* * Enable interrupts if they were enabled before the exception. * Since all syscalls *should* come from user mode it will always * be safe to enable them, but check anyway. */ if (td->td_md.md_spinlock_count == 0) { if (__predict_true(frame->tf_spsr & PSR_I) == 0) enable_interrupts(PSR_I); if (__predict_true(frame->tf_spsr & PSR_F) == 0) enable_interrupts(PSR_F); } syscall(td, frame); } diff --git a/sys/arm64/arm64/trap.c b/sys/arm64/arm64/trap.c index 226f69592952..b664501b62fa 100644 --- a/sys/arm64/arm64/trap.c +++ b/sys/arm64/arm64/trap.c @@ -1,688 +1,688 @@ /*- * Copyright (c) 2014 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include "opt_ddb.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #ifdef KDB #include #endif #include #include #include #include #include #include #include #include #include #include #include #ifdef KDTRACE_HOOKS #include #endif #ifdef VFP #include #endif #ifdef KDB #include #endif #ifdef DDB #include #include #endif /* Called from exception.S */ void do_el1h_sync(struct thread *, struct trapframe *); void do_el0_sync(struct thread *, struct trapframe *); void do_el0_error(struct trapframe *); void do_serror(struct trapframe *); void unhandled_exception(struct trapframe *); static void print_gp_register(const char *name, uint64_t value); static void print_registers(struct trapframe *frame); int (*dtrace_invop_jump_addr)(struct trapframe *); typedef void (abort_handler)(struct thread *, struct trapframe *, uint64_t, uint64_t, int); static abort_handler align_abort; static abort_handler data_abort; static abort_handler external_abort; static abort_handler *abort_handlers[] = { [ISS_DATA_DFSC_TF_L0] = data_abort, [ISS_DATA_DFSC_TF_L1] = data_abort, [ISS_DATA_DFSC_TF_L2] = data_abort, [ISS_DATA_DFSC_TF_L3] = data_abort, [ISS_DATA_DFSC_AFF_L1] = data_abort, [ISS_DATA_DFSC_AFF_L2] = data_abort, [ISS_DATA_DFSC_AFF_L3] = data_abort, [ISS_DATA_DFSC_PF_L1] = data_abort, [ISS_DATA_DFSC_PF_L2] = data_abort, [ISS_DATA_DFSC_PF_L3] = data_abort, [ISS_DATA_DFSC_ALIGN] = align_abort, [ISS_DATA_DFSC_EXT] = external_abort, [ISS_DATA_DFSC_EXT_L0] = external_abort, [ISS_DATA_DFSC_EXT_L1] = external_abort, [ISS_DATA_DFSC_EXT_L2] = external_abort, [ISS_DATA_DFSC_EXT_L3] = external_abort, [ISS_DATA_DFSC_ECC] = external_abort, [ISS_DATA_DFSC_ECC_L0] = external_abort, [ISS_DATA_DFSC_ECC_L1] = external_abort, [ISS_DATA_DFSC_ECC_L2] = external_abort, [ISS_DATA_DFSC_ECC_L3] = external_abort, }; static __inline void call_trapsignal(struct thread *td, int sig, int code, void *addr, int trapno) { ksiginfo_t ksi; ksiginfo_init_trap(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = code; ksi.ksi_addr = addr; ksi.ksi_trapno = trapno; trapsignal(td, &ksi); } int cpu_fetch_syscall_args(struct thread *td) { struct proc *p; - register_t *ap, *dst_ap; + syscallarg_t *ap, *dst_ap; struct syscall_args *sa; p = td->td_proc; sa = &td->td_sa; ap = td->td_frame->tf_x; dst_ap = &sa->args[0]; sa->code = td->td_frame->tf_x[8]; sa->original_code = sa->code; if (__predict_false(sa->code == SYS_syscall || sa->code == SYS___syscall)) { sa->code = *ap++; } else { *dst_ap++ = *ap++; } if (__predict_false(sa->code >= p->p_sysent->sv_size)) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; KASSERT(sa->callp->sy_narg <= nitems(sa->args), ("Syscall %d takes too many arguments", sa->code)); - memcpy(dst_ap, ap, (nitems(sa->args) - 1) * sizeof(register_t)); + memcpy(dst_ap, ap, (nitems(sa->args) - 1) * sizeof(*dst_ap)); td->td_retval[0] = 0; td->td_retval[1] = 0; return (0); } #include "../../kern/subr_syscall.c" /* * Test for fault generated by given access instruction in * bus_peek_ or bus_poke_ bus function. */ extern uint32_t generic_bs_peek_1f, generic_bs_peek_2f; extern uint32_t generic_bs_peek_4f, generic_bs_peek_8f; extern uint32_t generic_bs_poke_1f, generic_bs_poke_2f; extern uint32_t generic_bs_poke_4f, generic_bs_poke_8f; static bool test_bs_fault(void *addr) { return (addr == &generic_bs_peek_1f || addr == &generic_bs_peek_2f || addr == &generic_bs_peek_4f || addr == &generic_bs_peek_8f || addr == &generic_bs_poke_1f || addr == &generic_bs_poke_2f || addr == &generic_bs_poke_4f || addr == &generic_bs_poke_8f); } static void svc_handler(struct thread *td, struct trapframe *frame) { if ((frame->tf_esr & ESR_ELx_ISS_MASK) == 0) { syscallenter(td); syscallret(td); } else { call_trapsignal(td, SIGILL, ILL_ILLOPN, (void *)frame->tf_elr, ESR_ELx_EXCEPTION(frame->tf_esr)); userret(td, frame); } } static void align_abort(struct thread *td, struct trapframe *frame, uint64_t esr, uint64_t far, int lower) { if (!lower) { print_registers(frame); print_gp_register("far", far); printf(" esr: %.8lx\n", esr); panic("Misaligned access from kernel space!"); } call_trapsignal(td, SIGBUS, BUS_ADRALN, (void *)frame->tf_elr, ESR_ELx_EXCEPTION(frame->tf_esr)); userret(td, frame); } static void external_abort(struct thread *td, struct trapframe *frame, uint64_t esr, uint64_t far, int lower) { /* * Try to handle synchronous external aborts caused by * bus_space_peek() and/or bus_space_poke() functions. */ if (!lower && test_bs_fault((void *)frame->tf_elr)) { frame->tf_elr = (uint64_t)generic_bs_fault; return; } print_registers(frame); print_gp_register("far", far); panic("Unhandled EL%d external data abort", lower ? 0: 1); } static void data_abort(struct thread *td, struct trapframe *frame, uint64_t esr, uint64_t far, int lower) { struct vm_map *map; struct proc *p; struct pcb *pcb; vm_prot_t ftype; int error, sig, ucode; #ifdef KDB bool handled; #endif /* * According to the ARMv8-A rev. A.g, B2.10.5 "Load-Exclusive * and Store-Exclusive instruction usage restrictions", state * of the exclusive monitors after data abort exception is unknown. */ clrex(); #ifdef KDB if (kdb_active) { kdb_reenter(); return; } #endif pcb = td->td_pcb; p = td->td_proc; if (lower) map = &p->p_vmspace->vm_map; else { intr_enable(); /* We received a TBI/PAC/etc. fault from the kernel */ if (!ADDR_IS_CANONICAL(far)) { error = KERN_INVALID_ADDRESS; goto bad_far; } /* The top bit tells us which range to use */ if (ADDR_IS_KERNEL(far)) { map = kernel_map; } else { map = &p->p_vmspace->vm_map; if (map == NULL) map = kernel_map; } } /* * Try to handle translation, access flag, and permission faults. * Translation faults may occur as a result of the required * break-before-make sequence used when promoting or demoting * superpages. Such faults must not occur while holding the pmap lock, * or pmap_fault() will recurse on that lock. */ if ((lower || map == kernel_map || pcb->pcb_onfault != 0) && pmap_fault(map->pmap, esr, far) == KERN_SUCCESS) return; KASSERT(td->td_md.md_spinlock_count == 0, ("data abort with spinlock held")); if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) { print_registers(frame); print_gp_register("far", far); printf(" esr: %.8lx\n", esr); panic("data abort in critical section or under mutex"); } switch (ESR_ELx_EXCEPTION(esr)) { case EXCP_INSN_ABORT: case EXCP_INSN_ABORT_L: ftype = VM_PROT_EXECUTE; break; default: /* * If the exception was because of a read or cache operation * pass a read fault type into the vm code. Cache operations * need read permission but will set the WnR flag when the * memory is unmapped. */ if ((esr & ISS_DATA_WnR) == 0 || (esr & ISS_DATA_CM) != 0) ftype = VM_PROT_READ; else ftype = VM_PROT_WRITE; break; } /* Fault in the page. */ error = vm_fault_trap(map, far, ftype, VM_FAULT_NORMAL, &sig, &ucode); if (error != KERN_SUCCESS) { bad_far: if (lower) { call_trapsignal(td, sig, ucode, (void *)far, ESR_ELx_EXCEPTION(esr)); } else { if (td->td_intr_nesting_level == 0 && pcb->pcb_onfault != 0) { frame->tf_x[0] = error; frame->tf_elr = pcb->pcb_onfault; return; } printf("Fatal data abort:\n"); print_registers(frame); print_gp_register("far", far); printf(" esr: %.8lx\n", esr); #ifdef KDB if (debugger_on_trap) { kdb_why = KDB_WHY_TRAP; handled = kdb_trap(ESR_ELx_EXCEPTION(esr), 0, frame); kdb_why = KDB_WHY_UNSET; if (handled) return; } #endif panic("vm_fault failed: %lx error %d", frame->tf_elr, error); } } if (lower) userret(td, frame); } static void print_gp_register(const char *name, uint64_t value) { #if defined(DDB) c_db_sym_t sym; const char *sym_name; db_expr_t sym_value; db_expr_t offset; #endif printf(" %s: %16lx", name, value); #if defined(DDB) /* If this looks like a kernel address try to find the symbol */ if (value >= VM_MIN_KERNEL_ADDRESS) { sym = db_search_symbol(value, DB_STGY_ANY, &offset); if (sym != C_DB_SYM_NULL) { db_symbol_values(sym, &sym_name, &sym_value); printf(" (%s + %lx)", sym_name, offset); } } #endif printf("\n"); } static void print_registers(struct trapframe *frame) { char name[4]; u_int reg; for (reg = 0; reg < nitems(frame->tf_x); reg++) { snprintf(name, sizeof(name), "%sx%d", (reg < 10) ? " " : "", reg); print_gp_register(name, frame->tf_x[reg]); } printf(" sp: %16lx\n", frame->tf_sp); print_gp_register(" lr", frame->tf_lr); print_gp_register("elr", frame->tf_elr); printf("spsr: %8x\n", frame->tf_spsr); } void do_el1h_sync(struct thread *td, struct trapframe *frame) { uint32_t exception; uint64_t esr, far; int dfsc; /* Read the esr register to get the exception details */ esr = frame->tf_esr; exception = ESR_ELx_EXCEPTION(esr); #ifdef KDTRACE_HOOKS if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, exception)) return; #endif CTR4(KTR_TRAP, "do_el1_sync: curthread: %p, esr %lx, elr: %lx, frame: %p", td, esr, frame->tf_elr, frame); /* * Enable debug exceptions if we aren't already handling one. They will * be masked again in the exception handler's epilogue. */ if (exception != EXCP_BRK && exception != EXCP_WATCHPT_EL1 && exception != EXCP_SOFTSTP_EL1) dbg_enable(); switch (exception) { case EXCP_FP_SIMD: case EXCP_TRAP_FP: #ifdef VFP if ((td->td_pcb->pcb_fpflags & PCB_FP_KERN) != 0) { vfp_restore_state(); } else #endif { print_registers(frame); printf(" esr: %.8lx\n", esr); panic("VFP exception in the kernel"); } break; case EXCP_INSN_ABORT: case EXCP_DATA_ABORT: far = READ_SPECIALREG(far_el1); dfsc = esr & ISS_DATA_DFSC_MASK; if (dfsc < nitems(abort_handlers) && abort_handlers[dfsc] != NULL) { abort_handlers[dfsc](td, frame, esr, far, 0); } else { print_registers(frame); print_gp_register("far", far); printf(" esr: %.8lx\n", esr); panic("Unhandled EL1 %s abort: %x", exception == EXCP_INSN_ABORT ? "instruction" : "data", dfsc); } break; case EXCP_BRK: #ifdef KDTRACE_HOOKS if ((esr & ESR_ELx_ISS_MASK) == 0x40d && \ dtrace_invop_jump_addr != 0) { dtrace_invop_jump_addr(frame); break; } #endif #ifdef KDB kdb_trap(exception, 0, frame); #else panic("No debugger in kernel."); #endif break; case EXCP_WATCHPT_EL1: case EXCP_SOFTSTP_EL1: #ifdef KDB kdb_trap(exception, 0, frame); #else panic("No debugger in kernel."); #endif break; case EXCP_FPAC: /* We can see this if the authentication on PAC fails */ print_registers(frame); printf(" far: %16lx\n", READ_SPECIALREG(far_el1)); panic("FPAC kernel exception"); break; case EXCP_UNKNOWN: if (undef_insn(1, frame)) break; printf("Undefined instruction: %08x\n", *(uint32_t *)frame->tf_elr); /* FALLTHROUGH */ default: print_registers(frame); print_gp_register("far", READ_SPECIALREG(far_el1)); panic("Unknown kernel exception %x esr_el1 %lx", exception, esr); } } void do_el0_sync(struct thread *td, struct trapframe *frame) { pcpu_bp_harden bp_harden; uint32_t exception; uint64_t esr, far; int dfsc; /* Check we have a sane environment when entering from userland */ KASSERT((uintptr_t)get_pcpu() >= VM_MIN_KERNEL_ADDRESS, ("Invalid pcpu address from userland: %p (tpidr %lx)", get_pcpu(), READ_SPECIALREG(tpidr_el1))); esr = frame->tf_esr; exception = ESR_ELx_EXCEPTION(esr); switch (exception) { case EXCP_INSN_ABORT_L: far = READ_SPECIALREG(far_el1); /* * Userspace may be trying to train the branch predictor to * attack the kernel. If we are on a CPU affected by this * call the handler to clear the branch predictor state. */ if (far > VM_MAXUSER_ADDRESS) { bp_harden = PCPU_GET(bp_harden); if (bp_harden != NULL) bp_harden(); } break; case EXCP_UNKNOWN: case EXCP_DATA_ABORT_L: case EXCP_DATA_ABORT: case EXCP_WATCHPT_EL0: far = READ_SPECIALREG(far_el1); break; } intr_enable(); CTR4(KTR_TRAP, "do_el0_sync: curthread: %p, esr %lx, elr: %lx, frame: %p", td, esr, frame->tf_elr, frame); switch (exception) { case EXCP_FP_SIMD: case EXCP_TRAP_FP: #ifdef VFP vfp_restore_state(); #else panic("VFP exception in userland"); #endif break; case EXCP_SVC32: case EXCP_SVC64: svc_handler(td, frame); break; case EXCP_INSN_ABORT_L: case EXCP_DATA_ABORT_L: case EXCP_DATA_ABORT: dfsc = esr & ISS_DATA_DFSC_MASK; if (dfsc < nitems(abort_handlers) && abort_handlers[dfsc] != NULL) abort_handlers[dfsc](td, frame, esr, far, 1); else { print_registers(frame); print_gp_register("far", far); printf(" esr: %.8lx\n", esr); panic("Unhandled EL0 %s abort: %x", exception == EXCP_INSN_ABORT_L ? "instruction" : "data", dfsc); } break; case EXCP_UNKNOWN: if (!undef_insn(0, frame)) call_trapsignal(td, SIGILL, ILL_ILLTRP, (void *)far, exception); userret(td, frame); break; case EXCP_FPAC: call_trapsignal(td, SIGILL, ILL_ILLOPN, (void *)frame->tf_elr, exception); userret(td, frame); break; case EXCP_SP_ALIGN: call_trapsignal(td, SIGBUS, BUS_ADRALN, (void *)frame->tf_sp, exception); userret(td, frame); break; case EXCP_PC_ALIGN: call_trapsignal(td, SIGBUS, BUS_ADRALN, (void *)frame->tf_elr, exception); userret(td, frame); break; case EXCP_BRKPT_EL0: case EXCP_BRK: #ifdef COMPAT_FREEBSD32 case EXCP_BRKPT_32: #endif /* COMPAT_FREEBSD32 */ call_trapsignal(td, SIGTRAP, TRAP_BRKPT, (void *)frame->tf_elr, exception); userret(td, frame); break; case EXCP_WATCHPT_EL0: call_trapsignal(td, SIGTRAP, TRAP_TRACE, (void *)far, exception); userret(td, frame); break; case EXCP_MSR: /* * The CPU can raise EXCP_MSR when userspace executes an mrs * instruction to access a special register userspace doesn't * have access to. */ if (!undef_insn(0, frame)) call_trapsignal(td, SIGILL, ILL_PRVOPC, (void *)frame->tf_elr, exception); userret(td, frame); break; case EXCP_SOFTSTP_EL0: PROC_LOCK(td->td_proc); if ((td->td_dbgflags & TDB_STEP) != 0) { td->td_frame->tf_spsr &= ~PSR_SS; td->td_pcb->pcb_flags &= ~PCB_SINGLE_STEP; WRITE_SPECIALREG(mdscr_el1, READ_SPECIALREG(mdscr_el1) & ~MDSCR_SS); } PROC_UNLOCK(td->td_proc); call_trapsignal(td, SIGTRAP, TRAP_TRACE, (void *)frame->tf_elr, exception); userret(td, frame); break; default: call_trapsignal(td, SIGBUS, BUS_OBJERR, (void *)frame->tf_elr, exception); userret(td, frame); break; } KASSERT((td->td_pcb->pcb_fpflags & ~PCB_FP_USERMASK) == 0, ("Kernel VFP flags set while entering userspace")); KASSERT( td->td_pcb->pcb_fpusaved == &td->td_pcb->pcb_fpustate, ("Kernel VFP state in use when entering userspace")); } /* * TODO: We will need to handle these later when we support ARMv8.2 RAS. */ void do_serror(struct trapframe *frame) { uint64_t esr, far; far = READ_SPECIALREG(far_el1); esr = frame->tf_esr; print_registers(frame); print_gp_register("far", far); printf(" esr: %.8lx\n", esr); panic("Unhandled System Error"); } void unhandled_exception(struct trapframe *frame) { uint64_t esr, far; far = READ_SPECIALREG(far_el1); esr = frame->tf_esr; print_registers(frame); print_gp_register("far", far); printf(" esr: %.8lx\n", esr); panic("Unhandled exception"); } diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c index f102f7e7b0a4..be3081ac9983 100644 --- a/sys/kern/kern_ktrace.c +++ b/sys/kern/kern_ktrace.c @@ -1,1393 +1,1393 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. * Copyright (c) 2005 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_ktrace.c 8.2 (Berkeley) 9/23/93 */ #include __FBSDID("$FreeBSD$"); #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * The ktrace facility allows the tracing of certain key events in user space * processes, such as system calls, signal delivery, context switches, and * user generated events using utrace(2). It works by streaming event * records and data to a vnode associated with the process using the * ktrace(2) system call. In general, records can be written directly from * the context that generates the event. One important exception to this is * during a context switch, where sleeping is not permitted. To handle this * case, trace events are generated using in-kernel ktr_request records, and * then delivered to disk at a convenient moment -- either immediately, the * next traceable event, at system call return, or at process exit. * * When dealing with multiple threads or processes writing to the same event * log, ordering guarantees are weak: specifically, if an event has multiple * records (i.e., system call enter and return), they may be interlaced with * records from another event. Process and thread ID information is provided * in the record, and user applications can de-interlace events if required. */ static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE"); #ifdef KTRACE FEATURE(ktrace, "Kernel support for system-call tracing"); #ifndef KTRACE_REQUEST_POOL #define KTRACE_REQUEST_POOL 100 #endif struct ktr_request { struct ktr_header ktr_header; void *ktr_buffer; union { struct ktr_proc_ctor ktr_proc_ctor; struct ktr_cap_fail ktr_cap_fail; struct ktr_syscall ktr_syscall; struct ktr_sysret ktr_sysret; struct ktr_genio ktr_genio; struct ktr_psig ktr_psig; struct ktr_csw ktr_csw; struct ktr_fault ktr_fault; struct ktr_faultend ktr_faultend; struct ktr_struct_array ktr_struct_array; } ktr_data; STAILQ_ENTRY(ktr_request) ktr_list; }; static int data_lengths[] = { [KTR_SYSCALL] = offsetof(struct ktr_syscall, ktr_args), [KTR_SYSRET] = sizeof(struct ktr_sysret), [KTR_NAMEI] = 0, [KTR_GENIO] = sizeof(struct ktr_genio), [KTR_PSIG] = sizeof(struct ktr_psig), [KTR_CSW] = sizeof(struct ktr_csw), [KTR_USER] = 0, [KTR_STRUCT] = 0, [KTR_SYSCTL] = 0, [KTR_PROCCTOR] = sizeof(struct ktr_proc_ctor), [KTR_PROCDTOR] = 0, [KTR_CAPFAIL] = sizeof(struct ktr_cap_fail), [KTR_FAULT] = sizeof(struct ktr_fault), [KTR_FAULTEND] = sizeof(struct ktr_faultend), [KTR_STRUCT_ARRAY] = sizeof(struct ktr_struct_array), }; static STAILQ_HEAD(, ktr_request) ktr_free; static SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "KTRACE options"); static u_int ktr_requestpool = KTRACE_REQUEST_POOL; TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool); u_int ktr_geniosize = PAGE_SIZE; SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RWTUN, &ktr_geniosize, 0, "Maximum size of genio event payload"); /* * Allow to not to send signal to traced process, in which context the * ktr record is written. The limit is applied from the process that * set up ktrace, so killing the traced process is not completely fair. */ int ktr_filesize_limit_signal = 0; SYSCTL_INT(_kern_ktrace, OID_AUTO, filesize_limit_signal, CTLFLAG_RWTUN, &ktr_filesize_limit_signal, 0, "Send SIGXFSZ to the traced process when the log size limit is exceeded"); static int print_message = 1; static struct mtx ktrace_mtx; static struct sx ktrace_sx; struct ktr_io_params { struct vnode *vp; struct ucred *cr; off_t lim; u_int refs; }; static void ktrace_init(void *dummy); static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS); static u_int ktrace_resize_pool(u_int oldsize, u_int newsize); static struct ktr_request *ktr_getrequest_entered(struct thread *td, int type); static struct ktr_request *ktr_getrequest(int type); static void ktr_submitrequest(struct thread *td, struct ktr_request *req); static struct ktr_io_params *ktr_freeproc(struct proc *p); static void ktr_freerequest(struct ktr_request *req); static void ktr_freerequest_locked(struct ktr_request *req); static void ktr_writerequest(struct thread *td, struct ktr_request *req); static int ktrcanset(struct thread *,struct proc *); static int ktrsetchildren(struct thread *, struct proc *, int, int, struct ktr_io_params *); static int ktrops(struct thread *, struct proc *, int, int, struct ktr_io_params *); static void ktrprocctor_entered(struct thread *, struct proc *); /* * ktrace itself generates events, such as context switches, which we do not * wish to trace. Maintain a flag, TDP_INKTRACE, on each thread to determine * whether or not it is in a region where tracing of events should be * suppressed. */ static void ktrace_enter(struct thread *td) { KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set")); td->td_pflags |= TDP_INKTRACE; } static void ktrace_exit(struct thread *td) { KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set")); td->td_pflags &= ~TDP_INKTRACE; } static void ktrace_assert(struct thread *td) { KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set")); } static void ktrace_init(void *dummy) { struct ktr_request *req; int i; mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET); sx_init(&ktrace_sx, "ktrace_sx"); STAILQ_INIT(&ktr_free); for (i = 0; i < ktr_requestpool; i++) { req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK | M_ZERO); STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); } } SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL); static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS) { struct thread *td; u_int newsize, oldsize, wantsize; int error; /* Handle easy read-only case first to avoid warnings from GCC. */ if (!req->newptr) { oldsize = ktr_requestpool; return (SYSCTL_OUT(req, &oldsize, sizeof(u_int))); } error = SYSCTL_IN(req, &wantsize, sizeof(u_int)); if (error) return (error); td = curthread; ktrace_enter(td); oldsize = ktr_requestpool; newsize = ktrace_resize_pool(oldsize, wantsize); ktrace_exit(td); error = SYSCTL_OUT(req, &oldsize, sizeof(u_int)); if (error) return (error); if (wantsize > oldsize && newsize < wantsize) return (ENOSPC); return (0); } SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU", "Pool buffer size for ktrace(1)"); static u_int ktrace_resize_pool(u_int oldsize, u_int newsize) { STAILQ_HEAD(, ktr_request) ktr_new; struct ktr_request *req; int bound; print_message = 1; bound = newsize - oldsize; if (bound == 0) return (ktr_requestpool); if (bound < 0) { mtx_lock(&ktrace_mtx); /* Shrink pool down to newsize if possible. */ while (bound++ < 0) { req = STAILQ_FIRST(&ktr_free); if (req == NULL) break; STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); ktr_requestpool--; free(req, M_KTRACE); } } else { /* Grow pool up to newsize. */ STAILQ_INIT(&ktr_new); while (bound-- > 0) { req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK | M_ZERO); STAILQ_INSERT_HEAD(&ktr_new, req, ktr_list); } mtx_lock(&ktrace_mtx); STAILQ_CONCAT(&ktr_free, &ktr_new); ktr_requestpool += (newsize - oldsize); } mtx_unlock(&ktrace_mtx); return (ktr_requestpool); } /* ktr_getrequest() assumes that ktr_comm[] is the same size as td_name[]. */ CTASSERT(sizeof(((struct ktr_header *)NULL)->ktr_comm) == (sizeof((struct thread *)NULL)->td_name)); static struct ktr_request * ktr_getrequest_entered(struct thread *td, int type) { struct ktr_request *req; struct proc *p = td->td_proc; int pm; mtx_lock(&ktrace_mtx); if (!KTRCHECK(td, type)) { mtx_unlock(&ktrace_mtx); return (NULL); } req = STAILQ_FIRST(&ktr_free); if (req != NULL) { STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); req->ktr_header.ktr_type = type; if (p->p_traceflag & KTRFAC_DROP) { req->ktr_header.ktr_type |= KTR_DROP; p->p_traceflag &= ~KTRFAC_DROP; } mtx_unlock(&ktrace_mtx); microtime(&req->ktr_header.ktr_time); req->ktr_header.ktr_pid = p->p_pid; req->ktr_header.ktr_tid = td->td_tid; bcopy(td->td_name, req->ktr_header.ktr_comm, sizeof(req->ktr_header.ktr_comm)); req->ktr_buffer = NULL; req->ktr_header.ktr_len = 0; } else { p->p_traceflag |= KTRFAC_DROP; pm = print_message; print_message = 0; mtx_unlock(&ktrace_mtx); if (pm) printf("Out of ktrace request objects.\n"); } return (req); } static struct ktr_request * ktr_getrequest(int type) { struct thread *td = curthread; struct ktr_request *req; ktrace_enter(td); req = ktr_getrequest_entered(td, type); if (req == NULL) ktrace_exit(td); return (req); } /* * Some trace generation environments don't permit direct access to VFS, * such as during a context switch where sleeping is not allowed. Under these * circumstances, queue a request to the thread to be written asynchronously * later. */ static void ktr_enqueuerequest(struct thread *td, struct ktr_request *req) { mtx_lock(&ktrace_mtx); STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list); mtx_unlock(&ktrace_mtx); thread_lock(td); td->td_flags |= TDF_ASTPENDING; thread_unlock(td); } /* * Drain any pending ktrace records from the per-thread queue to disk. This * is used both internally before committing other records, and also on * system call return. We drain all the ones we can find at the time when * drain is requested, but don't keep draining after that as those events * may be approximately "after" the current event. */ static void ktr_drain(struct thread *td) { struct ktr_request *queued_req; STAILQ_HEAD(, ktr_request) local_queue; ktrace_assert(td); sx_assert(&ktrace_sx, SX_XLOCKED); STAILQ_INIT(&local_queue); if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) { mtx_lock(&ktrace_mtx); STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr); mtx_unlock(&ktrace_mtx); while ((queued_req = STAILQ_FIRST(&local_queue))) { STAILQ_REMOVE_HEAD(&local_queue, ktr_list); ktr_writerequest(td, queued_req); ktr_freerequest(queued_req); } } } /* * Submit a trace record for immediate commit to disk -- to be used only * where entering VFS is OK. First drain any pending records that may have * been cached in the thread. */ static void ktr_submitrequest(struct thread *td, struct ktr_request *req) { ktrace_assert(td); sx_xlock(&ktrace_sx); ktr_drain(td); ktr_writerequest(td, req); ktr_freerequest(req); sx_xunlock(&ktrace_sx); ktrace_exit(td); } static void ktr_freerequest(struct ktr_request *req) { mtx_lock(&ktrace_mtx); ktr_freerequest_locked(req); mtx_unlock(&ktrace_mtx); } static void ktr_freerequest_locked(struct ktr_request *req) { mtx_assert(&ktrace_mtx, MA_OWNED); if (req->ktr_buffer != NULL) free(req->ktr_buffer, M_KTRACE); STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); } static void ktr_io_params_ref(struct ktr_io_params *kiop) { mtx_assert(&ktrace_mtx, MA_OWNED); kiop->refs++; } static struct ktr_io_params * ktr_io_params_rele(struct ktr_io_params *kiop) { mtx_assert(&ktrace_mtx, MA_OWNED); if (kiop == NULL) return (NULL); KASSERT(kiop->refs > 0, ("kiop ref == 0 %p", kiop)); return (--(kiop->refs) == 0 ? kiop : NULL); } void ktr_io_params_free(struct ktr_io_params *kiop) { if (kiop == NULL) return; MPASS(kiop->refs == 0); vn_close(kiop->vp, FWRITE, kiop->cr, curthread); crfree(kiop->cr); free(kiop, M_KTRACE); } static struct ktr_io_params * ktr_io_params_alloc(struct thread *td, struct vnode *vp) { struct ktr_io_params *res; res = malloc(sizeof(struct ktr_io_params), M_KTRACE, M_WAITOK); res->vp = vp; res->cr = crhold(td->td_ucred); res->lim = lim_cur(td, RLIMIT_FSIZE); res->refs = 1; return (res); } /* * Disable tracing for a process and release all associated resources. * The caller is responsible for releasing a reference on the returned * vnode and credentials. */ static struct ktr_io_params * ktr_freeproc(struct proc *p) { struct ktr_io_params *kiop; struct ktr_request *req; PROC_LOCK_ASSERT(p, MA_OWNED); mtx_assert(&ktrace_mtx, MA_OWNED); kiop = ktr_io_params_rele(p->p_ktrioparms); p->p_ktrioparms = NULL; p->p_traceflag = 0; while ((req = STAILQ_FIRST(&p->p_ktr)) != NULL) { STAILQ_REMOVE_HEAD(&p->p_ktr, ktr_list); ktr_freerequest_locked(req); } return (kiop); } struct vnode * ktr_get_tracevp(struct proc *p, bool ref) { struct vnode *vp; PROC_LOCK_ASSERT(p, MA_OWNED); if (p->p_ktrioparms != NULL) { vp = p->p_ktrioparms->vp; if (ref) vrefact(vp); } else { vp = NULL; } return (vp); } void -ktrsyscall(int code, int narg, register_t args[]) +ktrsyscall(int code, int narg, syscallarg_t args[]) { struct ktr_request *req; struct ktr_syscall *ktp; size_t buflen; char *buf = NULL; if (__predict_false(curthread->td_pflags & TDP_INKTRACE)) return; buflen = sizeof(register_t) * narg; if (buflen > 0) { buf = malloc(buflen, M_KTRACE, M_WAITOK); bcopy(args, buf, buflen); } req = ktr_getrequest(KTR_SYSCALL); if (req == NULL) { if (buf != NULL) free(buf, M_KTRACE); return; } ktp = &req->ktr_data.ktr_syscall; ktp->ktr_code = code; ktp->ktr_narg = narg; if (buflen > 0) { req->ktr_header.ktr_len = buflen; req->ktr_buffer = buf; } ktr_submitrequest(curthread, req); } void ktrsysret(int code, int error, register_t retval) { struct ktr_request *req; struct ktr_sysret *ktp; if (__predict_false(curthread->td_pflags & TDP_INKTRACE)) return; req = ktr_getrequest(KTR_SYSRET); if (req == NULL) return; ktp = &req->ktr_data.ktr_sysret; ktp->ktr_code = code; ktp->ktr_error = error; ktp->ktr_retval = ((error == 0) ? retval: 0); /* what about val2 ? */ ktr_submitrequest(curthread, req); } /* * When a setuid process execs, disable tracing. * * XXX: We toss any pending asynchronous records. */ struct ktr_io_params * ktrprocexec(struct proc *p) { struct ktr_io_params *kiop; PROC_LOCK_ASSERT(p, MA_OWNED); kiop = p->p_ktrioparms; if (kiop == NULL || priv_check_cred(kiop->cr, PRIV_DEBUG_DIFFCRED)) return (NULL); mtx_lock(&ktrace_mtx); kiop = ktr_freeproc(p); mtx_unlock(&ktrace_mtx); return (kiop); } /* * When a process exits, drain per-process asynchronous trace records * and disable tracing. */ void ktrprocexit(struct thread *td) { struct ktr_request *req; struct proc *p; struct ktr_io_params *kiop; p = td->td_proc; if (p->p_traceflag == 0) return; ktrace_enter(td); req = ktr_getrequest_entered(td, KTR_PROCDTOR); if (req != NULL) ktr_enqueuerequest(td, req); sx_xlock(&ktrace_sx); ktr_drain(td); sx_xunlock(&ktrace_sx); PROC_LOCK(p); mtx_lock(&ktrace_mtx); kiop = ktr_freeproc(p); mtx_unlock(&ktrace_mtx); PROC_UNLOCK(p); ktr_io_params_free(kiop); ktrace_exit(td); } static void ktrprocctor_entered(struct thread *td, struct proc *p) { struct ktr_proc_ctor *ktp; struct ktr_request *req; struct thread *td2; ktrace_assert(td); td2 = FIRST_THREAD_IN_PROC(p); req = ktr_getrequest_entered(td2, KTR_PROCCTOR); if (req == NULL) return; ktp = &req->ktr_data.ktr_proc_ctor; ktp->sv_flags = p->p_sysent->sv_flags; ktr_enqueuerequest(td2, req); } void ktrprocctor(struct proc *p) { struct thread *td = curthread; if ((p->p_traceflag & KTRFAC_MASK) == 0) return; ktrace_enter(td); ktrprocctor_entered(td, p); ktrace_exit(td); } /* * When a process forks, enable tracing in the new process if needed. */ void ktrprocfork(struct proc *p1, struct proc *p2) { MPASS(p2->p_ktrioparms == NULL); MPASS(p2->p_traceflag == 0); if (p1->p_traceflag == 0) return; PROC_LOCK(p1); mtx_lock(&ktrace_mtx); if (p1->p_traceflag & KTRFAC_INHERIT) { p2->p_traceflag = p1->p_traceflag; if ((p2->p_ktrioparms = p1->p_ktrioparms) != NULL) p1->p_ktrioparms->refs++; } mtx_unlock(&ktrace_mtx); PROC_UNLOCK(p1); ktrprocctor(p2); } /* * When a thread returns, drain any asynchronous records generated by the * system call. */ void ktruserret(struct thread *td) { ktrace_enter(td); sx_xlock(&ktrace_sx); ktr_drain(td); sx_xunlock(&ktrace_sx); ktrace_exit(td); } void ktrnamei(const char *path) { struct ktr_request *req; int namelen; char *buf = NULL; namelen = strlen(path); if (namelen > 0) { buf = malloc(namelen, M_KTRACE, M_WAITOK); bcopy(path, buf, namelen); } req = ktr_getrequest(KTR_NAMEI); if (req == NULL) { if (buf != NULL) free(buf, M_KTRACE); return; } if (namelen > 0) { req->ktr_header.ktr_len = namelen; req->ktr_buffer = buf; } ktr_submitrequest(curthread, req); } void ktrsysctl(int *name, u_int namelen) { struct ktr_request *req; u_int mib[CTL_MAXNAME + 2]; char *mibname; size_t mibnamelen; int error; /* Lookup name of mib. */ KASSERT(namelen <= CTL_MAXNAME, ("sysctl MIB too long")); mib[0] = 0; mib[1] = 1; bcopy(name, mib + 2, namelen * sizeof(*name)); mibnamelen = 128; mibname = malloc(mibnamelen, M_KTRACE, M_WAITOK); error = kernel_sysctl(curthread, mib, namelen + 2, mibname, &mibnamelen, NULL, 0, &mibnamelen, 0); if (error) { free(mibname, M_KTRACE); return; } req = ktr_getrequest(KTR_SYSCTL); if (req == NULL) { free(mibname, M_KTRACE); return; } req->ktr_header.ktr_len = mibnamelen; req->ktr_buffer = mibname; ktr_submitrequest(curthread, req); } void ktrgenio(int fd, enum uio_rw rw, struct uio *uio, int error) { struct ktr_request *req; struct ktr_genio *ktg; int datalen; char *buf; if (error) { free(uio, M_IOV); return; } uio->uio_offset = 0; uio->uio_rw = UIO_WRITE; datalen = MIN(uio->uio_resid, ktr_geniosize); buf = malloc(datalen, M_KTRACE, M_WAITOK); error = uiomove(buf, datalen, uio); free(uio, M_IOV); if (error) { free(buf, M_KTRACE); return; } req = ktr_getrequest(KTR_GENIO); if (req == NULL) { free(buf, M_KTRACE); return; } ktg = &req->ktr_data.ktr_genio; ktg->ktr_fd = fd; ktg->ktr_rw = rw; req->ktr_header.ktr_len = datalen; req->ktr_buffer = buf; ktr_submitrequest(curthread, req); } void ktrpsig(int sig, sig_t action, sigset_t *mask, int code) { struct thread *td = curthread; struct ktr_request *req; struct ktr_psig *kp; req = ktr_getrequest(KTR_PSIG); if (req == NULL) return; kp = &req->ktr_data.ktr_psig; kp->signo = (char)sig; kp->action = action; kp->mask = *mask; kp->code = code; ktr_enqueuerequest(td, req); ktrace_exit(td); } void ktrcsw(int out, int user, const char *wmesg) { struct thread *td = curthread; struct ktr_request *req; struct ktr_csw *kc; if (__predict_false(curthread->td_pflags & TDP_INKTRACE)) return; req = ktr_getrequest(KTR_CSW); if (req == NULL) return; kc = &req->ktr_data.ktr_csw; kc->out = out; kc->user = user; if (wmesg != NULL) strlcpy(kc->wmesg, wmesg, sizeof(kc->wmesg)); else bzero(kc->wmesg, sizeof(kc->wmesg)); ktr_enqueuerequest(td, req); ktrace_exit(td); } void ktrstruct(const char *name, const void *data, size_t datalen) { struct ktr_request *req; char *buf; size_t buflen, namelen; if (__predict_false(curthread->td_pflags & TDP_INKTRACE)) return; if (data == NULL) datalen = 0; namelen = strlen(name) + 1; buflen = namelen + datalen; buf = malloc(buflen, M_KTRACE, M_WAITOK); strcpy(buf, name); bcopy(data, buf + namelen, datalen); if ((req = ktr_getrequest(KTR_STRUCT)) == NULL) { free(buf, M_KTRACE); return; } req->ktr_buffer = buf; req->ktr_header.ktr_len = buflen; ktr_submitrequest(curthread, req); } void ktrstruct_error(const char *name, const void *data, size_t datalen, int error) { if (error == 0) ktrstruct(name, data, datalen); } void ktrstructarray(const char *name, enum uio_seg seg, const void *data, int num_items, size_t struct_size) { struct ktr_request *req; struct ktr_struct_array *ksa; char *buf; size_t buflen, datalen, namelen; int max_items; if (__predict_false(curthread->td_pflags & TDP_INKTRACE)) return; if (num_items < 0) return; /* Trim array length to genio size. */ max_items = ktr_geniosize / struct_size; if (num_items > max_items) { if (max_items == 0) num_items = 1; else num_items = max_items; } datalen = num_items * struct_size; if (data == NULL) datalen = 0; namelen = strlen(name) + 1; buflen = namelen + datalen; buf = malloc(buflen, M_KTRACE, M_WAITOK); strcpy(buf, name); if (seg == UIO_SYSSPACE) bcopy(data, buf + namelen, datalen); else { if (copyin(data, buf + namelen, datalen) != 0) { free(buf, M_KTRACE); return; } } if ((req = ktr_getrequest(KTR_STRUCT_ARRAY)) == NULL) { free(buf, M_KTRACE); return; } ksa = &req->ktr_data.ktr_struct_array; ksa->struct_size = struct_size; req->ktr_buffer = buf; req->ktr_header.ktr_len = buflen; ktr_submitrequest(curthread, req); } void ktrcapfail(enum ktr_cap_fail_type type, const cap_rights_t *needed, const cap_rights_t *held) { struct thread *td = curthread; struct ktr_request *req; struct ktr_cap_fail *kcf; if (__predict_false(curthread->td_pflags & TDP_INKTRACE)) return; req = ktr_getrequest(KTR_CAPFAIL); if (req == NULL) return; kcf = &req->ktr_data.ktr_cap_fail; kcf->cap_type = type; if (needed != NULL) kcf->cap_needed = *needed; else cap_rights_init(&kcf->cap_needed); if (held != NULL) kcf->cap_held = *held; else cap_rights_init(&kcf->cap_held); ktr_enqueuerequest(td, req); ktrace_exit(td); } void ktrfault(vm_offset_t vaddr, int type) { struct thread *td = curthread; struct ktr_request *req; struct ktr_fault *kf; if (__predict_false(curthread->td_pflags & TDP_INKTRACE)) return; req = ktr_getrequest(KTR_FAULT); if (req == NULL) return; kf = &req->ktr_data.ktr_fault; kf->vaddr = vaddr; kf->type = type; ktr_enqueuerequest(td, req); ktrace_exit(td); } void ktrfaultend(int result) { struct thread *td = curthread; struct ktr_request *req; struct ktr_faultend *kf; if (__predict_false(curthread->td_pflags & TDP_INKTRACE)) return; req = ktr_getrequest(KTR_FAULTEND); if (req == NULL) return; kf = &req->ktr_data.ktr_faultend; kf->result = result; ktr_enqueuerequest(td, req); ktrace_exit(td); } #endif /* KTRACE */ /* Interface and common routines */ #ifndef _SYS_SYSPROTO_H_ struct ktrace_args { char *fname; int ops; int facs; int pid; }; #endif /* ARGSUSED */ int sys_ktrace(struct thread *td, struct ktrace_args *uap) { #ifdef KTRACE struct vnode *vp = NULL; struct proc *p; struct pgrp *pg; int facs = uap->facs & ~KTRFAC_ROOT; int ops = KTROP(uap->ops); int descend = uap->ops & KTRFLAG_DESCEND; int ret = 0; int flags, error = 0; struct nameidata nd; struct ktr_io_params *kiop, *old_kiop; /* * Need something to (un)trace. */ if (ops != KTROP_CLEARFILE && facs == 0) return (EINVAL); kiop = NULL; if (ops != KTROP_CLEAR) { /* * an operation which requires a file argument. */ NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->fname); flags = FREAD | FWRITE | O_NOFOLLOW; error = vn_open(&nd, &flags, 0, NULL); if (error) return (error); NDFREE_PNBUF(&nd); vp = nd.ni_vp; VOP_UNLOCK(vp); if (vp->v_type != VREG) { (void)vn_close(vp, FREAD|FWRITE, td->td_ucred, td); return (EACCES); } kiop = ktr_io_params_alloc(td, vp); } /* * Clear all uses of the tracefile. */ ktrace_enter(td); if (ops == KTROP_CLEARFILE) { restart: sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { old_kiop = NULL; PROC_LOCK(p); if (p->p_ktrioparms != NULL && p->p_ktrioparms->vp == vp) { if (ktrcanset(td, p)) { mtx_lock(&ktrace_mtx); old_kiop = ktr_freeproc(p); mtx_unlock(&ktrace_mtx); } else error = EPERM; } PROC_UNLOCK(p); if (old_kiop != NULL) { sx_sunlock(&allproc_lock); ktr_io_params_free(old_kiop); goto restart; } } sx_sunlock(&allproc_lock); goto done; } /* * do it */ sx_slock(&proctree_lock); if (uap->pid < 0) { /* * by process group */ pg = pgfind(-uap->pid); if (pg == NULL) { sx_sunlock(&proctree_lock); error = ESRCH; goto done; } /* * ktrops() may call vrele(). Lock pg_members * by the proctree_lock rather than pg_mtx. */ PGRP_UNLOCK(pg); if (LIST_EMPTY(&pg->pg_members)) { sx_sunlock(&proctree_lock); error = ESRCH; goto done; } LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); if (descend) ret |= ktrsetchildren(td, p, ops, facs, kiop); else ret |= ktrops(td, p, ops, facs, kiop); } } else { /* * by pid */ p = pfind(uap->pid); if (p == NULL) { error = ESRCH; sx_sunlock(&proctree_lock); goto done; } if (descend) ret |= ktrsetchildren(td, p, ops, facs, kiop); else ret |= ktrops(td, p, ops, facs, kiop); } sx_sunlock(&proctree_lock); if (!ret) error = EPERM; done: if (kiop != NULL) { mtx_lock(&ktrace_mtx); kiop = ktr_io_params_rele(kiop); mtx_unlock(&ktrace_mtx); ktr_io_params_free(kiop); } ktrace_exit(td); return (error); #else /* !KTRACE */ return (ENOSYS); #endif /* KTRACE */ } /* ARGSUSED */ int sys_utrace(struct thread *td, struct utrace_args *uap) { #ifdef KTRACE struct ktr_request *req; void *cp; int error; if (!KTRPOINT(td, KTR_USER)) return (0); if (uap->len > KTR_USER_MAXLEN) return (EINVAL); cp = malloc(uap->len, M_KTRACE, M_WAITOK); error = copyin(uap->addr, cp, uap->len); if (error) { free(cp, M_KTRACE); return (error); } req = ktr_getrequest(KTR_USER); if (req == NULL) { free(cp, M_KTRACE); return (ENOMEM); } req->ktr_buffer = cp; req->ktr_header.ktr_len = uap->len; ktr_submitrequest(td, req); return (0); #else /* !KTRACE */ return (ENOSYS); #endif /* KTRACE */ } #ifdef KTRACE static int ktrops(struct thread *td, struct proc *p, int ops, int facs, struct ktr_io_params *new_kiop) { struct ktr_io_params *old_kiop; PROC_LOCK_ASSERT(p, MA_OWNED); if (!ktrcanset(td, p)) { PROC_UNLOCK(p); return (0); } if ((ops == KTROP_SET && p->p_state == PRS_NEW) || p_cansee(td, p) != 0) { /* * Disallow setting trace points if the process is being born. * This avoids races with trace point inheritance in * ktrprocfork(). */ PROC_UNLOCK(p); return (0); } if ((p->p_flag & P_WEXIT) != 0) { /* * There's nothing to do if the process is exiting, but avoid * signaling an error. */ PROC_UNLOCK(p); return (1); } old_kiop = NULL; mtx_lock(&ktrace_mtx); if (ops == KTROP_SET) { if (p->p_ktrioparms != NULL && p->p_ktrioparms->vp != new_kiop->vp) { /* if trace file already in use, relinquish below */ old_kiop = ktr_io_params_rele(p->p_ktrioparms); p->p_ktrioparms = NULL; } if (p->p_ktrioparms == NULL) { p->p_ktrioparms = new_kiop; ktr_io_params_ref(new_kiop); } p->p_traceflag |= facs; if (priv_check(td, PRIV_KTRACE) == 0) p->p_traceflag |= KTRFAC_ROOT; } else { /* KTROP_CLEAR */ if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) /* no more tracing */ old_kiop = ktr_freeproc(p); } mtx_unlock(&ktrace_mtx); if ((p->p_traceflag & KTRFAC_MASK) != 0) ktrprocctor_entered(td, p); PROC_UNLOCK(p); ktr_io_params_free(old_kiop); return (1); } static int ktrsetchildren(struct thread *td, struct proc *top, int ops, int facs, struct ktr_io_params *new_kiop) { struct proc *p; int ret = 0; p = top; PROC_LOCK_ASSERT(p, MA_OWNED); sx_assert(&proctree_lock, SX_LOCKED); for (;;) { ret |= ktrops(td, p, ops, facs, new_kiop); /* * If this process has children, descend to them next, * otherwise do any siblings, and if done with this level, * follow back up the tree (but not past top). */ if (!LIST_EMPTY(&p->p_children)) p = LIST_FIRST(&p->p_children); else for (;;) { if (p == top) return (ret); if (LIST_NEXT(p, p_sibling)) { p = LIST_NEXT(p, p_sibling); break; } p = p->p_pptr; } PROC_LOCK(p); } /*NOTREACHED*/ } static void ktr_writerequest(struct thread *td, struct ktr_request *req) { struct ktr_io_params *kiop, *kiop1; struct ktr_header *kth; struct vnode *vp; struct proc *p; struct ucred *cred; struct uio auio; struct iovec aiov[3]; struct mount *mp; off_t lim; int datalen, buflen; int error; p = td->td_proc; /* * We reference the kiop for use in I/O in case ktrace is * disabled on the process as we write out the request. */ mtx_lock(&ktrace_mtx); kiop = p->p_ktrioparms; /* * If kiop is NULL, it has been cleared out from under this * request, so just drop it. */ if (kiop == NULL) { mtx_unlock(&ktrace_mtx); return; } ktr_io_params_ref(kiop); vp = kiop->vp; cred = kiop->cr; lim = kiop->lim; KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL")); mtx_unlock(&ktrace_mtx); kth = &req->ktr_header; KASSERT(((u_short)kth->ktr_type & ~KTR_DROP) < nitems(data_lengths), ("data_lengths array overflow")); datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP]; buflen = kth->ktr_len; auio.uio_iov = &aiov[0]; auio.uio_offset = 0; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_WRITE; aiov[0].iov_base = (caddr_t)kth; aiov[0].iov_len = sizeof(struct ktr_header); auio.uio_resid = sizeof(struct ktr_header); auio.uio_iovcnt = 1; auio.uio_td = td; if (datalen != 0) { aiov[1].iov_base = (caddr_t)&req->ktr_data; aiov[1].iov_len = datalen; auio.uio_resid += datalen; auio.uio_iovcnt++; kth->ktr_len += datalen; } if (buflen != 0) { KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write")); aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer; aiov[auio.uio_iovcnt].iov_len = buflen; auio.uio_resid += buflen; auio.uio_iovcnt++; } vn_start_write(vp, &mp, V_WAIT); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); td->td_ktr_io_lim = lim; #ifdef MAC error = mac_vnode_check_write(cred, NOCRED, vp); if (error == 0) #endif error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred); VOP_UNLOCK(vp); vn_finished_write(mp); if (error == 0) { mtx_lock(&ktrace_mtx); kiop = ktr_io_params_rele(kiop); mtx_unlock(&ktrace_mtx); ktr_io_params_free(kiop); return; } /* * If error encountered, give up tracing on this vnode on this * process. Other processes might still be suitable for * writes to this vnode. */ log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped for pid %d\n", error, p->p_pid); kiop1 = NULL; PROC_LOCK(p); mtx_lock(&ktrace_mtx); if (p->p_ktrioparms != NULL && p->p_ktrioparms->vp == vp) kiop1 = ktr_freeproc(p); kiop = ktr_io_params_rele(kiop); mtx_unlock(&ktrace_mtx); PROC_UNLOCK(p); ktr_io_params_free(kiop1); ktr_io_params_free(kiop); } /* * Return true if caller has permission to set the ktracing state * of target. Essentially, the target can't possess any * more permissions than the caller. KTRFAC_ROOT signifies that * root previously set the tracing status on the target process, and * so, only root may further change it. */ static int ktrcanset(struct thread *td, struct proc *targetp) { PROC_LOCK_ASSERT(targetp, MA_OWNED); if (targetp->p_traceflag & KTRFAC_ROOT && priv_check(td, PRIV_KTRACE)) return (0); if (p_candebug(td, targetp) != 0) return (0); return (1); } #endif /* KTRACE */ diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c index 965e22a38598..ce8529bee256 100644 --- a/sys/kern/sys_process.c +++ b/sys/kern/sys_process.c @@ -1,1616 +1,1616 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1994, Sean Eric Fagan * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Sean Eric Fagan. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_FREEBSD32 #include #endif /* Assert it's safe to unlock a process, e.g. to allocate working memory */ #define PROC_ASSERT_TRACEREQ(p) MPASS(((p)->p_flag2 & P2_PTRACEREQ) != 0) /* * Functions implemented using PROC_ACTION(): * * proc_read_regs(proc, regs) * Get the current user-visible register set from the process * and copy it into the regs structure (). * The process is stopped at the time read_regs is called. * * proc_write_regs(proc, regs) * Update the current register set from the passed in regs * structure. Take care to avoid clobbering special CPU * registers or privileged bits in the PSL. * Depending on the architecture this may have fix-up work to do, * especially if the IAR or PCW are modified. * The process is stopped at the time write_regs is called. * * proc_read_fpregs, proc_write_fpregs * deal with the floating point register set, otherwise as above. * * proc_read_dbregs, proc_write_dbregs * deal with the processor debug register set, otherwise as above. * * proc_sstep(proc) * Arrange for the process to trap after executing a single instruction. */ #define PROC_ACTION(action) do { \ int error; \ \ PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); \ if ((td->td_proc->p_flag & P_INMEM) == 0) \ error = EIO; \ else \ error = (action); \ return (error); \ } while (0) int proc_read_regs(struct thread *td, struct reg *regs) { PROC_ACTION(fill_regs(td, regs)); } int proc_write_regs(struct thread *td, struct reg *regs) { PROC_ACTION(set_regs(td, regs)); } int proc_read_dbregs(struct thread *td, struct dbreg *dbregs) { PROC_ACTION(fill_dbregs(td, dbregs)); } int proc_write_dbregs(struct thread *td, struct dbreg *dbregs) { PROC_ACTION(set_dbregs(td, dbregs)); } /* * Ptrace doesn't support fpregs at all, and there are no security holes * or translations for fpregs, so we can just copy them. */ int proc_read_fpregs(struct thread *td, struct fpreg *fpregs) { PROC_ACTION(fill_fpregs(td, fpregs)); } int proc_write_fpregs(struct thread *td, struct fpreg *fpregs) { PROC_ACTION(set_fpregs(td, fpregs)); } static struct regset * proc_find_regset(struct thread *td, int note) { struct regset **regsetp, **regset_end, *regset; struct sysentvec *sv; sv = td->td_proc->p_sysent; regsetp = sv->sv_regset_begin; if (regsetp == NULL) return (NULL); regset_end = sv->sv_regset_end; MPASS(regset_end != NULL); for (; regsetp < regset_end; regsetp++) { regset = *regsetp; if (regset->note != note) continue; return (regset); } return (NULL); } static int proc_read_regset(struct thread *td, int note, struct iovec *iov) { struct regset *regset; struct proc *p; void *buf; size_t size; int error; regset = proc_find_regset(td, note); if (regset == NULL) return (EINVAL); if (iov->iov_base == NULL) { iov->iov_len = regset->size; if (iov->iov_len == 0) return (EINVAL); return (0); } /* The length is wrong, return an error */ if (iov->iov_len != regset->size) return (EINVAL); if (regset->get == NULL) return (EINVAL); error = 0; size = regset->size; p = td->td_proc; /* Drop the proc lock while allocating the temp buffer */ PROC_ASSERT_TRACEREQ(p); PROC_UNLOCK(p); buf = malloc(size, M_TEMP, M_WAITOK); PROC_LOCK(p); if (!regset->get(regset, td, buf, &size)) { error = EINVAL; } else { KASSERT(size == regset->size, ("%s: Getter function changed the size", __func__)); iov->iov_len = size; PROC_UNLOCK(p); error = copyout(buf, iov->iov_base, size); PROC_LOCK(p); } free(buf, M_TEMP); return (error); } static int proc_write_regset(struct thread *td, int note, struct iovec *iov) { struct regset *regset; struct proc *p; void *buf; size_t size; int error; regset = proc_find_regset(td, note); if (regset == NULL) return (EINVAL); /* The length is wrong, return an error */ if (iov->iov_len != regset->size) return (EINVAL); if (regset->set == NULL) return (EINVAL); size = regset->size; p = td->td_proc; /* Drop the proc lock while allocating the temp buffer */ PROC_ASSERT_TRACEREQ(p); PROC_UNLOCK(p); buf = malloc(size, M_TEMP, M_WAITOK); error = copyin(iov->iov_base, buf, size); PROC_LOCK(p); if (error == 0) { if (!regset->set(regset, td, buf, size)) { error = EINVAL; } } free(buf, M_TEMP); return (error); } #ifdef COMPAT_FREEBSD32 /* For 32 bit binaries, we need to expose the 32 bit regs layouts. */ int proc_read_regs32(struct thread *td, struct reg32 *regs32) { PROC_ACTION(fill_regs32(td, regs32)); } int proc_write_regs32(struct thread *td, struct reg32 *regs32) { PROC_ACTION(set_regs32(td, regs32)); } int proc_read_dbregs32(struct thread *td, struct dbreg32 *dbregs32) { PROC_ACTION(fill_dbregs32(td, dbregs32)); } int proc_write_dbregs32(struct thread *td, struct dbreg32 *dbregs32) { PROC_ACTION(set_dbregs32(td, dbregs32)); } int proc_read_fpregs32(struct thread *td, struct fpreg32 *fpregs32) { PROC_ACTION(fill_fpregs32(td, fpregs32)); } int proc_write_fpregs32(struct thread *td, struct fpreg32 *fpregs32) { PROC_ACTION(set_fpregs32(td, fpregs32)); } #endif int proc_sstep(struct thread *td) { PROC_ACTION(ptrace_single_step(td)); } int proc_rwmem(struct proc *p, struct uio *uio) { vm_map_t map; vm_offset_t pageno; /* page number */ vm_prot_t reqprot; int error, fault_flags, page_offset, writing; /* * Make sure that the process' vmspace remains live. */ if (p != curproc) PROC_ASSERT_HELD(p); PROC_LOCK_ASSERT(p, MA_NOTOWNED); /* * The map we want... */ map = &p->p_vmspace->vm_map; /* * If we are writing, then we request vm_fault() to create a private * copy of each page. Since these copies will not be writeable by the * process, we must explicity request that they be dirtied. */ writing = uio->uio_rw == UIO_WRITE; reqprot = writing ? VM_PROT_COPY | VM_PROT_READ : VM_PROT_READ; fault_flags = writing ? VM_FAULT_DIRTY : VM_FAULT_NORMAL; /* * Only map in one page at a time. We don't have to, but it * makes things easier. This way is trivial - right? */ do { vm_offset_t uva; u_int len; vm_page_t m; uva = (vm_offset_t)uio->uio_offset; /* * Get the page number of this segment. */ pageno = trunc_page(uva); page_offset = uva - pageno; /* * How many bytes to copy */ len = min(PAGE_SIZE - page_offset, uio->uio_resid); /* * Fault and hold the page on behalf of the process. */ error = vm_fault(map, pageno, reqprot, fault_flags, &m); if (error != KERN_SUCCESS) { if (error == KERN_RESOURCE_SHORTAGE) error = ENOMEM; else error = EFAULT; break; } /* * Now do the i/o move. */ error = uiomove_fromphys(&m, page_offset, len, uio); /* Make the I-cache coherent for breakpoints. */ if (writing && error == 0) { vm_map_lock_read(map); if (vm_map_check_protection(map, pageno, pageno + PAGE_SIZE, VM_PROT_EXECUTE)) vm_sync_icache(map, uva, len); vm_map_unlock_read(map); } /* * Release the page. */ vm_page_unwire(m, PQ_ACTIVE); } while (error == 0 && uio->uio_resid > 0); return (error); } static ssize_t proc_iop(struct thread *td, struct proc *p, vm_offset_t va, void *buf, size_t len, enum uio_rw rw) { struct iovec iov; struct uio uio; ssize_t slen; MPASS(len < SSIZE_MAX); slen = (ssize_t)len; iov.iov_base = (caddr_t)buf; iov.iov_len = len; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = va; uio.uio_resid = slen; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = rw; uio.uio_td = td; proc_rwmem(p, &uio); if (uio.uio_resid == slen) return (-1); return (slen - uio.uio_resid); } ssize_t proc_readmem(struct thread *td, struct proc *p, vm_offset_t va, void *buf, size_t len) { return (proc_iop(td, p, va, buf, len, UIO_READ)); } ssize_t proc_writemem(struct thread *td, struct proc *p, vm_offset_t va, void *buf, size_t len) { return (proc_iop(td, p, va, buf, len, UIO_WRITE)); } static int ptrace_vm_entry(struct thread *td, struct proc *p, struct ptrace_vm_entry *pve) { struct vattr vattr; vm_map_t map; vm_map_entry_t entry; vm_object_t obj, tobj, lobj; struct vmspace *vm; struct vnode *vp; char *freepath, *fullpath; u_int pathlen; int error, index; error = 0; obj = NULL; vm = vmspace_acquire_ref(p); map = &vm->vm_map; vm_map_lock_read(map); do { KASSERT((map->header.eflags & MAP_ENTRY_IS_SUB_MAP) == 0, ("Submap in map header")); index = 0; VM_MAP_ENTRY_FOREACH(entry, map) { if (index >= pve->pve_entry && (entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) break; index++; } if (index < pve->pve_entry) { error = EINVAL; break; } if (entry == &map->header) { error = ENOENT; break; } /* We got an entry. */ pve->pve_entry = index + 1; pve->pve_timestamp = map->timestamp; pve->pve_start = entry->start; pve->pve_end = entry->end - 1; pve->pve_offset = entry->offset; pve->pve_prot = entry->protection; /* Backing object's path needed? */ if (pve->pve_pathlen == 0) break; pathlen = pve->pve_pathlen; pve->pve_pathlen = 0; obj = entry->object.vm_object; if (obj != NULL) VM_OBJECT_RLOCK(obj); } while (0); vm_map_unlock_read(map); pve->pve_fsid = VNOVAL; pve->pve_fileid = VNOVAL; if (error == 0 && obj != NULL) { lobj = obj; for (tobj = obj; tobj != NULL; tobj = tobj->backing_object) { if (tobj != obj) VM_OBJECT_RLOCK(tobj); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); lobj = tobj; pve->pve_offset += tobj->backing_object_offset; } vp = vm_object_vnode(lobj); if (vp != NULL) vref(vp); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); VM_OBJECT_RUNLOCK(obj); if (vp != NULL) { freepath = NULL; fullpath = NULL; vn_fullpath(vp, &fullpath, &freepath); vn_lock(vp, LK_SHARED | LK_RETRY); if (VOP_GETATTR(vp, &vattr, td->td_ucred) == 0) { pve->pve_fileid = vattr.va_fileid; pve->pve_fsid = vattr.va_fsid; } vput(vp); if (fullpath != NULL) { pve->pve_pathlen = strlen(fullpath) + 1; if (pve->pve_pathlen <= pathlen) { error = copyout(fullpath, pve->pve_path, pve->pve_pathlen); } else error = ENAMETOOLONG; } if (freepath != NULL) free(freepath, M_TEMP); } } vmspace_free(vm); if (error == 0) CTR3(KTR_PTRACE, "PT_VM_ENTRY: pid %d, entry %d, start %p", p->p_pid, pve->pve_entry, pve->pve_start); return (error); } /* * Process debugging system call. */ #ifndef _SYS_SYSPROTO_H_ struct ptrace_args { int req; pid_t pid; caddr_t addr; int data; }; #endif int sys_ptrace(struct thread *td, struct ptrace_args *uap) { /* * XXX this obfuscation is to reduce stack usage, but the register * structs may be too large to put on the stack anyway. */ union { struct ptrace_io_desc piod; struct ptrace_lwpinfo pl; struct ptrace_vm_entry pve; struct ptrace_coredump pc; struct dbreg dbreg; struct fpreg fpreg; struct reg reg; struct iovec vec; - char args[sizeof(td->td_sa.args)]; + syscallarg_t args[nitems(td->td_sa.args)]; struct ptrace_sc_ret psr; int ptevents; } r; void *addr; int error; if (!allow_ptrace) return (ENOSYS); error = 0; AUDIT_ARG_PID(uap->pid); AUDIT_ARG_CMD(uap->req); AUDIT_ARG_VALUE(uap->data); addr = &r; switch (uap->req) { case PT_GET_EVENT_MASK: case PT_LWPINFO: case PT_GET_SC_ARGS: case PT_GET_SC_RET: break; case PT_GETREGS: bzero(&r.reg, sizeof(r.reg)); break; case PT_GETFPREGS: bzero(&r.fpreg, sizeof(r.fpreg)); break; case PT_GETDBREGS: bzero(&r.dbreg, sizeof(r.dbreg)); break; case PT_GETREGSET: case PT_SETREGSET: error = copyin(uap->addr, &r.vec, sizeof(r.vec)); break; case PT_SETREGS: error = copyin(uap->addr, &r.reg, sizeof(r.reg)); break; case PT_SETFPREGS: error = copyin(uap->addr, &r.fpreg, sizeof(r.fpreg)); break; case PT_SETDBREGS: error = copyin(uap->addr, &r.dbreg, sizeof(r.dbreg)); break; case PT_SET_EVENT_MASK: if (uap->data != sizeof(r.ptevents)) error = EINVAL; else error = copyin(uap->addr, &r.ptevents, uap->data); break; case PT_IO: error = copyin(uap->addr, &r.piod, sizeof(r.piod)); break; case PT_VM_ENTRY: error = copyin(uap->addr, &r.pve, sizeof(r.pve)); break; case PT_COREDUMP: if (uap->data != sizeof(r.pc)) error = EINVAL; else error = copyin(uap->addr, &r.pc, uap->data); break; default: addr = uap->addr; break; } if (error) return (error); error = kern_ptrace(td, uap->req, uap->pid, addr, uap->data); if (error) return (error); switch (uap->req) { case PT_VM_ENTRY: error = copyout(&r.pve, uap->addr, sizeof(r.pve)); break; case PT_IO: error = copyout(&r.piod, uap->addr, sizeof(r.piod)); break; case PT_GETREGS: error = copyout(&r.reg, uap->addr, sizeof(r.reg)); break; case PT_GETFPREGS: error = copyout(&r.fpreg, uap->addr, sizeof(r.fpreg)); break; case PT_GETDBREGS: error = copyout(&r.dbreg, uap->addr, sizeof(r.dbreg)); break; case PT_GETREGSET: error = copyout(&r.vec, uap->addr, sizeof(r.vec)); break; case PT_GET_EVENT_MASK: /* NB: The size in uap->data is validated in kern_ptrace(). */ error = copyout(&r.ptevents, uap->addr, uap->data); break; case PT_LWPINFO: /* NB: The size in uap->data is validated in kern_ptrace(). */ error = copyout(&r.pl, uap->addr, uap->data); break; case PT_GET_SC_ARGS: error = copyout(r.args, uap->addr, MIN(uap->data, sizeof(r.args))); break; case PT_GET_SC_RET: error = copyout(&r.psr, uap->addr, MIN(uap->data, sizeof(r.psr))); break; } return (error); } #ifdef COMPAT_FREEBSD32 /* * PROC_READ(regs, td2, addr); * becomes either: * proc_read_regs(td2, addr); * or * proc_read_regs32(td2, addr); * .. except this is done at runtime. There is an additional * complication in that PROC_WRITE disallows 32 bit consumers * from writing to 64 bit address space targets. */ #define PROC_READ(w, t, a) wrap32 ? \ proc_read_ ## w ## 32(t, a) : \ proc_read_ ## w (t, a) #define PROC_WRITE(w, t, a) wrap32 ? \ (safe ? proc_write_ ## w ## 32(t, a) : EINVAL ) : \ proc_write_ ## w (t, a) #else #define PROC_READ(w, t, a) proc_read_ ## w (t, a) #define PROC_WRITE(w, t, a) proc_write_ ## w (t, a) #endif void proc_set_traced(struct proc *p, bool stop) { sx_assert(&proctree_lock, SX_XLOCKED); PROC_LOCK_ASSERT(p, MA_OWNED); p->p_flag |= P_TRACED; if (stop) p->p_flag2 |= P2_PTRACE_FSTP; p->p_ptevents = PTRACE_DEFAULT; } void ptrace_unsuspend(struct proc *p) { PROC_LOCK_ASSERT(p, MA_OWNED); PROC_SLOCK(p); p->p_flag &= ~(P_STOPPED_TRACE | P_STOPPED_SIG | P_WAITED); thread_unsuspend(p); PROC_SUNLOCK(p); itimer_proc_continue(p); kqtimer_proc_continue(p); } static int proc_can_ptrace(struct thread *td, struct proc *p) { int error; PROC_LOCK_ASSERT(p, MA_OWNED); if ((p->p_flag & P_WEXIT) != 0) return (ESRCH); if ((error = p_cansee(td, p)) != 0) return (error); if ((error = p_candebug(td, p)) != 0) return (error); /* not being traced... */ if ((p->p_flag & P_TRACED) == 0) return (EPERM); /* not being traced by YOU */ if (p->p_pptr != td->td_proc) return (EBUSY); /* not currently stopped */ if ((p->p_flag & P_STOPPED_TRACE) == 0 || p->p_suspcount != p->p_numthreads || (p->p_flag & P_WAITED) == 0) return (EBUSY); return (0); } static struct thread * ptrace_sel_coredump_thread(struct proc *p) { struct thread *td2; PROC_LOCK_ASSERT(p, MA_OWNED); MPASS((p->p_flag & P_STOPPED_TRACE) != 0); FOREACH_THREAD_IN_PROC(p, td2) { if ((td2->td_dbgflags & TDB_SSWITCH) != 0) return (td2); } return (NULL); } int kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data) { struct iovec iov; struct uio uio; struct proc *curp, *p, *pp; struct thread *td2 = NULL, *td3; struct ptrace_io_desc *piod = NULL; struct ptrace_lwpinfo *pl; struct ptrace_sc_ret *psr; struct file *fp; struct ptrace_coredump *pc; struct thr_coredump_req *tcq; int error, num, tmp; lwpid_t tid = 0, *buf; #ifdef COMPAT_FREEBSD32 int wrap32 = 0, safe = 0; #endif bool proctree_locked, p2_req_set; curp = td->td_proc; proctree_locked = false; p2_req_set = false; /* Lock proctree before locking the process. */ switch (req) { case PT_TRACE_ME: case PT_ATTACH: case PT_STEP: case PT_CONTINUE: case PT_TO_SCE: case PT_TO_SCX: case PT_SYSCALL: case PT_FOLLOW_FORK: case PT_LWP_EVENTS: case PT_GET_EVENT_MASK: case PT_SET_EVENT_MASK: case PT_DETACH: case PT_GET_SC_ARGS: sx_xlock(&proctree_lock); proctree_locked = true; break; default: break; } if (req == PT_TRACE_ME) { p = td->td_proc; PROC_LOCK(p); } else { if (pid <= PID_MAX) { if ((p = pfind(pid)) == NULL) { if (proctree_locked) sx_xunlock(&proctree_lock); return (ESRCH); } } else { td2 = tdfind(pid, -1); if (td2 == NULL) { if (proctree_locked) sx_xunlock(&proctree_lock); return (ESRCH); } p = td2->td_proc; tid = pid; pid = p->p_pid; } } AUDIT_ARG_PROCESS(p); if ((p->p_flag & P_WEXIT) != 0) { error = ESRCH; goto fail; } if ((error = p_cansee(td, p)) != 0) goto fail; if ((error = p_candebug(td, p)) != 0) goto fail; /* * System processes can't be debugged. */ if ((p->p_flag & P_SYSTEM) != 0) { error = EINVAL; goto fail; } if (tid == 0) { if ((p->p_flag & P_STOPPED_TRACE) != 0) { KASSERT(p->p_xthread != NULL, ("NULL p_xthread")); td2 = p->p_xthread; } else { td2 = FIRST_THREAD_IN_PROC(p); } tid = td2->td_tid; } #ifdef COMPAT_FREEBSD32 /* * Test if we're a 32 bit client and what the target is. * Set the wrap controls accordingly. */ if (SV_CURPROC_FLAG(SV_ILP32)) { if (SV_PROC_FLAG(td2->td_proc, SV_ILP32)) safe = 1; wrap32 = 1; } #endif /* * Permissions check */ switch (req) { case PT_TRACE_ME: /* * Always legal, when there is a parent process which * could trace us. Otherwise, reject. */ if ((p->p_flag & P_TRACED) != 0) { error = EBUSY; goto fail; } if (p->p_pptr == initproc) { error = EPERM; goto fail; } break; case PT_ATTACH: /* Self */ if (p == td->td_proc) { error = EINVAL; goto fail; } /* Already traced */ if (p->p_flag & P_TRACED) { error = EBUSY; goto fail; } /* Can't trace an ancestor if you're being traced. */ if (curp->p_flag & P_TRACED) { for (pp = curp->p_pptr; pp != NULL; pp = pp->p_pptr) { if (pp == p) { error = EINVAL; goto fail; } } } /* OK */ break; case PT_CLEARSTEP: /* Allow thread to clear single step for itself */ if (td->td_tid == tid) break; /* FALLTHROUGH */ default: /* * Check for ptrace eligibility before waiting for * holds to drain. */ error = proc_can_ptrace(td, p); if (error != 0) goto fail; /* * Block parallel ptrace requests. Most important, do * not allow other thread in debugger to continue the * debuggee until coredump finished. */ while ((p->p_flag2 & P2_PTRACEREQ) != 0) { if (proctree_locked) sx_xunlock(&proctree_lock); error = msleep(&p->p_flag2, &p->p_mtx, PPAUSE | PCATCH | (proctree_locked ? PDROP : 0), "pptrace", 0); if (proctree_locked) { sx_xlock(&proctree_lock); PROC_LOCK(p); } if (error == 0 && td2->td_proc != p) error = ESRCH; if (error == 0) error = proc_can_ptrace(td, p); if (error != 0) goto fail; } /* Ok */ break; } /* * Keep this process around and request parallel ptrace() * request to wait until we finish this request. */ MPASS((p->p_flag2 & P2_PTRACEREQ) == 0); p->p_flag2 |= P2_PTRACEREQ; p2_req_set = true; _PHOLD(p); /* * Actually do the requests */ td->td_retval[0] = 0; switch (req) { case PT_TRACE_ME: /* set my trace flag and "owner" so it can read/write me */ proc_set_traced(p, false); if (p->p_flag & P_PPWAIT) p->p_flag |= P_PPTRACE; CTR1(KTR_PTRACE, "PT_TRACE_ME: pid %d", p->p_pid); break; case PT_ATTACH: /* security check done above */ /* * It would be nice if the tracing relationship was separate * from the parent relationship but that would require * another set of links in the proc struct or for "wait" * to scan the entire proc table. To make life easier, * we just re-parent the process we're trying to trace. * The old parent is remembered so we can put things back * on a "detach". */ proc_set_traced(p, true); proc_reparent(p, td->td_proc, false); CTR2(KTR_PTRACE, "PT_ATTACH: pid %d, oppid %d", p->p_pid, p->p_oppid); sx_xunlock(&proctree_lock); proctree_locked = false; MPASS(p->p_xthread == NULL); MPASS((p->p_flag & P_STOPPED_TRACE) == 0); /* * If already stopped due to a stop signal, clear the * existing stop before triggering a traced SIGSTOP. */ if ((p->p_flag & P_STOPPED_SIG) != 0) { PROC_SLOCK(p); p->p_flag &= ~(P_STOPPED_SIG | P_WAITED); thread_unsuspend(p); PROC_SUNLOCK(p); } kern_psignal(p, SIGSTOP); break; case PT_CLEARSTEP: CTR2(KTR_PTRACE, "PT_CLEARSTEP: tid %d (pid %d)", td2->td_tid, p->p_pid); error = ptrace_clear_single_step(td2); break; case PT_SETSTEP: CTR2(KTR_PTRACE, "PT_SETSTEP: tid %d (pid %d)", td2->td_tid, p->p_pid); error = ptrace_single_step(td2); break; case PT_SUSPEND: CTR2(KTR_PTRACE, "PT_SUSPEND: tid %d (pid %d)", td2->td_tid, p->p_pid); td2->td_dbgflags |= TDB_SUSPEND; thread_lock(td2); td2->td_flags |= TDF_NEEDSUSPCHK; thread_unlock(td2); break; case PT_RESUME: CTR2(KTR_PTRACE, "PT_RESUME: tid %d (pid %d)", td2->td_tid, p->p_pid); td2->td_dbgflags &= ~TDB_SUSPEND; break; case PT_FOLLOW_FORK: CTR3(KTR_PTRACE, "PT_FOLLOW_FORK: pid %d %s -> %s", p->p_pid, p->p_ptevents & PTRACE_FORK ? "enabled" : "disabled", data ? "enabled" : "disabled"); if (data) p->p_ptevents |= PTRACE_FORK; else p->p_ptevents &= ~PTRACE_FORK; break; case PT_LWP_EVENTS: CTR3(KTR_PTRACE, "PT_LWP_EVENTS: pid %d %s -> %s", p->p_pid, p->p_ptevents & PTRACE_LWP ? "enabled" : "disabled", data ? "enabled" : "disabled"); if (data) p->p_ptevents |= PTRACE_LWP; else p->p_ptevents &= ~PTRACE_LWP; break; case PT_GET_EVENT_MASK: if (data != sizeof(p->p_ptevents)) { error = EINVAL; break; } CTR2(KTR_PTRACE, "PT_GET_EVENT_MASK: pid %d mask %#x", p->p_pid, p->p_ptevents); *(int *)addr = p->p_ptevents; break; case PT_SET_EVENT_MASK: if (data != sizeof(p->p_ptevents)) { error = EINVAL; break; } tmp = *(int *)addr; if ((tmp & ~(PTRACE_EXEC | PTRACE_SCE | PTRACE_SCX | PTRACE_FORK | PTRACE_LWP | PTRACE_VFORK)) != 0) { error = EINVAL; break; } CTR3(KTR_PTRACE, "PT_SET_EVENT_MASK: pid %d mask %#x -> %#x", p->p_pid, p->p_ptevents, tmp); p->p_ptevents = tmp; break; case PT_GET_SC_ARGS: CTR1(KTR_PTRACE, "PT_GET_SC_ARGS: pid %d", p->p_pid); if ((td2->td_dbgflags & (TDB_SCE | TDB_SCX)) == 0 #ifdef COMPAT_FREEBSD32 || (wrap32 && !safe) #endif ) { error = EINVAL; break; } bzero(addr, sizeof(td2->td_sa.args)); /* See the explanation in linux_ptrace_get_syscall_info(). */ bcopy(td2->td_sa.args, addr, SV_PROC_ABI(td->td_proc) == SV_ABI_LINUX ? sizeof(td2->td_sa.args) : - td2->td_sa.callp->sy_narg * sizeof(register_t)); + td2->td_sa.callp->sy_narg * sizeof(syscallarg_t)); break; case PT_GET_SC_RET: if ((td2->td_dbgflags & (TDB_SCX)) == 0 #ifdef COMPAT_FREEBSD32 || (wrap32 && !safe) #endif ) { error = EINVAL; break; } psr = addr; bzero(psr, sizeof(*psr)); psr->sr_error = td2->td_errno; if (psr->sr_error == 0) { psr->sr_retval[0] = td2->td_retval[0]; psr->sr_retval[1] = td2->td_retval[1]; } CTR4(KTR_PTRACE, "PT_GET_SC_RET: pid %d error %d retval %#lx,%#lx", p->p_pid, psr->sr_error, psr->sr_retval[0], psr->sr_retval[1]); break; case PT_STEP: case PT_CONTINUE: case PT_TO_SCE: case PT_TO_SCX: case PT_SYSCALL: case PT_DETACH: /* Zero means do not send any signal */ if (data < 0 || data > _SIG_MAXSIG) { error = EINVAL; break; } switch (req) { case PT_STEP: CTR3(KTR_PTRACE, "PT_STEP: tid %d (pid %d), sig = %d", td2->td_tid, p->p_pid, data); error = ptrace_single_step(td2); if (error) goto out; break; case PT_CONTINUE: case PT_TO_SCE: case PT_TO_SCX: case PT_SYSCALL: if (addr != (void *)1) { error = ptrace_set_pc(td2, (u_long)(uintfptr_t)addr); if (error) goto out; } switch (req) { case PT_TO_SCE: p->p_ptevents |= PTRACE_SCE; CTR4(KTR_PTRACE, "PT_TO_SCE: pid %d, events = %#x, PC = %#lx, sig = %d", p->p_pid, p->p_ptevents, (u_long)(uintfptr_t)addr, data); break; case PT_TO_SCX: p->p_ptevents |= PTRACE_SCX; CTR4(KTR_PTRACE, "PT_TO_SCX: pid %d, events = %#x, PC = %#lx, sig = %d", p->p_pid, p->p_ptevents, (u_long)(uintfptr_t)addr, data); break; case PT_SYSCALL: p->p_ptevents |= PTRACE_SYSCALL; CTR4(KTR_PTRACE, "PT_SYSCALL: pid %d, events = %#x, PC = %#lx, sig = %d", p->p_pid, p->p_ptevents, (u_long)(uintfptr_t)addr, data); break; case PT_CONTINUE: CTR3(KTR_PTRACE, "PT_CONTINUE: pid %d, PC = %#lx, sig = %d", p->p_pid, (u_long)(uintfptr_t)addr, data); break; } break; case PT_DETACH: /* * Clear P_TRACED before reparenting * a detached process back to its original * parent. Otherwise the debugee will be set * as an orphan of the debugger. */ p->p_flag &= ~(P_TRACED | P_WAITED); /* * Reset the process parent. */ if (p->p_oppid != p->p_pptr->p_pid) { PROC_LOCK(p->p_pptr); sigqueue_take(p->p_ksi); PROC_UNLOCK(p->p_pptr); pp = proc_realparent(p); proc_reparent(p, pp, false); if (pp == initproc) p->p_sigparent = SIGCHLD; CTR3(KTR_PTRACE, "PT_DETACH: pid %d reparented to pid %d, sig %d", p->p_pid, pp->p_pid, data); } else { CTR2(KTR_PTRACE, "PT_DETACH: pid %d, sig %d", p->p_pid, data); } p->p_ptevents = 0; FOREACH_THREAD_IN_PROC(p, td3) { if ((td3->td_dbgflags & TDB_FSTP) != 0) { sigqueue_delete(&td3->td_sigqueue, SIGSTOP); } td3->td_dbgflags &= ~(TDB_XSIG | TDB_FSTP | TDB_SUSPEND); } if ((p->p_flag2 & P2_PTRACE_FSTP) != 0) { sigqueue_delete(&p->p_sigqueue, SIGSTOP); p->p_flag2 &= ~P2_PTRACE_FSTP; } /* should we send SIGCHLD? */ /* childproc_continued(p); */ break; } sx_xunlock(&proctree_lock); proctree_locked = false; sendsig: MPASS(!proctree_locked); /* * Clear the pending event for the thread that just * reported its event (p_xthread). This may not be * the thread passed to PT_CONTINUE, PT_STEP, etc. if * the debugger is resuming a different thread. * * Deliver any pending signal via the reporting thread. */ MPASS(p->p_xthread != NULL); p->p_xthread->td_dbgflags &= ~TDB_XSIG; p->p_xthread->td_xsig = data; p->p_xthread = NULL; p->p_xsig = data; /* * P_WKILLED is insurance that a PT_KILL/SIGKILL * always works immediately, even if another thread is * unsuspended first and attempts to handle a * different signal or if the POSIX.1b style signal * queue cannot accommodate any new signals. */ if (data == SIGKILL) proc_wkilled(p); /* * Unsuspend all threads. To leave a thread * suspended, use PT_SUSPEND to suspend it before * continuing the process. */ ptrace_unsuspend(p); break; case PT_WRITE_I: case PT_WRITE_D: td2->td_dbgflags |= TDB_USERWR; PROC_UNLOCK(p); error = 0; if (proc_writemem(td, p, (off_t)(uintptr_t)addr, &data, sizeof(int)) != sizeof(int)) error = ENOMEM; else CTR3(KTR_PTRACE, "PT_WRITE: pid %d: %p <= %#x", p->p_pid, addr, data); PROC_LOCK(p); break; case PT_READ_I: case PT_READ_D: PROC_UNLOCK(p); error = tmp = 0; if (proc_readmem(td, p, (off_t)(uintptr_t)addr, &tmp, sizeof(int)) != sizeof(int)) error = ENOMEM; else CTR3(KTR_PTRACE, "PT_READ: pid %d: %p >= %#x", p->p_pid, addr, tmp); td->td_retval[0] = tmp; PROC_LOCK(p); break; case PT_IO: piod = addr; iov.iov_base = piod->piod_addr; iov.iov_len = piod->piod_len; uio.uio_offset = (off_t)(uintptr_t)piod->piod_offs; uio.uio_resid = piod->piod_len; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_segflg = UIO_USERSPACE; uio.uio_td = td; switch (piod->piod_op) { case PIOD_READ_D: case PIOD_READ_I: CTR3(KTR_PTRACE, "PT_IO: pid %d: READ (%p, %#x)", p->p_pid, (uintptr_t)uio.uio_offset, uio.uio_resid); uio.uio_rw = UIO_READ; break; case PIOD_WRITE_D: case PIOD_WRITE_I: CTR3(KTR_PTRACE, "PT_IO: pid %d: WRITE (%p, %#x)", p->p_pid, (uintptr_t)uio.uio_offset, uio.uio_resid); td2->td_dbgflags |= TDB_USERWR; uio.uio_rw = UIO_WRITE; break; default: error = EINVAL; goto out; } PROC_UNLOCK(p); error = proc_rwmem(p, &uio); piod->piod_len -= uio.uio_resid; PROC_LOCK(p); break; case PT_KILL: CTR1(KTR_PTRACE, "PT_KILL: pid %d", p->p_pid); data = SIGKILL; goto sendsig; /* in PT_CONTINUE above */ case PT_SETREGS: CTR2(KTR_PTRACE, "PT_SETREGS: tid %d (pid %d)", td2->td_tid, p->p_pid); td2->td_dbgflags |= TDB_USERWR; error = PROC_WRITE(regs, td2, addr); break; case PT_GETREGS: CTR2(KTR_PTRACE, "PT_GETREGS: tid %d (pid %d)", td2->td_tid, p->p_pid); error = PROC_READ(regs, td2, addr); break; case PT_SETFPREGS: CTR2(KTR_PTRACE, "PT_SETFPREGS: tid %d (pid %d)", td2->td_tid, p->p_pid); td2->td_dbgflags |= TDB_USERWR; error = PROC_WRITE(fpregs, td2, addr); break; case PT_GETFPREGS: CTR2(KTR_PTRACE, "PT_GETFPREGS: tid %d (pid %d)", td2->td_tid, p->p_pid); error = PROC_READ(fpregs, td2, addr); break; case PT_SETDBREGS: CTR2(KTR_PTRACE, "PT_SETDBREGS: tid %d (pid %d)", td2->td_tid, p->p_pid); td2->td_dbgflags |= TDB_USERWR; error = PROC_WRITE(dbregs, td2, addr); break; case PT_GETDBREGS: CTR2(KTR_PTRACE, "PT_GETDBREGS: tid %d (pid %d)", td2->td_tid, p->p_pid); error = PROC_READ(dbregs, td2, addr); break; case PT_SETREGSET: CTR2(KTR_PTRACE, "PT_SETREGSET: tid %d (pid %d)", td2->td_tid, p->p_pid); error = proc_write_regset(td2, data, addr); break; case PT_GETREGSET: CTR2(KTR_PTRACE, "PT_GETREGSET: tid %d (pid %d)", td2->td_tid, p->p_pid); error = proc_read_regset(td2, data, addr); break; case PT_LWPINFO: if (data <= 0 || data > sizeof(*pl)) { error = EINVAL; break; } pl = addr; bzero(pl, sizeof(*pl)); pl->pl_lwpid = td2->td_tid; pl->pl_event = PL_EVENT_NONE; pl->pl_flags = 0; if (td2->td_dbgflags & TDB_XSIG) { pl->pl_event = PL_EVENT_SIGNAL; if (td2->td_si.si_signo != 0 && data >= offsetof(struct ptrace_lwpinfo, pl_siginfo) + sizeof(pl->pl_siginfo)){ pl->pl_flags |= PL_FLAG_SI; pl->pl_siginfo = td2->td_si; } } if (td2->td_dbgflags & TDB_SCE) pl->pl_flags |= PL_FLAG_SCE; else if (td2->td_dbgflags & TDB_SCX) pl->pl_flags |= PL_FLAG_SCX; if (td2->td_dbgflags & TDB_EXEC) pl->pl_flags |= PL_FLAG_EXEC; if (td2->td_dbgflags & TDB_FORK) { pl->pl_flags |= PL_FLAG_FORKED; pl->pl_child_pid = td2->td_dbg_forked; if (td2->td_dbgflags & TDB_VFORK) pl->pl_flags |= PL_FLAG_VFORKED; } else if ((td2->td_dbgflags & (TDB_SCX | TDB_VFORK)) == TDB_VFORK) pl->pl_flags |= PL_FLAG_VFORK_DONE; if (td2->td_dbgflags & TDB_CHILD) pl->pl_flags |= PL_FLAG_CHILD; if (td2->td_dbgflags & TDB_BORN) pl->pl_flags |= PL_FLAG_BORN; if (td2->td_dbgflags & TDB_EXIT) pl->pl_flags |= PL_FLAG_EXITED; pl->pl_sigmask = td2->td_sigmask; pl->pl_siglist = td2->td_siglist; strcpy(pl->pl_tdname, td2->td_name); if ((td2->td_dbgflags & (TDB_SCE | TDB_SCX)) != 0) { pl->pl_syscall_code = td2->td_sa.code; pl->pl_syscall_narg = td2->td_sa.callp->sy_narg; } else { pl->pl_syscall_code = 0; pl->pl_syscall_narg = 0; } CTR6(KTR_PTRACE, "PT_LWPINFO: tid %d (pid %d) event %d flags %#x child pid %d syscall %d", td2->td_tid, p->p_pid, pl->pl_event, pl->pl_flags, pl->pl_child_pid, pl->pl_syscall_code); break; case PT_GETNUMLWPS: CTR2(KTR_PTRACE, "PT_GETNUMLWPS: pid %d: %d threads", p->p_pid, p->p_numthreads); td->td_retval[0] = p->p_numthreads; break; case PT_GETLWPLIST: CTR3(KTR_PTRACE, "PT_GETLWPLIST: pid %d: data %d, actual %d", p->p_pid, data, p->p_numthreads); if (data <= 0) { error = EINVAL; break; } num = imin(p->p_numthreads, data); PROC_UNLOCK(p); buf = malloc(num * sizeof(lwpid_t), M_TEMP, M_WAITOK); tmp = 0; PROC_LOCK(p); FOREACH_THREAD_IN_PROC(p, td2) { if (tmp >= num) break; buf[tmp++] = td2->td_tid; } PROC_UNLOCK(p); error = copyout(buf, addr, tmp * sizeof(lwpid_t)); free(buf, M_TEMP); if (!error) td->td_retval[0] = tmp; PROC_LOCK(p); break; case PT_VM_TIMESTAMP: CTR2(KTR_PTRACE, "PT_VM_TIMESTAMP: pid %d: timestamp %d", p->p_pid, p->p_vmspace->vm_map.timestamp); td->td_retval[0] = p->p_vmspace->vm_map.timestamp; break; case PT_VM_ENTRY: PROC_UNLOCK(p); error = ptrace_vm_entry(td, p, addr); PROC_LOCK(p); break; case PT_COREDUMP: pc = addr; CTR2(KTR_PTRACE, "PT_COREDUMP: pid %d, fd %d", p->p_pid, pc->pc_fd); if ((pc->pc_flags & ~(PC_COMPRESS | PC_ALL)) != 0) { error = EINVAL; break; } PROC_UNLOCK(p); tcq = malloc(sizeof(*tcq), M_TEMP, M_WAITOK | M_ZERO); fp = NULL; error = fget_write(td, pc->pc_fd, &cap_write_rights, &fp); if (error != 0) goto coredump_cleanup_nofp; if (fp->f_type != DTYPE_VNODE || fp->f_vnode->v_type != VREG) { error = EPIPE; goto coredump_cleanup; } PROC_LOCK(p); error = proc_can_ptrace(td, p); if (error != 0) goto coredump_cleanup_locked; td2 = ptrace_sel_coredump_thread(p); if (td2 == NULL) { error = EBUSY; goto coredump_cleanup_locked; } KASSERT((td2->td_dbgflags & TDB_COREDUMPRQ) == 0, ("proc %d tid %d req coredump", p->p_pid, td2->td_tid)); tcq->tc_vp = fp->f_vnode; tcq->tc_limit = pc->pc_limit == 0 ? OFF_MAX : pc->pc_limit; tcq->tc_flags = SVC_PT_COREDUMP; if ((pc->pc_flags & PC_COMPRESS) == 0) tcq->tc_flags |= SVC_NOCOMPRESS; if ((pc->pc_flags & PC_ALL) != 0) tcq->tc_flags |= SVC_ALL; td2->td_coredump = tcq; td2->td_dbgflags |= TDB_COREDUMPRQ; thread_run_flash(td2); while ((td2->td_dbgflags & TDB_COREDUMPRQ) != 0) msleep(p, &p->p_mtx, PPAUSE, "crdmp", 0); error = tcq->tc_error; coredump_cleanup_locked: PROC_UNLOCK(p); coredump_cleanup: fdrop(fp, td); coredump_cleanup_nofp: free(tcq, M_TEMP); PROC_LOCK(p); break; default: #ifdef __HAVE_PTRACE_MACHDEP if (req >= PT_FIRSTMACH) { PROC_UNLOCK(p); error = cpu_ptrace(td2, req, addr, data); PROC_LOCK(p); } else #endif /* Unknown request. */ error = EINVAL; break; } out: /* Drop our hold on this process now that the request has completed. */ _PRELE(p); fail: if (p2_req_set) { if ((p->p_flag2 & P2_PTRACEREQ) != 0) wakeup(&p->p_flag2); p->p_flag2 &= ~P2_PTRACEREQ; } PROC_UNLOCK(p); if (proctree_locked) sx_xunlock(&proctree_lock); return (error); } #undef PROC_READ #undef PROC_WRITE diff --git a/sys/riscv/riscv/trap.c b/sys/riscv/riscv/trap.c index b89eae325e0c..0744f5a25fb3 100644 --- a/sys/riscv/riscv/trap.c +++ b/sys/riscv/riscv/trap.c @@ -1,414 +1,414 @@ /*- * Copyright (c) 2015-2018 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #ifdef KDB #include #endif #include #include #include #include #include #include #ifdef FPE #include #endif #include #include #include #include #include #ifdef KDTRACE_HOOKS #include #endif int (*dtrace_invop_jump_addr)(struct trapframe *); /* Called from exception.S */ void do_trap_supervisor(struct trapframe *); void do_trap_user(struct trapframe *); static __inline void call_trapsignal(struct thread *td, int sig, int code, void *addr, int trapno) { ksiginfo_t ksi; ksiginfo_init_trap(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = code; ksi.ksi_addr = addr; ksi.ksi_trapno = trapno; trapsignal(td, &ksi); } int cpu_fetch_syscall_args(struct thread *td) { struct proc *p; - register_t *ap, *dst_ap; + syscallarg_t *ap, *dst_ap; struct syscall_args *sa; p = td->td_proc; sa = &td->td_sa; ap = &td->td_frame->tf_a[0]; dst_ap = &sa->args[0]; sa->code = td->td_frame->tf_t[0]; sa->original_code = sa->code; if (__predict_false(sa->code == SYS_syscall || sa->code == SYS___syscall)) { sa->code = *ap++; } else { *dst_ap++ = *ap++; } if (__predict_false(sa->code >= p->p_sysent->sv_size)) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; KASSERT(sa->callp->sy_narg <= nitems(sa->args), ("Syscall %d takes too many arguments", sa->code)); - memcpy(dst_ap, ap, (NARGREG - 1) * sizeof(register_t)); + memcpy(dst_ap, ap, (NARGREG - 1) * sizeof(*dst_ap)); td->td_retval[0] = 0; td->td_retval[1] = 0; return (0); } #include "../../kern/subr_syscall.c" static void dump_regs(struct trapframe *frame) { int n; int i; n = nitems(frame->tf_t); for (i = 0; i < n; i++) printf("t[%d] == 0x%016lx\n", i, frame->tf_t[i]); n = nitems(frame->tf_s); for (i = 0; i < n; i++) printf("s[%d] == 0x%016lx\n", i, frame->tf_s[i]); n = nitems(frame->tf_a); for (i = 0; i < n; i++) printf("a[%d] == 0x%016lx\n", i, frame->tf_a[i]); printf("ra == 0x%016lx\n", frame->tf_ra); printf("sp == 0x%016lx\n", frame->tf_sp); printf("gp == 0x%016lx\n", frame->tf_gp); printf("tp == 0x%016lx\n", frame->tf_tp); printf("sepc == 0x%016lx\n", frame->tf_sepc); printf("sstatus == 0x%016lx\n", frame->tf_sstatus); } static void ecall_handler(void) { struct thread *td; td = curthread; syscallenter(td); syscallret(td); } static void page_fault_handler(struct trapframe *frame, int usermode) { struct vm_map *map; uint64_t stval; struct thread *td; struct pcb *pcb; vm_prot_t ftype; vm_offset_t va; struct proc *p; int error, sig, ucode; #ifdef KDB bool handled; #endif #ifdef KDB if (kdb_active) { kdb_reenter(); return; } #endif td = curthread; p = td->td_proc; pcb = td->td_pcb; stval = frame->tf_stval; if (td->td_critnest != 0 || td->td_intr_nesting_level != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) goto fatal; if (usermode) { if (!VIRT_IS_VALID(stval)) { call_trapsignal(td, SIGSEGV, SEGV_MAPERR, (void *)stval, frame->tf_scause & SCAUSE_CODE); goto done; } map = &p->p_vmspace->vm_map; } else { /* * Enable interrupts for the duration of the page fault. For * user faults this was done already in do_trap_user(). */ intr_enable(); if (!VIRT_IS_VALID(stval)) goto fatal; if (stval >= VM_MAX_USER_ADDRESS) { map = kernel_map; } else { if (pcb->pcb_onfault == 0) goto fatal; map = &p->p_vmspace->vm_map; } } va = trunc_page(stval); if (frame->tf_scause == SCAUSE_STORE_PAGE_FAULT) { ftype = VM_PROT_WRITE; } else if (frame->tf_scause == SCAUSE_INST_PAGE_FAULT) { ftype = VM_PROT_EXECUTE; } else { ftype = VM_PROT_READ; } if (pmap_fault(map->pmap, va, ftype)) goto done; error = vm_fault_trap(map, va, ftype, VM_FAULT_NORMAL, &sig, &ucode); if (error != KERN_SUCCESS) { if (usermode) { call_trapsignal(td, sig, ucode, (void *)stval, frame->tf_scause & SCAUSE_CODE); } else { if (pcb->pcb_onfault != 0) { frame->tf_a[0] = error; frame->tf_sepc = pcb->pcb_onfault; return; } goto fatal; } } done: if (usermode) userret(td, frame); return; fatal: dump_regs(frame); #ifdef KDB if (debugger_on_trap) { kdb_why = KDB_WHY_TRAP; handled = kdb_trap(frame->tf_scause & SCAUSE_CODE, 0, frame); kdb_why = KDB_WHY_UNSET; if (handled) return; } #endif panic("Fatal page fault at %#lx: %#016lx", frame->tf_sepc, stval); } void do_trap_supervisor(struct trapframe *frame) { uint64_t exception; /* Ensure we came from supervisor mode, interrupts disabled */ KASSERT((csr_read(sstatus) & (SSTATUS_SPP | SSTATUS_SIE)) == SSTATUS_SPP, ("Came from S mode with interrupts enabled")); KASSERT((csr_read(sstatus) & (SSTATUS_SUM)) == 0, ("Came from S mode with SUM enabled")); exception = frame->tf_scause & SCAUSE_CODE; if ((frame->tf_scause & SCAUSE_INTR) != 0) { /* Interrupt */ riscv_cpu_intr(frame); return; } #ifdef KDTRACE_HOOKS if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, exception)) return; #endif CTR3(KTR_TRAP, "do_trap_supervisor: curthread: %p, sepc: %lx, frame: %p", curthread, frame->tf_sepc, frame); switch (exception) { case SCAUSE_LOAD_ACCESS_FAULT: case SCAUSE_STORE_ACCESS_FAULT: case SCAUSE_INST_ACCESS_FAULT: dump_regs(frame); panic("Memory access exception at 0x%016lx\n", frame->tf_sepc); break; case SCAUSE_STORE_PAGE_FAULT: case SCAUSE_LOAD_PAGE_FAULT: case SCAUSE_INST_PAGE_FAULT: page_fault_handler(frame, 0); break; case SCAUSE_BREAKPOINT: #ifdef KDTRACE_HOOKS if (dtrace_invop_jump_addr != NULL && dtrace_invop_jump_addr(frame) == 0) break; #endif #ifdef KDB kdb_trap(exception, 0, frame); #else dump_regs(frame); panic("No debugger in kernel.\n"); #endif break; case SCAUSE_ILLEGAL_INSTRUCTION: dump_regs(frame); panic("Illegal instruction at 0x%016lx\n", frame->tf_sepc); break; default: dump_regs(frame); panic("Unknown kernel exception %lx trap value %lx\n", exception, frame->tf_stval); } } void do_trap_user(struct trapframe *frame) { uint64_t exception; struct thread *td; struct pcb *pcb; td = curthread; pcb = td->td_pcb; KASSERT(td->td_frame == frame, ("%s: td_frame %p != frame %p", __func__, td->td_frame, frame)); /* Ensure we came from usermode, interrupts disabled */ KASSERT((csr_read(sstatus) & (SSTATUS_SPP | SSTATUS_SIE)) == 0, ("Came from U mode with interrupts enabled")); KASSERT((csr_read(sstatus) & (SSTATUS_SUM)) == 0, ("Came from U mode with SUM enabled")); exception = frame->tf_scause & SCAUSE_CODE; if ((frame->tf_scause & SCAUSE_INTR) != 0) { /* Interrupt */ riscv_cpu_intr(frame); return; } intr_enable(); CTR3(KTR_TRAP, "do_trap_user: curthread: %p, sepc: %lx, frame: %p", curthread, frame->tf_sepc, frame); switch (exception) { case SCAUSE_LOAD_ACCESS_FAULT: case SCAUSE_STORE_ACCESS_FAULT: case SCAUSE_INST_ACCESS_FAULT: call_trapsignal(td, SIGBUS, BUS_ADRERR, (void *)frame->tf_sepc, exception); userret(td, frame); break; case SCAUSE_STORE_PAGE_FAULT: case SCAUSE_LOAD_PAGE_FAULT: case SCAUSE_INST_PAGE_FAULT: page_fault_handler(frame, 1); break; case SCAUSE_ECALL_USER: frame->tf_sepc += 4; /* Next instruction */ ecall_handler(); break; case SCAUSE_ILLEGAL_INSTRUCTION: #ifdef FPE if ((pcb->pcb_fpflags & PCB_FP_STARTED) == 0) { /* * May be a FPE trap. Enable FPE usage * for this thread and try again. */ fpe_state_clear(); frame->tf_sstatus &= ~SSTATUS_FS_MASK; frame->tf_sstatus |= SSTATUS_FS_CLEAN; pcb->pcb_fpflags |= PCB_FP_STARTED; break; } #endif call_trapsignal(td, SIGILL, ILL_ILLTRP, (void *)frame->tf_sepc, exception); userret(td, frame); break; case SCAUSE_BREAKPOINT: call_trapsignal(td, SIGTRAP, TRAP_BRKPT, (void *)frame->tf_sepc, exception); userret(td, frame); break; default: dump_regs(frame); panic("Unknown userland exception %lx, trap value %lx\n", exception, frame->tf_stval); } } diff --git a/sys/sys/ktrace.h b/sys/sys/ktrace.h index 283a95c36d6c..d00981a93d24 100644 --- a/sys/sys/ktrace.h +++ b/sys/sys/ktrace.h @@ -1,327 +1,327 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ktrace.h 8.1 (Berkeley) 6/2/93 * $FreeBSD$ */ #ifndef _SYS_KTRACE_H_ #define _SYS_KTRACE_H_ #include /* * operations to ktrace system call (KTROP(op)) */ #define KTROP_SET 0 /* set trace points */ #define KTROP_CLEAR 1 /* clear trace points */ #define KTROP_CLEARFILE 2 /* stop all tracing to file */ #define KTROP(o) ((o)&3) /* macro to extract operation */ /* * flags (ORed in with operation) */ #define KTRFLAG_DESCEND 4 /* perform op on all children too */ /* * ktrace record header */ struct ktr_header { int ktr_len; /* length of buf */ short ktr_type; /* trace record type */ pid_t ktr_pid; /* process id */ char ktr_comm[MAXCOMLEN + 1];/* command name */ struct timeval ktr_time; /* timestamp */ intptr_t ktr_tid; /* was ktr_buffer */ }; /* * Test for kernel trace point (MP SAFE). * * KTRCHECK() just checks that the type is enabled and is only for * internal use in the ktrace subsystem. KTRPOINT() checks against * ktrace recursion as well as checking that the type is enabled and * is the public interface. */ #define KTRCHECK(td, type) ((td)->td_proc->p_traceflag & (1 << type)) #define KTRPOINT(td, type) (__predict_false(KTRCHECK((td), (type)))) #define KTRCHECKDRAIN(td) (!(STAILQ_EMPTY(&(td)->td_proc->p_ktr))) #define KTRUSERRET(td) do { \ if (__predict_false(KTRCHECKDRAIN(td))) \ ktruserret(td); \ } while (0) /* * ktrace record types */ /* * KTR_SYSCALL - system call record */ #define KTR_SYSCALL 1 struct ktr_syscall { short ktr_code; /* syscall number */ short ktr_narg; /* number of arguments */ /* * followed by ktr_narg register_t */ register_t ktr_args[1]; }; /* * KTR_SYSRET - return from system call record */ #define KTR_SYSRET 2 struct ktr_sysret { short ktr_code; short ktr_eosys; int ktr_error; register_t ktr_retval; }; /* * KTR_NAMEI - namei record */ #define KTR_NAMEI 3 /* record contains pathname */ /* * KTR_GENIO - trace generic process i/o */ #define KTR_GENIO 4 struct ktr_genio { int ktr_fd; enum uio_rw ktr_rw; /* * followed by data successfully read/written */ }; /* * KTR_PSIG - trace processed signal */ #define KTR_PSIG 5 struct ktr_psig { int signo; sig_t action; int code; sigset_t mask; }; /* * KTR_CSW - trace context switches */ #define KTR_CSW 6 struct ktr_csw_old { int out; /* 1 if switch out, 0 if switch in */ int user; /* 1 if usermode (ivcsw), 0 if kernel (vcsw) */ }; struct ktr_csw { int out; /* 1 if switch out, 0 if switch in */ int user; /* 1 if usermode (ivcsw), 0 if kernel (vcsw) */ char wmesg[8]; }; /* * KTR_USER - data coming from userland */ #define KTR_USER_MAXLEN 2048 /* maximum length of passed data */ #define KTR_USER 7 /* * KTR_STRUCT - misc. structs */ #define KTR_STRUCT 8 /* * record contains null-terminated struct name followed by * struct contents */ struct sockaddr; struct stat; struct sysentvec; /* * KTR_SYSCTL - name of a sysctl MIB */ #define KTR_SYSCTL 9 /* record contains null-terminated MIB name */ /* * KTR_PROCCTOR - trace process creation (multiple ABI support) */ #define KTR_PROCCTOR 10 struct ktr_proc_ctor { u_int sv_flags; /* struct sysentvec sv_flags copy */ }; /* * KTR_PROCDTOR - trace process destruction (multiple ABI support) */ #define KTR_PROCDTOR 11 /* * KTR_CAPFAIL - trace capability check failures */ #define KTR_CAPFAIL 12 enum ktr_cap_fail_type { CAPFAIL_NOTCAPABLE, /* insufficient capabilities in cap_check() */ CAPFAIL_INCREASE, /* attempt to increase capabilities */ CAPFAIL_SYSCALL, /* disallowed system call */ CAPFAIL_LOOKUP, /* disallowed VFS lookup */ }; struct ktr_cap_fail { enum ktr_cap_fail_type cap_type; cap_rights_t cap_needed; cap_rights_t cap_held; }; /* * KTR_FAULT - page fault record */ #define KTR_FAULT 13 struct ktr_fault { vm_offset_t vaddr; int type; }; /* * KTR_FAULTEND - end of page fault record */ #define KTR_FAULTEND 14 struct ktr_faultend { int result; }; /* * KTR_STRUCT_ARRAY - array of misc. structs */ #define KTR_STRUCT_ARRAY 15 struct ktr_struct_array { size_t struct_size; /* * Followed by null-terminated structure name and then payload * contents. */ }; /* * KTR_DROP - If this bit is set in ktr_type, then at least one event * between the previous record and this record was dropped. */ #define KTR_DROP 0x8000 /* * kernel trace points (in p_traceflag) */ #define KTRFAC_MASK 0x00ffffff #define KTRFAC_SYSCALL (1<sa_len) #define ktrstat(s) \ ktrstruct("stat", (s), sizeof(struct stat)) #define ktrstat_error(s, error) \ ktrstruct_error("stat", (s), sizeof(struct stat), error) extern u_int ktr_geniosize; #ifdef KTRACE extern int ktr_filesize_limit_signal; #else #define ktr_filesize_limit_signal 0 #endif #else #include __BEGIN_DECLS int ktrace(const char *, int, int, pid_t); int utrace(const void *, size_t); __END_DECLS #endif #endif diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 71105239e40e..2556fd574d84 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -1,1322 +1,1322 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)proc.h 8.15 (Berkeley) 5/19/95 * $FreeBSD$ */ #ifndef _SYS_PROC_H_ #define _SYS_PROC_H_ #include /* For struct callout. */ #include /* For struct klist. */ #ifdef _KERNEL #include #endif #include #ifndef _KERNEL #include #endif #include #include #include #include #include #include #include /* XXX. */ #include #include #include #include #include #ifndef _KERNEL #include /* For structs itimerval, timeval. */ #else #include #include #endif #include #include #include #include #include /* Machine-dependent proc substruct. */ #ifdef _KERNEL #include #endif /* * One structure allocated per session. * * List of locks * (m) locked by s_mtx mtx * (e) locked by proctree_lock sx * (c) const until freeing */ struct session { u_int s_count; /* Ref cnt; pgrps in session - atomic. */ struct proc *s_leader; /* (m + e) Session leader. */ struct vnode *s_ttyvp; /* (m) Vnode of controlling tty. */ struct cdev_priv *s_ttydp; /* (m) Device of controlling tty. */ struct tty *s_ttyp; /* (e) Controlling tty. */ pid_t s_sid; /* (c) Session ID. */ /* (m) Setlogin() name: */ char s_login[roundup(MAXLOGNAME, sizeof(long))]; struct mtx s_mtx; /* Mutex to protect members. */ }; /* * One structure allocated per process group. * * List of locks * (m) locked by pg_mtx mtx * (e) locked by proctree_lock sx * (c) const until freeing */ struct pgrp { LIST_ENTRY(pgrp) pg_hash; /* (e) Hash chain. */ LIST_HEAD(, proc) pg_members; /* (m + e) Pointer to pgrp members. */ struct session *pg_session; /* (c) Pointer to session. */ struct sigiolst pg_sigiolst; /* (m) List of sigio sources. */ pid_t pg_id; /* (c) Process group id. */ struct mtx pg_mtx; /* Mutex to protect members */ int pg_flags; /* (m) PGRP_ flags */ }; #define PGRP_ORPHANED 0x00000001 /* Group is orphaned */ /* * pargs, used to hold a copy of the command line, if it had a sane length. */ struct pargs { u_int ar_ref; /* Reference count. */ u_int ar_length; /* Length. */ u_char ar_args[1]; /* Arguments. */ }; /*- * Description of a process. * * This structure contains the information needed to manage a thread of * control, known in UN*X as a process; it has references to substructures * containing descriptions of things that the process uses, but may share * with related processes. The process structure and the substructures * are always addressable except for those marked "(CPU)" below, * which might be addressable only on a processor on which the process * is running. * * Below is a key of locks used to protect each member of struct proc. The * lock is indicated by a reference to a specific character in parens in the * associated comment. * * - not yet protected * a - only touched by curproc or parent during fork/wait * b - created at fork, never changes * (exception aiods switch vmspaces, but they are also * marked 'P_SYSTEM' so hopefully it will be left alone) * c - locked by proc mtx * d - locked by allproc_lock lock * e - locked by proctree_lock lock * f - session mtx * g - process group mtx * h - callout_lock mtx * i - by curproc or the master session mtx * j - locked by proc slock * k - only accessed by curthread * k*- only accessed by curthread and from an interrupt * kx- only accessed by curthread and by debugger * l - the attaching proc or attaching proc parent * m - Giant * n - not locked, lazy * o - ktrace lock * q - td_contested lock * r - p_peers lock * s - see sleepq_switch(), sleeping_on_old_rtc(), and sleep(9) * t - thread lock * u - process stat lock * w - process timer lock * x - created at fork, only changes during single threading in exec * y - created at first aio, doesn't change until exit or exec at which * point we are single-threaded and only curthread changes it * z - zombie threads lock * * If the locking key specifies two identifiers (for example, p_pptr) then * either lock is sufficient for read access, but both locks must be held * for write access. */ struct cpuset; struct filecaps; struct filemon; struct kaioinfo; struct kaudit_record; struct kcov_info; struct kdtrace_proc; struct kdtrace_thread; struct kmsan_td; struct kq_timer_cb_data; struct mqueue_notifier; struct p_sched; struct proc; struct procdesc; struct racct; struct sbuf; struct sleepqueue; struct socket; struct td_sched; struct thread; struct trapframe; struct turnstile; struct vm_map; struct vm_map_entry; struct epoch_tracker; struct syscall_args { u_int code; u_int original_code; struct sysent *callp; register_t args[8]; }; /* * XXX: Does this belong in resource.h or resourcevar.h instead? * Resource usage extension. The times in rusage structs in the kernel are * never up to date. The actual times are kept as runtimes and tick counts * (with control info in the "previous" times), and are converted when * userland asks for rusage info. Backwards compatibility prevents putting * this directly in the user-visible rusage struct. * * Locking for p_rux: (cu) means (u) for p_rux and (c) for p_crux. * Locking for td_rux: (t) for all fields. */ struct rusage_ext { uint64_t rux_runtime; /* (cu) Real time. */ uint64_t rux_uticks; /* (cu) Statclock hits in user mode. */ uint64_t rux_sticks; /* (cu) Statclock hits in sys mode. */ uint64_t rux_iticks; /* (cu) Statclock hits in intr mode. */ uint64_t rux_uu; /* (c) Previous user time in usec. */ uint64_t rux_su; /* (c) Previous sys time in usec. */ uint64_t rux_tu; /* (c) Previous total time in usec. */ }; /* * Kernel runnable context (thread). * This is what is put to sleep and reactivated. * Thread context. Processes may have multiple threads. */ struct thread { struct mtx *volatile td_lock; /* replaces sched lock */ struct proc *td_proc; /* (*) Associated process. */ TAILQ_ENTRY(thread) td_plist; /* (*) All threads in this proc. */ TAILQ_ENTRY(thread) td_runq; /* (t) Run queue. */ union { TAILQ_ENTRY(thread) td_slpq; /* (t) Sleep queue. */ struct thread *td_zombie; /* Zombie list linkage */ }; TAILQ_ENTRY(thread) td_lockq; /* (t) Lock queue. */ LIST_ENTRY(thread) td_hash; /* (d) Hash chain. */ struct cpuset *td_cpuset; /* (t) CPU affinity mask. */ struct domainset_ref td_domain; /* (a) NUMA policy */ struct seltd *td_sel; /* Select queue/channel. */ struct sleepqueue *td_sleepqueue; /* (k) Associated sleep queue. */ struct turnstile *td_turnstile; /* (k) Associated turnstile. */ struct rl_q_entry *td_rlqe; /* (k) Associated range lock entry. */ struct umtx_q *td_umtxq; /* (c?) Link for when we're blocked. */ lwpid_t td_tid; /* (b) Thread ID. */ sigqueue_t td_sigqueue; /* (c) Sigs arrived, not delivered. */ #define td_siglist td_sigqueue.sq_signals u_char td_lend_user_pri; /* (t) Lend user pri. */ u_char td_allocdomain; /* (b) NUMA domain backing this struct thread. */ struct kmsan_td *td_kmsan; /* (k) KMSAN state */ /* Cleared during fork1() */ #define td_startzero td_flags int td_flags; /* (t) TDF_* flags. */ int td_inhibitors; /* (t) Why can not run. */ int td_pflags; /* (k) Private thread (TDP_*) flags. */ int td_pflags2; /* (k) Private thread (TDP2_*) flags. */ int td_dupfd; /* (k) Ret value from fdopen. XXX */ int td_sqqueue; /* (t) Sleepqueue queue blocked on. */ const void *td_wchan; /* (t) Sleep address. */ const char *td_wmesg; /* (t) Reason for sleep. */ volatile u_char td_owepreempt; /* (k*) Preempt on last critical_exit */ u_char td_tsqueue; /* (t) Turnstile queue blocked on. */ short td_locks; /* (k) Debug: count of non-spin locks */ short td_rw_rlocks; /* (k) Count of rwlock read locks. */ short td_sx_slocks; /* (k) Count of sx shared locks. */ short td_lk_slocks; /* (k) Count of lockmgr shared locks. */ short td_stopsched; /* (k) Scheduler stopped. */ struct turnstile *td_blocked; /* (t) Lock thread is blocked on. */ const char *td_lockname; /* (t) Name of lock blocked on. */ LIST_HEAD(, turnstile) td_contested; /* (q) Contested locks. */ struct lock_list_entry *td_sleeplocks; /* (k) Held sleep locks. */ int td_intr_nesting_level; /* (k) Interrupt recursion. */ int td_pinned; /* (k) Temporary cpu pin count. */ struct ucred *td_realucred; /* (k) Reference to credentials. */ struct ucred *td_ucred; /* (k) Used credentials, temporarily switchable. */ struct plimit *td_limit; /* (k) Resource limits. */ int td_slptick; /* (t) Time at sleep. */ int td_blktick; /* (t) Time spent blocked. */ int td_swvoltick; /* (t) Time at last SW_VOL switch. */ int td_swinvoltick; /* (t) Time at last SW_INVOL switch. */ u_int td_cow; /* (*) Number of copy-on-write faults */ struct rusage td_ru; /* (t) rusage information. */ struct rusage_ext td_rux; /* (t) Internal rusage information. */ uint64_t td_incruntime; /* (t) Cpu ticks to transfer to proc. */ uint64_t td_runtime; /* (t) How many cpu ticks we've run. */ u_int td_pticks; /* (t) Statclock hits for profiling */ u_int td_sticks; /* (t) Statclock hits in system mode. */ u_int td_iticks; /* (t) Statclock hits in intr mode. */ u_int td_uticks; /* (t) Statclock hits in user mode. */ int td_intrval; /* (t) Return value for sleepq. */ sigset_t td_oldsigmask; /* (k) Saved mask from pre sigpause. */ volatile u_int td_generation; /* (k) For detection of preemption */ stack_t td_sigstk; /* (k) Stack ptr and on-stack flag. */ int td_xsig; /* (c) Signal for ptrace */ u_long td_profil_addr; /* (k) Temporary addr until AST. */ u_int td_profil_ticks; /* (k) Temporary ticks until AST. */ char td_name[MAXCOMLEN + 1]; /* (*) Thread name. */ struct file *td_fpop; /* (k) file referencing cdev under op */ int td_dbgflags; /* (c) Userland debugger flags */ siginfo_t td_si; /* (c) For debugger or core file */ int td_ng_outbound; /* (k) Thread entered ng from above. */ struct osd td_osd; /* (k) Object specific data. */ struct vm_map_entry *td_map_def_user; /* (k) Deferred entries. */ pid_t td_dbg_forked; /* (c) Child pid for debugger. */ struct vnode *td_vp_reserved;/* (k) Preallocated vnode. */ u_int td_no_sleeping; /* (k) Sleeping disabled count. */ void *td_su; /* (k) FFS SU private */ sbintime_t td_sleeptimo; /* (t) Sleep timeout. */ int td_rtcgen; /* (s) rtc_generation of abs. sleep */ int td_errno; /* (k) Error from last syscall. */ size_t td_vslock_sz; /* (k) amount of vslock-ed space */ struct kcov_info *td_kcov_info; /* (*) Kernel code coverage data */ u_int td_ucredref; /* (k) references on td_realucred */ #define td_endzero td_sigmask /* Copied during fork1() or create_thread(). */ #define td_startcopy td_endzero sigset_t td_sigmask; /* (c) Current signal mask. */ u_char td_rqindex; /* (t) Run queue index. */ u_char td_base_pri; /* (t) Thread base kernel priority. */ u_char td_priority; /* (t) Thread active priority. */ u_char td_pri_class; /* (t) Scheduling class. */ u_char td_user_pri; /* (t) User pri from estcpu and nice. */ u_char td_base_user_pri; /* (t) Base user pri */ uintptr_t td_rb_list; /* (k) Robust list head. */ uintptr_t td_rbp_list; /* (k) Robust priv list head. */ uintptr_t td_rb_inact; /* (k) Current in-action mutex loc. */ struct syscall_args td_sa; /* (kx) Syscall parameters. Copied on fork for child tracing. */ void *td_sigblock_ptr; /* (k) uptr for fast sigblock. */ uint32_t td_sigblock_val; /* (k) fast sigblock value read at td_sigblock_ptr on kern entry */ #define td_endcopy td_pcb /* * Fields that must be manually set in fork1() or create_thread() * or already have been set in the allocator, constructor, etc. */ struct pcb *td_pcb; /* (k) Kernel VA of pcb and kstack. */ enum td_states { TDS_INACTIVE = 0x0, TDS_INHIBITED, TDS_CAN_RUN, TDS_RUNQ, TDS_RUNNING } td_state; /* (t) thread state */ /* Note: td_state must be accessed using TD_{GET,SET}_STATE(). */ union { - register_t tdu_retval[2]; + syscallarg_t tdu_retval[2]; off_t tdu_off; } td_uretoff; /* (k) Syscall aux returns. */ #define td_retval td_uretoff.tdu_retval u_int td_cowgen; /* (k) Generation of COW pointers. */ /* LP64 hole */ struct callout td_slpcallout; /* (h) Callout for sleep. */ struct trapframe *td_frame; /* (k) */ vm_offset_t td_kstack; /* (a) Kernel VA of kstack. */ int td_kstack_pages; /* (a) Size of the kstack. */ volatile u_int td_critnest; /* (k*) Critical section nest level. */ struct mdthread td_md; /* (k) Any machine-dependent fields. */ struct kaudit_record *td_ar; /* (k) Active audit record, if any. */ struct lpohead td_lprof[2]; /* (a) lock profiling objects. */ struct kdtrace_thread *td_dtrace; /* (*) DTrace-specific data. */ struct vnet *td_vnet; /* (k) Effective vnet. */ const char *td_vnet_lpush; /* (k) Debugging vnet push / pop. */ struct trapframe *td_intr_frame;/* (k) Frame of the current irq */ struct proc *td_rfppwait_p; /* (k) The vforked child */ struct vm_page **td_ma; /* (k) uio pages held */ int td_ma_cnt; /* (k) size of *td_ma */ /* LP64 hole */ void *td_emuldata; /* Emulator state data */ int td_lastcpu; /* (t) Last cpu we were on. */ int td_oncpu; /* (t) Which cpu we are on. */ void *td_lkpi_task; /* LinuxKPI task struct pointer */ int td_pmcpend; void *td_coredump; /* (c) coredump request. */ off_t td_ktr_io_lim; /* (k) limit for ktrace file size */ #ifdef EPOCH_TRACE SLIST_HEAD(, epoch_tracker) td_epochs; #endif }; struct thread0_storage { struct thread t0st_thread; uint64_t t0st_sched[10]; }; struct mtx *thread_lock_block(struct thread *); void thread_lock_block_wait(struct thread *); void thread_lock_set(struct thread *, struct mtx *); void thread_lock_unblock(struct thread *, struct mtx *); #define THREAD_LOCK_ASSERT(td, type) \ mtx_assert((td)->td_lock, (type)) #define THREAD_LOCK_BLOCKED_ASSERT(td, type) \ do { \ struct mtx *__m = (td)->td_lock; \ if (__m != &blocked_lock) \ mtx_assert(__m, (type)); \ } while (0) #ifdef INVARIANTS #define THREAD_LOCKPTR_ASSERT(td, lock) \ do { \ struct mtx *__m; \ __m = (td)->td_lock; \ KASSERT(__m == (lock), \ ("Thread %p lock %p does not match %p", td, __m, (lock))); \ } while (0) #define THREAD_LOCKPTR_BLOCKED_ASSERT(td, lock) \ do { \ struct mtx *__m; \ __m = (td)->td_lock; \ KASSERT(__m == (lock) || __m == &blocked_lock, \ ("Thread %p lock %p does not match %p", td, __m, (lock))); \ } while (0) #define TD_LOCKS_INC(td) ((td)->td_locks++) #define TD_LOCKS_DEC(td) do { \ KASSERT(SCHEDULER_STOPPED_TD(td) || (td)->td_locks > 0, \ ("thread %p owns no locks", (td))); \ (td)->td_locks--; \ } while (0) #else #define THREAD_LOCKPTR_ASSERT(td, lock) #define THREAD_LOCKPTR_BLOCKED_ASSERT(td, lock) #define TD_LOCKS_INC(td) #define TD_LOCKS_DEC(td) #endif /* * Flags kept in td_flags: * To change these you MUST have the scheduler lock. */ #define TDF_BORROWING 0x00000001 /* Thread is borrowing pri from another. */ #define TDF_INPANIC 0x00000002 /* Caused a panic, let it drive crashdump. */ #define TDF_INMEM 0x00000004 /* Thread's stack is in memory. */ #define TDF_SINTR 0x00000008 /* Sleep is interruptible. */ #define TDF_TIMEOUT 0x00000010 /* Timing out during sleep. */ #define TDF_IDLETD 0x00000020 /* This is a per-CPU idle thread. */ #define TDF_CANSWAP 0x00000040 /* Thread can be swapped. */ #define TDF_SIGWAIT 0x00000080 /* Ignore ignored signals */ #define TDF_KTH_SUSP 0x00000100 /* kthread is suspended */ #define TDF_ALLPROCSUSP 0x00000200 /* suspended by SINGLE_ALLPROC */ #define TDF_BOUNDARY 0x00000400 /* Thread suspended at user boundary */ #define TDF_ASTPENDING 0x00000800 /* Thread has some asynchronous events. */ #define TDF_KQTICKLED 0x00001000 /* AST drain kqueue taskqueue */ #define TDF_SBDRY 0x00002000 /* Stop only on usermode boundary. */ #define TDF_UPIBLOCKED 0x00004000 /* Thread blocked on user PI mutex. */ #define TDF_NEEDSUSPCHK 0x00008000 /* Thread may need to suspend. */ #define TDF_NEEDRESCHED 0x00010000 /* Thread needs to yield. */ #define TDF_NEEDSIGCHK 0x00020000 /* Thread may need signal delivery. */ #define TDF_NOLOAD 0x00040000 /* Ignore during load avg calculations. */ #define TDF_SERESTART 0x00080000 /* ERESTART on stop attempts. */ #define TDF_THRWAKEUP 0x00100000 /* Libthr thread must not suspend itself. */ #define TDF_SEINTR 0x00200000 /* EINTR on stop attempts. */ #define TDF_SWAPINREQ 0x00400000 /* Swapin request due to wakeup. */ #define TDF_UNUSED23 0x00800000 /* --available-- */ #define TDF_SCHED0 0x01000000 /* Reserved for scheduler private use */ #define TDF_SCHED1 0x02000000 /* Reserved for scheduler private use */ #define TDF_SCHED2 0x04000000 /* Reserved for scheduler private use */ #define TDF_SCHED3 0x08000000 /* Reserved for scheduler private use */ #define TDF_ALRMPEND 0x10000000 /* Pending SIGVTALRM needs to be posted. */ #define TDF_PROFPEND 0x20000000 /* Pending SIGPROF needs to be posted. */ #define TDF_MACPEND 0x40000000 /* AST-based MAC event pending. */ /* Userland debug flags */ #define TDB_SUSPEND 0x00000001 /* Thread is suspended by debugger */ #define TDB_XSIG 0x00000002 /* Thread is exchanging signal under trace */ #define TDB_USERWR 0x00000004 /* Debugger modified memory or registers */ #define TDB_SCE 0x00000008 /* Thread performs syscall enter */ #define TDB_SCX 0x00000010 /* Thread performs syscall exit */ #define TDB_EXEC 0x00000020 /* TDB_SCX from exec(2) family */ #define TDB_FORK 0x00000040 /* TDB_SCX from fork(2) that created new process */ #define TDB_STOPATFORK 0x00000080 /* Stop at the return from fork (child only) */ #define TDB_CHILD 0x00000100 /* New child indicator for ptrace() */ #define TDB_BORN 0x00000200 /* New LWP indicator for ptrace() */ #define TDB_EXIT 0x00000400 /* Exiting LWP indicator for ptrace() */ #define TDB_VFORK 0x00000800 /* vfork indicator for ptrace() */ #define TDB_FSTP 0x00001000 /* The thread is PT_ATTACH leader */ #define TDB_STEP 0x00002000 /* (x86) PSL_T set for PT_STEP */ #define TDB_SSWITCH 0x00004000 /* Suspended in ptracestop */ #define TDB_COREDUMPRQ 0x00008000 /* Coredump request */ /* * "Private" flags kept in td_pflags: * These are only written by curthread and thus need no locking. */ #define TDP_OLDMASK 0x00000001 /* Need to restore mask after suspend. */ #define TDP_INKTR 0x00000002 /* Thread is currently in KTR code. */ #define TDP_INKTRACE 0x00000004 /* Thread is currently in KTRACE code. */ #define TDP_BUFNEED 0x00000008 /* Do not recurse into the buf flush */ #define TDP_COWINPROGRESS 0x00000010 /* Snapshot copy-on-write in progress. */ #define TDP_ALTSTACK 0x00000020 /* Have alternate signal stack. */ #define TDP_DEADLKTREAT 0x00000040 /* Lock acquisition - deadlock treatment. */ #define TDP_NOFAULTING 0x00000080 /* Do not handle page faults. */ #define TDP_SIGFASTBLOCK 0x00000100 /* Fast sigblock active */ #define TDP_OWEUPC 0x00000200 /* Call addupc() at next AST. */ #define TDP_ITHREAD 0x00000400 /* Thread is an interrupt thread. */ #define TDP_SYNCIO 0x00000800 /* Local override, disable async i/o. */ #define TDP_SCHED1 0x00001000 /* Reserved for scheduler private use */ #define TDP_SCHED2 0x00002000 /* Reserved for scheduler private use */ #define TDP_SCHED3 0x00004000 /* Reserved for scheduler private use */ #define TDP_SCHED4 0x00008000 /* Reserved for scheduler private use */ #define TDP_GEOM 0x00010000 /* Settle GEOM before finishing syscall */ #define TDP_SOFTDEP 0x00020000 /* Stuck processing softdep worklist */ #define TDP_NORUNNINGBUF 0x00040000 /* Ignore runningbufspace check */ #define TDP_WAKEUP 0x00080000 /* Don't sleep in umtx cond_wait */ #define TDP_INBDFLUSH 0x00100000 /* Already in BO_BDFLUSH, do not recurse */ #define TDP_KTHREAD 0x00200000 /* This is an official kernel thread */ #define TDP_CALLCHAIN 0x00400000 /* Capture thread's callchain */ #define TDP_IGNSUSP 0x00800000 /* Permission to ignore the MNTK_SUSPEND* */ #define TDP_AUDITREC 0x01000000 /* Audit record pending on thread */ #define TDP_RFPPWAIT 0x02000000 /* Handle RFPPWAIT on syscall exit */ #define TDP_RESETSPUR 0x04000000 /* Reset spurious page fault history. */ #define TDP_NERRNO 0x08000000 /* Last errno is already in td_errno */ #define TDP_UIOHELD 0x10000000 /* Current uio has pages held in td_ma */ #define TDP_UNUSED0 0x20000000 /* UNUSED */ #define TDP_EXECVMSPC 0x40000000 /* Execve destroyed old vmspace */ #define TDP_SIGFASTPENDING 0x80000000 /* Pending signal due to sigfastblock */ #define TDP2_SBPAGES 0x00000001 /* Owns sbusy on some pages */ #define TDP2_COMPAT32RB 0x00000002 /* compat32 ABI for robust lists */ #define TDP2_ACCT 0x00000004 /* Doing accounting */ /* * Reasons that the current thread can not be run yet. * More than one may apply. */ #define TDI_SUSPENDED 0x0001 /* On suspension queue. */ #define TDI_SLEEPING 0x0002 /* Actually asleep! (tricky). */ #define TDI_SWAPPED 0x0004 /* Stack not in mem. Bad juju if run. */ #define TDI_LOCK 0x0008 /* Stopped on a lock. */ #define TDI_IWAIT 0x0010 /* Awaiting interrupt. */ #define TD_IS_SLEEPING(td) ((td)->td_inhibitors & TDI_SLEEPING) #define TD_ON_SLEEPQ(td) ((td)->td_wchan != NULL) #define TD_IS_SUSPENDED(td) ((td)->td_inhibitors & TDI_SUSPENDED) #define TD_IS_SWAPPED(td) ((td)->td_inhibitors & TDI_SWAPPED) #define TD_ON_LOCK(td) ((td)->td_inhibitors & TDI_LOCK) #define TD_AWAITING_INTR(td) ((td)->td_inhibitors & TDI_IWAIT) #ifdef _KERNEL #define TD_GET_STATE(td) atomic_load_int(&(td)->td_state) #else #define TD_GET_STATE(td) ((td)->td_state) #endif #define TD_IS_RUNNING(td) (TD_GET_STATE(td) == TDS_RUNNING) #define TD_ON_RUNQ(td) (TD_GET_STATE(td) == TDS_RUNQ) #define TD_CAN_RUN(td) (TD_GET_STATE(td) == TDS_CAN_RUN) #define TD_IS_INHIBITED(td) (TD_GET_STATE(td) == TDS_INHIBITED) #define TD_ON_UPILOCK(td) ((td)->td_flags & TDF_UPIBLOCKED) #define TD_IS_IDLETHREAD(td) ((td)->td_flags & TDF_IDLETD) #define TD_CAN_ABORT(td) (TD_ON_SLEEPQ((td)) && \ ((td)->td_flags & TDF_SINTR) != 0) #define KTDSTATE(td) \ (((td)->td_inhibitors & TDI_SLEEPING) != 0 ? "sleep" : \ ((td)->td_inhibitors & TDI_SUSPENDED) != 0 ? "suspended" : \ ((td)->td_inhibitors & TDI_SWAPPED) != 0 ? "swapped" : \ ((td)->td_inhibitors & TDI_LOCK) != 0 ? "blocked" : \ ((td)->td_inhibitors & TDI_IWAIT) != 0 ? "iwait" : "yielding") #define TD_SET_INHIB(td, inhib) do { \ TD_SET_STATE(td, TDS_INHIBITED); \ (td)->td_inhibitors |= (inhib); \ } while (0) #define TD_CLR_INHIB(td, inhib) do { \ if (((td)->td_inhibitors & (inhib)) && \ (((td)->td_inhibitors &= ~(inhib)) == 0)) \ TD_SET_STATE(td, TDS_CAN_RUN); \ } while (0) #define TD_SET_SLEEPING(td) TD_SET_INHIB((td), TDI_SLEEPING) #define TD_SET_SWAPPED(td) TD_SET_INHIB((td), TDI_SWAPPED) #define TD_SET_LOCK(td) TD_SET_INHIB((td), TDI_LOCK) #define TD_SET_SUSPENDED(td) TD_SET_INHIB((td), TDI_SUSPENDED) #define TD_SET_IWAIT(td) TD_SET_INHIB((td), TDI_IWAIT) #define TD_SET_EXITING(td) TD_SET_INHIB((td), TDI_EXITING) #define TD_CLR_SLEEPING(td) TD_CLR_INHIB((td), TDI_SLEEPING) #define TD_CLR_SWAPPED(td) TD_CLR_INHIB((td), TDI_SWAPPED) #define TD_CLR_LOCK(td) TD_CLR_INHIB((td), TDI_LOCK) #define TD_CLR_SUSPENDED(td) TD_CLR_INHIB((td), TDI_SUSPENDED) #define TD_CLR_IWAIT(td) TD_CLR_INHIB((td), TDI_IWAIT) #ifdef _KERNEL #define TD_SET_STATE(td, state) atomic_store_int(&(td)->td_state, state) #else #define TD_SET_STATE(td, state) (td)->td_state = state #endif #define TD_SET_RUNNING(td) TD_SET_STATE(td, TDS_RUNNING) #define TD_SET_RUNQ(td) TD_SET_STATE(td, TDS_RUNQ) #define TD_SET_CAN_RUN(td) TD_SET_STATE(td, TDS_CAN_RUN) #define TD_SBDRY_INTR(td) \ (((td)->td_flags & (TDF_SEINTR | TDF_SERESTART)) != 0) #define TD_SBDRY_ERRNO(td) \ (((td)->td_flags & TDF_SEINTR) != 0 ? EINTR : ERESTART) /* * Process structure. */ struct proc { LIST_ENTRY(proc) p_list; /* (d) List of all processes. */ TAILQ_HEAD(, thread) p_threads; /* (c) all threads. */ struct mtx p_slock; /* process spin lock */ struct ucred *p_ucred; /* (c) Process owner's identity. */ struct filedesc *p_fd; /* (b) Open files. */ struct filedesc_to_leader *p_fdtol; /* (b) Tracking node */ struct pwddesc *p_pd; /* (b) Cwd, chroot, jail, umask */ struct pstats *p_stats; /* (b) Accounting/statistics (CPU). */ struct plimit *p_limit; /* (c) Resource limits. */ struct callout p_limco; /* (c) Limit callout handle */ struct sigacts *p_sigacts; /* (x) Signal actions, state (CPU). */ int p_flag; /* (c) P_* flags. */ int p_flag2; /* (c) P2_* flags. */ enum p_states { PRS_NEW = 0, /* In creation */ PRS_NORMAL, /* threads can be run. */ PRS_ZOMBIE } p_state; /* (j/c) Process status. */ pid_t p_pid; /* (b) Process identifier. */ LIST_ENTRY(proc) p_hash; /* (d) Hash chain. */ LIST_ENTRY(proc) p_pglist; /* (g + e) List of processes in pgrp. */ struct proc *p_pptr; /* (c + e) Pointer to parent process. */ LIST_ENTRY(proc) p_sibling; /* (e) List of sibling processes. */ LIST_HEAD(, proc) p_children; /* (e) Pointer to list of children. */ struct proc *p_reaper; /* (e) My reaper. */ LIST_HEAD(, proc) p_reaplist; /* (e) List of my descendants (if I am reaper). */ LIST_ENTRY(proc) p_reapsibling; /* (e) List of siblings - descendants of the same reaper. */ struct mtx p_mtx; /* (n) Lock for this struct. */ struct mtx p_statmtx; /* Lock for the stats */ struct mtx p_itimmtx; /* Lock for the virt/prof timers */ struct mtx p_profmtx; /* Lock for the profiling */ struct ksiginfo *p_ksi; /* Locked by parent proc lock */ sigqueue_t p_sigqueue; /* (c) Sigs not delivered to a td. */ #define p_siglist p_sigqueue.sq_signals pid_t p_oppid; /* (c + e) Real parent pid. */ /* The following fields are all zeroed upon creation in fork. */ #define p_startzero p_vmspace struct vmspace *p_vmspace; /* (b) Address space. */ u_int p_swtick; /* (c) Tick when swapped in or out. */ u_int p_cowgen; /* (c) Generation of COW pointers. */ struct itimerval p_realtimer; /* (c) Alarm timer. */ struct rusage p_ru; /* (a) Exit information. */ struct rusage_ext p_rux; /* (cu) Internal resource usage. */ struct rusage_ext p_crux; /* (c) Internal child resource usage. */ int p_profthreads; /* (c) Num threads in addupc_task. */ volatile int p_exitthreads; /* (j) Number of threads exiting */ int p_traceflag; /* (o) Kernel trace points. */ struct ktr_io_params *p_ktrioparms; /* (c + o) Params for ktrace. */ struct vnode *p_textvp; /* (b) Vnode of executable. */ struct vnode *p_textdvp; /* (b) Dir containing textvp. */ char *p_binname; /* (b) Binary hardlink name. */ u_int p_lock; /* (c) Proclock (prevent swap) count. */ struct sigiolst p_sigiolst; /* (c) List of sigio sources. */ int p_sigparent; /* (c) Signal to parent on exit. */ int p_sig; /* (n) For core dump/debugger XXX. */ u_int p_ptevents; /* (c + e) ptrace() event mask. */ struct kaioinfo *p_aioinfo; /* (y) ASYNC I/O info. */ struct thread *p_singlethread;/* (c + j) If single threading this is it */ int p_suspcount; /* (j) Num threads in suspended mode. */ struct thread *p_xthread; /* (c) Trap thread */ int p_boundary_count;/* (j) Num threads at user boundary */ int p_pendingcnt; /* how many signals are pending */ struct itimers *p_itimers; /* (c) POSIX interval timers. */ struct procdesc *p_procdesc; /* (e) Process descriptor, if any. */ u_int p_treeflag; /* (e) P_TREE flags */ int p_pendingexits; /* (c) Count of pending thread exits. */ struct filemon *p_filemon; /* (c) filemon-specific data. */ int p_pdeathsig; /* (c) Signal from parent on exit. */ /* End area that is zeroed on creation. */ #define p_endzero p_magic /* The following fields are all copied upon creation in fork. */ #define p_startcopy p_endzero u_int p_magic; /* (b) Magic number. */ int p_osrel; /* (x) osreldate for the binary (from ELF note, if any) */ uint32_t p_fctl0; /* (x) ABI feature control, ELF note */ char p_comm[MAXCOMLEN + 1]; /* (x) Process name. */ struct sysentvec *p_sysent; /* (b) Syscall dispatch info. */ struct pargs *p_args; /* (c) Process arguments. */ rlim_t p_cpulimit; /* (c) Current CPU limit in seconds. */ signed char p_nice; /* (c) Process "nice" value. */ int p_fibnum; /* in this routing domain XXX MRT */ pid_t p_reapsubtree; /* (e) Pid of the direct child of the reaper which spawned our subtree. */ uint64_t p_elf_flags; /* (x) ELF flags */ void *p_elf_brandinfo; /* (x) Elf_Brandinfo, NULL for non ELF binaries. */ /* End area that is copied on creation. */ #define p_endcopy p_xexit u_int p_xexit; /* (c) Exit code. */ u_int p_xsig; /* (c) Stop/kill sig. */ struct pgrp *p_pgrp; /* (c + e) Pointer to process group. */ struct knlist *p_klist; /* (c) Knotes attached to this proc. */ int p_numthreads; /* (c) Number of threads. */ struct mdproc p_md; /* Any machine-dependent fields. */ struct callout p_itcallout; /* (h + c) Interval timer callout. */ u_short p_acflag; /* (c) Accounting flags. */ struct proc *p_peers; /* (r) */ struct proc *p_leader; /* (b) */ void *p_emuldata; /* (c) Emulator state data. */ struct label *p_label; /* (*) Proc (not subject) MAC label. */ STAILQ_HEAD(, ktr_request) p_ktr; /* (o) KTR event queue. */ LIST_HEAD(, mqueue_notifier) p_mqnotifier; /* (c) mqueue notifiers.*/ struct kdtrace_proc *p_dtrace; /* (*) DTrace-specific data. */ struct cv p_pwait; /* (*) wait cv for exit/exec. */ uint64_t p_prev_runtime; /* (c) Resource usage accounting. */ struct racct *p_racct; /* (b) Resource accounting. */ int p_throttled; /* (c) Flag for racct pcpu throttling */ /* * An orphan is the child that has been re-parented to the * debugger as a result of attaching to it. Need to keep * track of them for parent to be able to collect the exit * status of what used to be children. */ LIST_ENTRY(proc) p_orphan; /* (e) List of orphan processes. */ LIST_HEAD(, proc) p_orphans; /* (e) Pointer to list of orphans. */ TAILQ_HEAD(, kq_timer_cb_data) p_kqtim_stop; /* (c) */ }; #define p_session p_pgrp->pg_session #define p_pgid p_pgrp->pg_id #define NOCPU (-1) /* For when we aren't on a CPU. */ #define NOCPU_OLD (255) #define MAXCPU_OLD (254) #define PROC_SLOCK(p) mtx_lock_spin(&(p)->p_slock) #define PROC_SUNLOCK(p) mtx_unlock_spin(&(p)->p_slock) #define PROC_SLOCK_ASSERT(p, type) mtx_assert(&(p)->p_slock, (type)) #define PROC_STATLOCK(p) mtx_lock_spin(&(p)->p_statmtx) #define PROC_STATUNLOCK(p) mtx_unlock_spin(&(p)->p_statmtx) #define PROC_STATLOCK_ASSERT(p, type) mtx_assert(&(p)->p_statmtx, (type)) #define PROC_ITIMLOCK(p) mtx_lock_spin(&(p)->p_itimmtx) #define PROC_ITIMUNLOCK(p) mtx_unlock_spin(&(p)->p_itimmtx) #define PROC_ITIMLOCK_ASSERT(p, type) mtx_assert(&(p)->p_itimmtx, (type)) #define PROC_PROFLOCK(p) mtx_lock_spin(&(p)->p_profmtx) #define PROC_PROFUNLOCK(p) mtx_unlock_spin(&(p)->p_profmtx) #define PROC_PROFLOCK_ASSERT(p, type) mtx_assert(&(p)->p_profmtx, (type)) /* These flags are kept in p_flag. */ #define P_ADVLOCK 0x00000001 /* Process may hold a POSIX advisory lock. */ #define P_CONTROLT 0x00000002 /* Has a controlling terminal. */ #define P_KPROC 0x00000004 /* Kernel process. */ #define P_UNUSED3 0x00000008 /* --available-- */ #define P_PPWAIT 0x00000010 /* Parent is waiting for child to exec/exit. */ #define P_PROFIL 0x00000020 /* Has started profiling. */ #define P_STOPPROF 0x00000040 /* Has thread requesting to stop profiling. */ #define P_HADTHREADS 0x00000080 /* Has had threads (no cleanup shortcuts) */ #define P_SUGID 0x00000100 /* Had set id privileges since last exec. */ #define P_SYSTEM 0x00000200 /* System proc: no sigs, stats or swapping. */ #define P_SINGLE_EXIT 0x00000400 /* Threads suspending should exit, not wait. */ #define P_TRACED 0x00000800 /* Debugged process being traced. */ #define P_WAITED 0x00001000 /* Someone is waiting for us. */ #define P_WEXIT 0x00002000 /* Working on exiting. */ #define P_EXEC 0x00004000 /* Process called exec. */ #define P_WKILLED 0x00008000 /* Killed, go to kernel/user boundary ASAP. */ #define P_CONTINUED 0x00010000 /* Proc has continued from a stopped state. */ #define P_STOPPED_SIG 0x00020000 /* Stopped due to SIGSTOP/SIGTSTP. */ #define P_STOPPED_TRACE 0x00040000 /* Stopped because of tracing. */ #define P_STOPPED_SINGLE 0x00080000 /* Only 1 thread can continue (not to user). */ #define P_PROTECTED 0x00100000 /* Do not kill on memory overcommit. */ #define P_SIGEVENT 0x00200000 /* Process pending signals changed. */ #define P_SINGLE_BOUNDARY 0x00400000 /* Threads should suspend at user boundary. */ #define P_HWPMC 0x00800000 /* Process is using HWPMCs */ #define P_JAILED 0x01000000 /* Process is in jail. */ #define P_TOTAL_STOP 0x02000000 /* Stopped in stop_all_proc. */ #define P_INEXEC 0x04000000 /* Process is in execve(). */ #define P_STATCHILD 0x08000000 /* Child process stopped or exited. */ #define P_INMEM 0x10000000 /* Loaded into memory. */ #define P_SWAPPINGOUT 0x20000000 /* Process is being swapped out. */ #define P_SWAPPINGIN 0x40000000 /* Process is being swapped in. */ #define P_PPTRACE 0x80000000 /* PT_TRACEME by vforked child. */ #define P_STOPPED (P_STOPPED_SIG|P_STOPPED_SINGLE|P_STOPPED_TRACE) #define P_SHOULDSTOP(p) ((p)->p_flag & P_STOPPED) #define P_KILLED(p) ((p)->p_flag & P_WKILLED) /* These flags are kept in p_flag2. */ #define P2_INHERIT_PROTECTED 0x00000001 /* New children get P_PROTECTED. */ #define P2_NOTRACE 0x00000002 /* No ptrace(2) attach or coredumps. */ #define P2_NOTRACE_EXEC 0x00000004 /* Keep P2_NOPTRACE on exec(2). */ #define P2_AST_SU 0x00000008 /* Handles SU ast for kthreads. */ #define P2_PTRACE_FSTP 0x00000010 /* SIGSTOP from PT_ATTACH not yet handled. */ #define P2_TRAPCAP 0x00000020 /* SIGTRAP on ENOTCAPABLE */ #define P2_ASLR_ENABLE 0x00000040 /* Force enable ASLR. */ #define P2_ASLR_DISABLE 0x00000080 /* Force disable ASLR. */ #define P2_ASLR_IGNSTART 0x00000100 /* Enable ASLR to consume sbrk area. */ #define P2_PROTMAX_ENABLE 0x00000200 /* Force enable implied PROT_MAX. */ #define P2_PROTMAX_DISABLE 0x00000400 /* Force disable implied PROT_MAX. */ #define P2_STKGAP_DISABLE 0x00000800 /* Disable stack gap for MAP_STACK */ #define P2_STKGAP_DISABLE_EXEC 0x00001000 /* Stack gap disabled after exec */ #define P2_ITSTOPPED 0x00002000 #define P2_PTRACEREQ 0x00004000 /* Active ptrace req */ #define P2_NO_NEW_PRIVS 0x00008000 /* Ignore setuid */ #define P2_WXORX_DISABLE 0x00010000 /* WX mappings enabled */ #define P2_WXORX_ENABLE_EXEC 0x00020000 /* WXORX enabled after exec */ /* Flags protected by proctree_lock, kept in p_treeflags. */ #define P_TREE_ORPHANED 0x00000001 /* Reparented, on orphan list */ #define P_TREE_FIRST_ORPHAN 0x00000002 /* First element of orphan list */ #define P_TREE_REAPER 0x00000004 /* Reaper of subtree */ #define P_TREE_GRPEXITED 0x00000008 /* exit1() done with job ctl */ /* * These were process status values (p_stat), now they are only used in * legacy conversion code. */ #define SIDL 1 /* Process being created by fork. */ #define SRUN 2 /* Currently runnable. */ #define SSLEEP 3 /* Sleeping on an address. */ #define SSTOP 4 /* Process debugging or suspension. */ #define SZOMB 5 /* Awaiting collection by parent. */ #define SWAIT 6 /* Waiting for interrupt. */ #define SLOCK 7 /* Blocked on a lock. */ #define P_MAGIC 0xbeefface #ifdef _KERNEL /* Types and flags for mi_switch(). */ #define SW_TYPE_MASK 0xff /* First 8 bits are switch type */ #define SWT_NONE 0 /* Unspecified switch. */ #define SWT_PREEMPT 1 /* Switching due to preemption. */ #define SWT_OWEPREEMPT 2 /* Switching due to owepreempt. */ #define SWT_TURNSTILE 3 /* Turnstile contention. */ #define SWT_SLEEPQ 4 /* Sleepq wait. */ #define SWT_SLEEPQTIMO 5 /* Sleepq timeout wait. */ #define SWT_RELINQUISH 6 /* yield call. */ #define SWT_NEEDRESCHED 7 /* NEEDRESCHED was set. */ #define SWT_IDLE 8 /* Switching from the idle thread. */ #define SWT_IWAIT 9 /* Waiting for interrupts. */ #define SWT_SUSPEND 10 /* Thread suspended. */ #define SWT_REMOTEPREEMPT 11 /* Remote processor preempted. */ #define SWT_REMOTEWAKEIDLE 12 /* Remote processor preempted idle. */ #define SWT_COUNT 13 /* Number of switch types. */ /* Flags */ #define SW_VOL 0x0100 /* Voluntary switch. */ #define SW_INVOL 0x0200 /* Involuntary switch. */ #define SW_PREEMPT 0x0400 /* The invol switch is a preemption */ /* How values for thread_single(). */ #define SINGLE_NO_EXIT 0 #define SINGLE_EXIT 1 #define SINGLE_BOUNDARY 2 #define SINGLE_ALLPROC 3 #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_PARGS); MALLOC_DECLARE(M_SESSION); MALLOC_DECLARE(M_SUBPROC); #endif #define FOREACH_PROC_IN_SYSTEM(p) \ LIST_FOREACH((p), &allproc, p_list) #define FOREACH_THREAD_IN_PROC(p, td) \ TAILQ_FOREACH((td), &(p)->p_threads, td_plist) #define FIRST_THREAD_IN_PROC(p) TAILQ_FIRST(&(p)->p_threads) /* * We use process IDs <= pid_max <= PID_MAX; PID_MAX + 1 must also fit * in a pid_t, as it is used to represent "no process group". */ #define PID_MAX 99999 #define NO_PID 100000 #define THREAD0_TID NO_PID extern pid_t pid_max; #define SESS_LEADER(p) ((p)->p_session->s_leader == (p)) /* Lock and unlock a process. */ #define PROC_LOCK(p) mtx_lock(&(p)->p_mtx) #define PROC_TRYLOCK(p) mtx_trylock(&(p)->p_mtx) #define PROC_UNLOCK(p) mtx_unlock(&(p)->p_mtx) #define PROC_LOCKED(p) mtx_owned(&(p)->p_mtx) #define PROC_WAIT_UNLOCKED(p) mtx_wait_unlocked(&(p)->p_mtx) #define PROC_LOCK_ASSERT(p, type) mtx_assert(&(p)->p_mtx, (type)) /* Lock and unlock a process group. */ #define PGRP_LOCK(pg) mtx_lock(&(pg)->pg_mtx) #define PGRP_UNLOCK(pg) mtx_unlock(&(pg)->pg_mtx) #define PGRP_LOCKED(pg) mtx_owned(&(pg)->pg_mtx) #define PGRP_LOCK_ASSERT(pg, type) mtx_assert(&(pg)->pg_mtx, (type)) #define PGRP_LOCK_PGSIGNAL(pg) do { \ if ((pg) != NULL) \ PGRP_LOCK(pg); \ } while (0) #define PGRP_UNLOCK_PGSIGNAL(pg) do { \ if ((pg) != NULL) \ PGRP_UNLOCK(pg); \ } while (0) /* Lock and unlock a session. */ #define SESS_LOCK(s) mtx_lock(&(s)->s_mtx) #define SESS_UNLOCK(s) mtx_unlock(&(s)->s_mtx) #define SESS_LOCKED(s) mtx_owned(&(s)->s_mtx) #define SESS_LOCK_ASSERT(s, type) mtx_assert(&(s)->s_mtx, (type)) /* * Non-zero p_lock ensures that: * - exit1() is not performed until p_lock reaches zero; * - the process' threads stack are not swapped out if they are currently * not (P_INMEM). * * PHOLD() asserts that the process (except the current process) is * not exiting, increments p_lock and swaps threads stacks into memory, * if needed. * _PHOLD() is same as PHOLD(), it takes the process locked. * _PHOLD_LITE() also takes the process locked, but comparing with * _PHOLD(), it only guarantees that exit1() is not executed, * faultin() is not called. */ #define PHOLD(p) do { \ PROC_LOCK(p); \ _PHOLD(p); \ PROC_UNLOCK(p); \ } while (0) #define _PHOLD(p) do { \ PROC_LOCK_ASSERT((p), MA_OWNED); \ KASSERT(!((p)->p_flag & P_WEXIT) || (p) == curproc, \ ("PHOLD of exiting process %p", p)); \ (p)->p_lock++; \ if (((p)->p_flag & P_INMEM) == 0) \ faultin((p)); \ } while (0) #define _PHOLD_LITE(p) do { \ PROC_LOCK_ASSERT((p), MA_OWNED); \ KASSERT(!((p)->p_flag & P_WEXIT) || (p) == curproc, \ ("PHOLD of exiting process %p", p)); \ (p)->p_lock++; \ } while (0) #define PROC_ASSERT_HELD(p) do { \ KASSERT((p)->p_lock > 0, ("process %p not held", p)); \ } while (0) #define PRELE(p) do { \ PROC_LOCK((p)); \ _PRELE((p)); \ PROC_UNLOCK((p)); \ } while (0) #define _PRELE(p) do { \ PROC_LOCK_ASSERT((p), MA_OWNED); \ PROC_ASSERT_HELD(p); \ (--(p)->p_lock); \ if (((p)->p_flag & P_WEXIT) && (p)->p_lock == 0) \ wakeup(&(p)->p_lock); \ } while (0) #define PROC_ASSERT_NOT_HELD(p) do { \ KASSERT((p)->p_lock == 0, ("process %p held", p)); \ } while (0) #define PROC_UPDATE_COW(p) do { \ struct proc *_p = (p); \ PROC_LOCK_ASSERT((_p), MA_OWNED); \ atomic_store_int(&_p->p_cowgen, _p->p_cowgen + 1); \ } while (0) #define PROC_COW_CHANGECOUNT(td, p) ({ \ struct thread *_td = (td); \ struct proc *_p = (p); \ MPASS(_td == curthread); \ PROC_LOCK_ASSERT(_p, MA_OWNED); \ _p->p_cowgen - _td->td_cowgen; \ }) /* Check whether a thread is safe to be swapped out. */ #define thread_safetoswapout(td) ((td)->td_flags & TDF_CANSWAP) /* Control whether or not it is safe for curthread to sleep. */ #define THREAD_NO_SLEEPING() do { \ curthread->td_no_sleeping++; \ MPASS(curthread->td_no_sleeping > 0); \ } while (0) #define THREAD_SLEEPING_OK() do { \ MPASS(curthread->td_no_sleeping > 0); \ curthread->td_no_sleeping--; \ } while (0) #define THREAD_CAN_SLEEP() ((curthread)->td_no_sleeping == 0) #define PIDHASH(pid) (&pidhashtbl[(pid) & pidhash]) #define PIDHASHLOCK(pid) (&pidhashtbl_lock[((pid) & pidhashlock)]) extern LIST_HEAD(pidhashhead, proc) *pidhashtbl; extern struct sx *pidhashtbl_lock; extern u_long pidhash; extern u_long pidhashlock; #define PGRPHASH(pgid) (&pgrphashtbl[(pgid) & pgrphash]) extern LIST_HEAD(pgrphashhead, pgrp) *pgrphashtbl; extern u_long pgrphash; extern struct sx allproc_lock; extern int allproc_gen; extern struct sx proctree_lock; extern struct mtx ppeers_lock; extern struct mtx procid_lock; extern struct proc proc0; /* Process slot for swapper. */ extern struct thread0_storage thread0_st; /* Primary thread in proc0. */ #define thread0 (thread0_st.t0st_thread) extern struct vmspace vmspace0; /* VM space for proc0. */ extern int hogticks; /* Limit on kernel cpu hogs. */ extern int lastpid; extern int nprocs, maxproc; /* Current and max number of procs. */ extern int maxprocperuid; /* Max procs per uid. */ extern u_long ps_arg_cache_limit; LIST_HEAD(proclist, proc); TAILQ_HEAD(procqueue, proc); TAILQ_HEAD(threadqueue, thread); extern struct proclist allproc; /* List of all processes. */ extern struct proc *initproc, *pageproc; /* Process slots for init, pager. */ extern struct uma_zone *proc_zone; extern struct uma_zone *pgrp_zone; struct proc *pfind(pid_t); /* Find process by id. */ struct proc *pfind_any(pid_t); /* Find (zombie) process by id. */ struct proc *pfind_any_locked(pid_t pid); /* Find process by id, locked. */ struct pgrp *pgfind(pid_t); /* Find process group by id. */ void pidhash_slockall(void); /* Shared lock all pid hash lists. */ void pidhash_sunlockall(void); /* Shared unlock all pid hash lists. */ struct fork_req { int fr_flags; int fr_pages; int *fr_pidp; struct proc **fr_procp; int *fr_pd_fd; int fr_pd_flags; struct filecaps *fr_pd_fcaps; int fr_flags2; #define FR2_DROPSIG_CAUGHT 0x00000001 /* Drop caught non-DFL signals */ #define FR2_SHARE_PATHS 0x00000002 /* Invert sense of RFFDG for paths */ #define FR2_KPROC 0x00000004 /* Create a kernel process */ }; /* * pget() flags. */ #define PGET_HOLD 0x00001 /* Hold the process. */ #define PGET_CANSEE 0x00002 /* Check against p_cansee(). */ #define PGET_CANDEBUG 0x00004 /* Check against p_candebug(). */ #define PGET_ISCURRENT 0x00008 /* Check that the found process is current. */ #define PGET_NOTWEXIT 0x00010 /* Check that the process is not in P_WEXIT. */ #define PGET_NOTINEXEC 0x00020 /* Check that the process is not in P_INEXEC. */ #define PGET_NOTID 0x00040 /* Do not assume tid if pid > PID_MAX. */ #define PGET_WANTREAD (PGET_HOLD | PGET_CANDEBUG | PGET_NOTWEXIT) int pget(pid_t pid, int flags, struct proc **pp); void ast(struct trapframe *framep); struct thread *choosethread(void); int cr_cansee(struct ucred *u1, struct ucred *u2); int cr_canseesocket(struct ucred *cred, struct socket *so); int cr_canseeothergids(struct ucred *u1, struct ucred *u2); int cr_canseeotheruids(struct ucred *u1, struct ucred *u2); int cr_canseejailproc(struct ucred *u1, struct ucred *u2); int cr_cansignal(struct ucred *cred, struct proc *proc, int signum); int enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp, struct session *sess); int enterthispgrp(struct proc *p, struct pgrp *pgrp); void faultin(struct proc *p); int fork1(struct thread *, struct fork_req *); void fork_rfppwait(struct thread *); void fork_exit(void (*)(void *, struct trapframe *), void *, struct trapframe *); void fork_return(struct thread *, struct trapframe *); int inferior(struct proc *p); void itimer_proc_continue(struct proc *p); void kqtimer_proc_continue(struct proc *p); void kern_proc_vmmap_resident(struct vm_map *map, struct vm_map_entry *entry, int *resident_count, bool *super); void kern_yield(int); void kick_proc0(void); void killjobc(void); int leavepgrp(struct proc *p); int maybe_preempt(struct thread *td); void maybe_yield(void); void mi_switch(int flags); int p_candebug(struct thread *td, struct proc *p); int p_cansee(struct thread *td, struct proc *p); int p_cansched(struct thread *td, struct proc *p); int p_cansignal(struct thread *td, struct proc *p, int signum); int p_canwait(struct thread *td, struct proc *p); struct pargs *pargs_alloc(int len); void pargs_drop(struct pargs *pa); void pargs_hold(struct pargs *pa); void proc_add_orphan(struct proc *child, struct proc *parent); int proc_get_binpath(struct proc *p, char *binname, char **fullpath, char **freepath); int proc_getargv(struct thread *td, struct proc *p, struct sbuf *sb); int proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb); int proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb); void procinit(void); int proc_iterate(int (*cb)(struct proc *, void *), void *cbarg); void proc_linkup0(struct proc *p, struct thread *td); void proc_linkup(struct proc *p, struct thread *td); struct proc *proc_realparent(struct proc *child); void proc_reap(struct thread *td, struct proc *p, int *status, int options); void proc_reparent(struct proc *child, struct proc *newparent, bool set_oppid); void proc_set_traced(struct proc *p, bool stop); void proc_wkilled(struct proc *p); struct pstats *pstats_alloc(void); void pstats_fork(struct pstats *src, struct pstats *dst); void pstats_free(struct pstats *ps); void proc_clear_orphan(struct proc *p); void reaper_abandon_children(struct proc *p, bool exiting); int securelevel_ge(struct ucred *cr, int level); int securelevel_gt(struct ucred *cr, int level); void sess_hold(struct session *); void sess_release(struct session *); int setrunnable(struct thread *, int); void setsugid(struct proc *p); int should_yield(void); int sigonstack(size_t sp); void stopevent(struct proc *, u_int, u_int); struct thread *tdfind(lwpid_t, pid_t); void threadinit(void); void tidhash_add(struct thread *); void tidhash_remove(struct thread *); void cpu_idle(int); int cpu_idle_wakeup(int); extern void (*cpu_idle_hook)(sbintime_t); /* Hook to machdep CPU idler. */ void cpu_switch(struct thread *, struct thread *, struct mtx *); void cpu_throw(struct thread *, struct thread *) __dead2; bool curproc_sigkilled(void); void userret(struct thread *, struct trapframe *); void cpu_exit(struct thread *); void exit1(struct thread *, int, int) __dead2; void cpu_copy_thread(struct thread *td, struct thread *td0); bool cpu_exec_vmspace_reuse(struct proc *p, struct vm_map *map); int cpu_fetch_syscall_args(struct thread *td); void cpu_fork(struct thread *, struct proc *, struct thread *, int); void cpu_fork_kthread_handler(struct thread *, void (*)(void *), void *); int cpu_procctl(struct thread *td, int idtype, id_t id, int com, void *data); void cpu_set_syscall_retval(struct thread *, int); void cpu_set_upcall(struct thread *, void (*)(void *), void *, stack_t *); int cpu_set_user_tls(struct thread *, void *tls_base); void cpu_thread_alloc(struct thread *); void cpu_thread_clean(struct thread *); void cpu_thread_exit(struct thread *); void cpu_thread_free(struct thread *); void cpu_thread_swapin(struct thread *); void cpu_thread_swapout(struct thread *); struct thread *thread_alloc(int pages); int thread_alloc_stack(struct thread *, int pages); int thread_check_susp(struct thread *td, bool sleep); void thread_cow_get_proc(struct thread *newtd, struct proc *p); void thread_cow_get(struct thread *newtd, struct thread *td); void thread_cow_free(struct thread *td); void thread_cow_update(struct thread *td); void thread_cow_synced(struct thread *td); int thread_create(struct thread *td, struct rtprio *rtp, int (*initialize_thread)(struct thread *, void *), void *thunk); void thread_exit(void) __dead2; void thread_free(struct thread *td); void thread_link(struct thread *td, struct proc *p); void thread_reap_barrier(void); int thread_single(struct proc *p, int how); void thread_single_end(struct proc *p, int how); void thread_stash(struct thread *td); void thread_stopped(struct proc *p); void childproc_stopped(struct proc *child, int reason); void childproc_continued(struct proc *child); void childproc_exited(struct proc *child); void thread_run_flash(struct thread *td); int thread_suspend_check(int how); bool thread_suspend_check_needed(void); void thread_suspend_switch(struct thread *, struct proc *p); void thread_suspend_one(struct thread *td); void thread_unlink(struct thread *td); void thread_unsuspend(struct proc *p); void thread_wait(struct proc *p); void stop_all_proc(void); void resume_all_proc(void); static __inline int curthread_pflags_set(int flags) { struct thread *td; int save; td = curthread; save = ~flags | (td->td_pflags & flags); td->td_pflags |= flags; return (save); } static __inline void curthread_pflags_restore(int save) { curthread->td_pflags &= save; } static __inline int curthread_pflags2_set(int flags) { struct thread *td; int save; td = curthread; save = ~flags | (td->td_pflags2 & flags); td->td_pflags2 |= flags; return (save); } static __inline void curthread_pflags2_restore(int save) { curthread->td_pflags2 &= save; } static __inline bool kstack_contains(struct thread *td, vm_offset_t va, size_t len) { return (va >= td->td_kstack && va + len >= va && va + len <= td->td_kstack + td->td_kstack_pages * PAGE_SIZE); } static __inline __pure2 struct td_sched * td_get_sched(struct thread *td) { return ((struct td_sched *)&td[1]); } extern void (*softdep_ast_cleanup)(struct thread *); static __inline void td_softdep_cleanup(struct thread *td) { if (td->td_su != NULL && softdep_ast_cleanup != NULL) softdep_ast_cleanup(td); } #define PROC_ID_PID 0 #define PROC_ID_GROUP 1 #define PROC_ID_SESSION 2 #define PROC_ID_REAP 3 void proc_id_set(int type, pid_t id); void proc_id_set_cond(int type, pid_t id); void proc_id_clear(int type, pid_t id); EVENTHANDLER_LIST_DECLARE(process_ctor); EVENTHANDLER_LIST_DECLARE(process_dtor); EVENTHANDLER_LIST_DECLARE(process_init); EVENTHANDLER_LIST_DECLARE(process_fini); EVENTHANDLER_LIST_DECLARE(process_exit); EVENTHANDLER_LIST_DECLARE(process_fork); EVENTHANDLER_LIST_DECLARE(process_exec); EVENTHANDLER_LIST_DECLARE(thread_ctor); EVENTHANDLER_LIST_DECLARE(thread_dtor); EVENTHANDLER_LIST_DECLARE(thread_init); #endif /* _KERNEL */ #endif /* !_SYS_PROC_H_ */ diff --git a/sys/sys/ptrace.h b/sys/sys/ptrace.h index 197ac1692dfb..80797f290a41 100644 --- a/sys/sys/ptrace.h +++ b/sys/sys/ptrace.h @@ -1,260 +1,260 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1984, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ptrace.h 8.2 (Berkeley) 1/4/94 * $FreeBSD$ */ #ifndef _SYS_PTRACE_H_ #define _SYS_PTRACE_H_ #include #include #include #define PT_TRACE_ME 0 /* child declares it's being traced */ #define PT_READ_I 1 /* read word in child's I space */ #define PT_READ_D 2 /* read word in child's D space */ /* was PT_READ_U 3 * read word in child's user structure */ #define PT_WRITE_I 4 /* write word in child's I space */ #define PT_WRITE_D 5 /* write word in child's D space */ /* was PT_WRITE_U 6 * write word in child's user structure */ #define PT_CONTINUE 7 /* continue the child */ #define PT_KILL 8 /* kill the child process */ #define PT_STEP 9 /* single step the child */ #define PT_ATTACH 10 /* trace some running process */ #define PT_DETACH 11 /* stop tracing a process */ #define PT_IO 12 /* do I/O to/from stopped process. */ #define PT_LWPINFO 13 /* Info about the LWP that stopped. */ #define PT_GETNUMLWPS 14 /* get total number of threads */ #define PT_GETLWPLIST 15 /* get thread list */ #define PT_CLEARSTEP 16 /* turn off single step */ #define PT_SETSTEP 17 /* turn on single step */ #define PT_SUSPEND 18 /* suspend a thread */ #define PT_RESUME 19 /* resume a thread */ #define PT_TO_SCE 20 #define PT_TO_SCX 21 #define PT_SYSCALL 22 #define PT_FOLLOW_FORK 23 #define PT_LWP_EVENTS 24 /* report LWP birth and exit */ #define PT_GET_EVENT_MASK 25 /* get mask of optional events */ #define PT_SET_EVENT_MASK 26 /* set mask of optional events */ #define PT_GET_SC_ARGS 27 /* fetch syscall args */ #define PT_GET_SC_RET 28 /* fetch syscall results */ #define PT_COREDUMP 29 /* create a coredump */ #define PT_GETREGS 33 /* get general-purpose registers */ #define PT_SETREGS 34 /* set general-purpose registers */ #define PT_GETFPREGS 35 /* get floating-point registers */ #define PT_SETFPREGS 36 /* set floating-point registers */ #define PT_GETDBREGS 37 /* get debugging registers */ #define PT_SETDBREGS 38 /* set debugging registers */ #define PT_VM_TIMESTAMP 40 /* Get VM version (timestamp) */ #define PT_VM_ENTRY 41 /* Get VM map (entry) */ #define PT_GETREGSET 42 /* Get a target register set */ #define PT_SETREGSET 43 /* Set a target register set */ #define PT_FIRSTMACH 64 /* for machine-specific requests */ #include /* machine-specific requests, if any */ /* Events used with PT_GET_EVENT_MASK and PT_SET_EVENT_MASK */ #define PTRACE_EXEC 0x0001 #define PTRACE_SCE 0x0002 #define PTRACE_SCX 0x0004 #define PTRACE_SYSCALL (PTRACE_SCE | PTRACE_SCX) #define PTRACE_FORK 0x0008 #define PTRACE_LWP 0x0010 #define PTRACE_VFORK 0x0020 #define PTRACE_DEFAULT (PTRACE_EXEC) struct ptrace_io_desc { int piod_op; /* I/O operation */ void *piod_offs; /* child offset */ void *piod_addr; /* parent offset */ size_t piod_len; /* request length */ }; /* * Operations in piod_op. */ #define PIOD_READ_D 1 /* Read from D space */ #define PIOD_WRITE_D 2 /* Write to D space */ #define PIOD_READ_I 3 /* Read from I space */ #define PIOD_WRITE_I 4 /* Write to I space */ /* Argument structure for PT_LWPINFO. */ struct ptrace_lwpinfo { lwpid_t pl_lwpid; /* LWP described. */ int pl_event; /* Event that stopped the LWP. */ #define PL_EVENT_NONE 0 #define PL_EVENT_SIGNAL 1 int pl_flags; /* LWP flags. */ #define PL_FLAG_SA 0x01 /* M:N thread */ #define PL_FLAG_BOUND 0x02 /* M:N bound thread */ #define PL_FLAG_SCE 0x04 /* syscall enter point */ #define PL_FLAG_SCX 0x08 /* syscall leave point */ #define PL_FLAG_EXEC 0x10 /* exec(2) succeeded */ #define PL_FLAG_SI 0x20 /* siginfo is valid */ #define PL_FLAG_FORKED 0x40 /* new child */ #define PL_FLAG_CHILD 0x80 /* I am from child */ #define PL_FLAG_BORN 0x100 /* new LWP */ #define PL_FLAG_EXITED 0x200 /* exiting LWP */ #define PL_FLAG_VFORKED 0x400 /* new child via vfork */ #define PL_FLAG_VFORK_DONE 0x800 /* vfork parent has resumed */ sigset_t pl_sigmask; /* LWP signal mask */ sigset_t pl_siglist; /* LWP pending signal */ struct __siginfo pl_siginfo; /* siginfo for signal */ char pl_tdname[MAXCOMLEN + 1]; /* LWP name */ pid_t pl_child_pid; /* New child pid */ u_int pl_syscall_code; u_int pl_syscall_narg; }; #if defined(_WANT_LWPINFO32) || (defined(_KERNEL) && defined(__LP64__)) struct ptrace_lwpinfo32 { lwpid_t pl_lwpid; /* LWP described. */ int pl_event; /* Event that stopped the LWP. */ int pl_flags; /* LWP flags. */ sigset_t pl_sigmask; /* LWP signal mask */ sigset_t pl_siglist; /* LWP pending signal */ struct siginfo32 pl_siginfo; /* siginfo for signal */ char pl_tdname[MAXCOMLEN + 1]; /* LWP name. */ pid_t pl_child_pid; /* New child pid */ u_int pl_syscall_code; u_int pl_syscall_narg; }; #endif /* Argument structure for PT_GET_SC_RET. */ struct ptrace_sc_ret { - register_t sr_retval[2]; /* Only valid if sr_error == 0. */ + syscallarg_t sr_retval[2]; /* Only valid if sr_error == 0. */ int sr_error; }; /* Argument structure for PT_VM_ENTRY. */ struct ptrace_vm_entry { int pve_entry; /* Entry number used for iteration. */ int pve_timestamp; /* Generation number of VM map. */ u_long pve_start; /* Start VA of range. */ u_long pve_end; /* End VA of range (incl). */ u_long pve_offset; /* Offset in backing object. */ u_int pve_prot; /* Protection of memory range. */ u_int pve_pathlen; /* Size of path. */ long pve_fileid; /* File ID. */ uint32_t pve_fsid; /* File system ID. */ char *pve_path; /* Path name of object. */ }; /* Argument structure for PT_COREDUMP */ struct ptrace_coredump { int pc_fd; /* File descriptor to write dump to. */ uint32_t pc_flags; /* Flags PC_* */ off_t pc_limit; /* Maximum size of the coredump, 0 for no limit. */ }; /* Flags for PT_COREDUMP pc_flags */ #define PC_COMPRESS 0x00000001 /* Allow compression */ #define PC_ALL 0x00000002 /* Include non-dumpable entries */ #ifdef _KERNEL struct thr_coredump_req { struct vnode *tc_vp; /* vnode to write coredump to. */ off_t tc_limit; /* max coredump file size. */ int tc_flags; /* user flags */ int tc_error; /* request result */ }; int ptrace_set_pc(struct thread *_td, unsigned long _addr); int ptrace_single_step(struct thread *_td); int ptrace_clear_single_step(struct thread *_td); #ifdef __HAVE_PTRACE_MACHDEP int cpu_ptrace(struct thread *_td, int _req, void *_addr, int _data); #endif /* * These are prototypes for functions that implement some of the * debugging functionality exported by procfs / linprocfs and by the * ptrace(2) syscall. They used to be part of procfs, but they don't * really belong there. */ struct reg; struct fpreg; struct dbreg; struct uio; int proc_read_regs(struct thread *_td, struct reg *_reg); int proc_write_regs(struct thread *_td, struct reg *_reg); int proc_read_fpregs(struct thread *_td, struct fpreg *_fpreg); int proc_write_fpregs(struct thread *_td, struct fpreg *_fpreg); int proc_read_dbregs(struct thread *_td, struct dbreg *_dbreg); int proc_write_dbregs(struct thread *_td, struct dbreg *_dbreg); int proc_sstep(struct thread *_td); int proc_rwmem(struct proc *_p, struct uio *_uio); ssize_t proc_readmem(struct thread *_td, struct proc *_p, vm_offset_t _va, void *_buf, size_t _len); ssize_t proc_writemem(struct thread *_td, struct proc *_p, vm_offset_t _va, void *_buf, size_t _len); #ifdef COMPAT_FREEBSD32 struct reg32; struct fpreg32; struct dbreg32; int proc_read_regs32(struct thread *_td, struct reg32 *_reg32); int proc_write_regs32(struct thread *_td, struct reg32 *_reg32); int proc_read_fpregs32(struct thread *_td, struct fpreg32 *_fpreg32); int proc_write_fpregs32(struct thread *_td, struct fpreg32 *_fpreg32); int proc_read_dbregs32(struct thread *_td, struct dbreg32 *_dbreg32); int proc_write_dbregs32(struct thread *_td, struct dbreg32 *_dbreg32); #endif void ptrace_unsuspend(struct proc *p); extern bool allow_ptrace; #else /* !_KERNEL */ #include __BEGIN_DECLS int ptrace(int _request, pid_t _pid, caddr_t _addr, int _data); __END_DECLS #endif /* !_KERNEL */ #endif /* !_SYS_PTRACE_H_ */ diff --git a/sys/sys/types.h b/sys/sys/types.h index 66f11aa3e31d..d37ab8b823dc 100644 --- a/sys/sys/types.h +++ b/sys/sys/types.h @@ -1,437 +1,439 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)types.h 8.6 (Berkeley) 2/19/95 * $FreeBSD$ */ #ifndef _SYS_TYPES_H_ #define _SYS_TYPES_H_ #include /* Machine type dependent parameters. */ #include #include #include #if __BSD_VISIBLE typedef unsigned char u_char; typedef unsigned short u_short; typedef unsigned int u_int; typedef unsigned long u_long; #ifndef _KERNEL typedef unsigned short ushort; /* Sys V compatibility */ typedef unsigned int uint; /* Sys V compatibility */ #endif #endif /* * XXX POSIX sized integrals that should appear only in . */ #include typedef __uint8_t u_int8_t; /* unsigned integrals (deprecated) */ typedef __uint16_t u_int16_t; typedef __uint32_t u_int32_t; typedef __uint64_t u_int64_t; typedef __uint64_t u_quad_t; /* quads (deprecated) */ typedef __int64_t quad_t; typedef quad_t * qaddr_t; typedef char * caddr_t; /* core address */ typedef const char * c_caddr_t; /* core address, pointer to const */ #ifndef _BLKSIZE_T_DECLARED typedef __blksize_t blksize_t; #define _BLKSIZE_T_DECLARED #endif typedef __cpuwhich_t cpuwhich_t; typedef __cpulevel_t cpulevel_t; typedef __cpusetid_t cpusetid_t; #ifndef _BLKCNT_T_DECLARED typedef __blkcnt_t blkcnt_t; #define _BLKCNT_T_DECLARED #endif #ifndef _CLOCK_T_DECLARED typedef __clock_t clock_t; #define _CLOCK_T_DECLARED #endif #ifndef _CLOCKID_T_DECLARED typedef __clockid_t clockid_t; #define _CLOCKID_T_DECLARED #endif typedef __critical_t critical_t; /* Critical section value */ typedef __daddr_t daddr_t; /* disk address */ #ifndef _DEV_T_DECLARED typedef __dev_t dev_t; /* device number or struct cdev */ #define _DEV_T_DECLARED #endif #ifndef _FFLAGS_T_DECLARED typedef __fflags_t fflags_t; /* file flags */ #define _FFLAGS_T_DECLARED #endif typedef __fixpt_t fixpt_t; /* fixed point number */ #ifndef _FSBLKCNT_T_DECLARED /* for statvfs() */ typedef __fsblkcnt_t fsblkcnt_t; typedef __fsfilcnt_t fsfilcnt_t; #define _FSBLKCNT_T_DECLARED #endif #ifndef _GID_T_DECLARED typedef __gid_t gid_t; /* group id */ #define _GID_T_DECLARED #endif #ifndef _IN_ADDR_T_DECLARED typedef __uint32_t in_addr_t; /* base type for internet address */ #define _IN_ADDR_T_DECLARED #endif #ifndef _IN_PORT_T_DECLARED typedef __uint16_t in_port_t; #define _IN_PORT_T_DECLARED #endif #ifndef _ID_T_DECLARED typedef __id_t id_t; /* can hold a uid_t or pid_t */ #define _ID_T_DECLARED #endif #ifndef _INO_T_DECLARED typedef __ino_t ino_t; /* inode number */ #define _INO_T_DECLARED #endif #ifndef _KEY_T_DECLARED typedef __key_t key_t; /* IPC key (for Sys V IPC) */ #define _KEY_T_DECLARED #endif #ifndef _LWPID_T_DECLARED typedef __lwpid_t lwpid_t; /* Thread ID (a.k.a. LWP) */ #define _LWPID_T_DECLARED #endif #ifndef _MODE_T_DECLARED typedef __mode_t mode_t; /* permissions */ #define _MODE_T_DECLARED #endif #ifndef _ACCMODE_T_DECLARED typedef __accmode_t accmode_t; /* access permissions */ #define _ACCMODE_T_DECLARED #endif #ifndef _NLINK_T_DECLARED typedef __nlink_t nlink_t; /* link count */ #define _NLINK_T_DECLARED #endif #ifndef _OFF_T_DECLARED typedef __off_t off_t; /* file offset */ #define _OFF_T_DECLARED #endif #ifndef _OFF64_T_DECLARED typedef __off64_t off64_t; /* file offset (alias) */ #define _OFF64_T_DECLARED #endif #ifndef _PID_T_DECLARED typedef __pid_t pid_t; /* process id */ #define _PID_T_DECLARED #endif typedef __register_t register_t; #ifndef _RLIM_T_DECLARED typedef __rlim_t rlim_t; /* resource limit */ #define _RLIM_T_DECLARED #endif typedef __sbintime_t sbintime_t; typedef __segsz_t segsz_t; /* segment size (in pages) */ #ifndef _SIZE_T_DECLARED typedef __size_t size_t; #define _SIZE_T_DECLARED #endif #ifndef _SSIZE_T_DECLARED typedef __ssize_t ssize_t; #define _SSIZE_T_DECLARED #endif #ifndef _SUSECONDS_T_DECLARED typedef __suseconds_t suseconds_t; /* microseconds (signed) */ #define _SUSECONDS_T_DECLARED #endif #ifndef _TIME_T_DECLARED typedef __time_t time_t; #define _TIME_T_DECLARED #endif #ifndef _TIMER_T_DECLARED typedef __timer_t timer_t; #define _TIMER_T_DECLARED #endif #ifndef _MQD_T_DECLARED typedef __mqd_t mqd_t; #define _MQD_T_DECLARED #endif typedef __u_register_t u_register_t; #ifndef _UID_T_DECLARED typedef __uid_t uid_t; /* user id */ #define _UID_T_DECLARED #endif #ifndef _USECONDS_T_DECLARED typedef __useconds_t useconds_t; /* microseconds (unsigned) */ #define _USECONDS_T_DECLARED #endif #ifndef _CAP_IOCTL_T_DECLARED #define _CAP_IOCTL_T_DECLARED typedef unsigned long cap_ioctl_t; #endif #ifndef _CAP_RIGHTS_T_DECLARED #define _CAP_RIGHTS_T_DECLARED struct cap_rights; typedef struct cap_rights cap_rights_t; #endif /* * Types suitable for exporting physical addresses, virtual addresses * (pointers), and memory object sizes from the kernel independent of native * word size. These should be used in place of vm_paddr_t, (u)intptr_t, and * size_t in structs which contain such types that are shared with userspace. */ typedef __uint64_t kpaddr_t; typedef __uint64_t kvaddr_t; typedef __uint64_t ksize_t; typedef __int64_t kssize_t; typedef __vm_offset_t vm_offset_t; typedef __uint64_t vm_ooffset_t; typedef __vm_paddr_t vm_paddr_t; typedef __uint64_t vm_pindex_t; typedef __vm_size_t vm_size_t; typedef __rman_res_t rman_res_t; +typedef __register_t syscallarg_t; + #ifdef _KERNEL typedef int boolean_t; typedef struct _device *device_t; typedef __intfptr_t intfptr_t; /* * XXX this is fixed width for historical reasons. It should have had type * __int_fast32_t. Fixed-width types should not be used unless binary * compatibility is essential. Least-width types should be used even less * since they provide smaller benefits. * * XXX should be MD. * * XXX this is bogus in -current, but still used for spl*(). */ typedef __uint32_t intrmask_t; /* Interrupt mask (spl, xxx_imask...) */ typedef __uintfptr_t uintfptr_t; typedef __uint64_t uoff_t; typedef char vm_memattr_t; /* memory attribute codes */ typedef struct vm_page *vm_page_t; #define offsetof(type, field) __offsetof(type, field) #endif /* _KERNEL */ #if defined(_KERNEL) || defined(_STANDALONE) #if !defined(__bool_true_false_are_defined) && !defined(__cplusplus) #define __bool_true_false_are_defined 1 #define false 0 #define true 1 typedef _Bool bool; #endif /* !__bool_true_false_are_defined && !__cplusplus */ #endif /* KERNEL || _STANDALONE */ /* * The following are all things that really shouldn't exist in this header, * since its purpose is to provide typedefs, not miscellaneous doodads. */ #ifdef __POPCNT__ #define __bitcount64(x) __builtin_popcountll((__uint64_t)(x)) #define __bitcount32(x) __builtin_popcount((__uint32_t)(x)) #define __bitcount16(x) __builtin_popcount((__uint16_t)(x)) #define __bitcountl(x) __builtin_popcountl((unsigned long)(x)) #define __bitcount(x) __builtin_popcount((unsigned int)(x)) #else /* * Population count algorithm using SWAR approach * - "SIMD Within A Register". */ static __inline __uint16_t __bitcount16(__uint16_t _x) { _x = (_x & 0x5555) + ((_x & 0xaaaa) >> 1); _x = (_x & 0x3333) + ((_x & 0xcccc) >> 2); _x = (_x + (_x >> 4)) & 0x0f0f; _x = (_x + (_x >> 8)) & 0x00ff; return (_x); } static __inline __uint32_t __bitcount32(__uint32_t _x) { _x = (_x & 0x55555555) + ((_x & 0xaaaaaaaa) >> 1); _x = (_x & 0x33333333) + ((_x & 0xcccccccc) >> 2); _x = (_x + (_x >> 4)) & 0x0f0f0f0f; _x = (_x + (_x >> 8)); _x = (_x + (_x >> 16)) & 0x000000ff; return (_x); } #ifdef __LP64__ static __inline __uint64_t __bitcount64(__uint64_t _x) { _x = (_x & 0x5555555555555555) + ((_x & 0xaaaaaaaaaaaaaaaa) >> 1); _x = (_x & 0x3333333333333333) + ((_x & 0xcccccccccccccccc) >> 2); _x = (_x + (_x >> 4)) & 0x0f0f0f0f0f0f0f0f; _x = (_x + (_x >> 8)); _x = (_x + (_x >> 16)); _x = (_x + (_x >> 32)) & 0x000000ff; return (_x); } #define __bitcountl(x) __bitcount64((unsigned long)(x)) #else static __inline __uint64_t __bitcount64(__uint64_t _x) { return (__bitcount32(_x >> 32) + __bitcount32(_x)); } #define __bitcountl(x) __bitcount32((unsigned long)(x)) #endif #define __bitcount(x) __bitcount32((unsigned int)(x)) #endif #if __BSD_VISIBLE #include /* * The major and minor numbers are encoded in dev_t as MMMmmmMm (where * letters correspond to bytes). The encoding of the lower 4 bytes is * constrained by compatibility with 16-bit and 32-bit dev_t's. The * encoding of of the upper 4 bytes is the least unnatural one consistent * with this and other constraints. Also, the decoding of the m bytes by * minor() is unnatural to maximize compatibility subject to not discarding * bits. The upper m byte is shifted into the position of the lower M byte * instead of shifting 3 upper m bytes to close the gap. Compatibility for * minor() is achieved iff the upper m byte is 0. */ #define major(d) __major(d) static __inline int __major(dev_t _d) { return (((_d >> 32) & 0xffffff00) | ((_d >> 8) & 0xff)); } #define minor(d) __minor(d) static __inline int __minor(dev_t _d) { return (((_d >> 24) & 0xff00) | (_d & 0xffff00ff)); } #define makedev(M, m) __makedev((M), (m)) static __inline dev_t __makedev(int _Major, int _Minor) { return (((dev_t)(_Major & 0xffffff00) << 32) | ((_Major & 0xff) << 8) | ((dev_t)(_Minor & 0xff00) << 24) | (_Minor & 0xffff00ff)); } /* * These declarations belong elsewhere, but are repeated here and in * to give broken programs a better chance of working with * 64-bit off_t's. */ #ifndef _KERNEL __BEGIN_DECLS #ifndef _FTRUNCATE_DECLARED #define _FTRUNCATE_DECLARED int ftruncate(int, off_t); #endif #ifndef _LSEEK_DECLARED #define _LSEEK_DECLARED off_t lseek(int, off_t, int); #endif #ifndef _MMAP_DECLARED #define _MMAP_DECLARED void * mmap(void *, size_t, int, int, int, off_t); #endif #ifndef _TRUNCATE_DECLARED #define _TRUNCATE_DECLARED int truncate(const char *, off_t); #endif __END_DECLS #endif /* !_KERNEL */ #endif /* __BSD_VISIBLE */ #endif /* !_SYS_TYPES_H_ */ diff --git a/sys/tools/makesyscalls.lua b/sys/tools/makesyscalls.lua index 178869d03b0e..ec79ae30d130 100644 --- a/sys/tools/makesyscalls.lua +++ b/sys/tools/makesyscalls.lua @@ -1,1620 +1,1620 @@ -- -- SPDX-License-Identifier: BSD-2-Clause-FreeBSD -- -- Copyright (c) 2019 Kyle Evans -- -- Redistribution and use in source and binary forms, with or without -- modification, are permitted provided that the following conditions -- are met: -- 1. Redistributions of source code must retain the above copyright -- notice, this list of conditions and the following disclaimer. -- 2. Redistributions in binary form must reproduce the above copyright -- notice, this list of conditions and the following disclaimer in the -- documentation and/or other materials provided with the distribution. -- -- THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -- ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -- FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -- OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -- HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -- LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -- OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -- SUCH DAMAGE. -- -- $FreeBSD$ -- -- We generally assume that this script will be run by flua, however we've -- carefully crafted modules for it that mimic interfaces provided by modules -- available in ports. Currently, this script is compatible with lua from ports -- along with the compatible luafilesystem and lua-posix modules. local lfs = require("lfs") local unistd = require("posix.unistd") local savesyscall = -1 local maxsyscall = -1 local generated_tag = "@" .. "generated" -- Default configuration; any of these may get replaced by a configuration file -- optionally specified. local config = { os_id_keyword = "FreeBSD", abi_func_prefix = "", sysnames = "syscalls.c", sysproto = "../sys/sysproto.h", sysproto_h = "_SYS_SYSPROTO_H_", syshdr = "../sys/syscall.h", sysmk = "../sys/syscall.mk", syssw = "init_sysent.c", syscallprefix = "SYS_", switchname = "sysent", namesname = "syscallnames", systrace = "systrace_args.c", capabilities_conf = "capabilities.conf", capenabled = {}, compat_set = "native", mincompat = 0, abi_type_suffix = "", abi_flags = "", abi_flags_mask = 0, abi_headers = "", abi_intptr_t = "intptr_t", abi_size_t = "size_t", abi_u_long = "u_long", abi_long = "long", abi_semid_t = "semid_t", abi_ptr_array_t = "", ptr_intptr_t_cast = "intptr_t", syscall_abi_change = "", sys_abi_change = {}, syscall_no_abi_change = "", sys_no_abi_change = {}, obsol = "", obsol_dict = {}, unimpl = "", unimpl_dict = {}, } local config_modified = {} local cleantmp = true local tmpspace = "/tmp/sysent." .. unistd.getpid() .. "/" local output_files = { "sysnames", "syshdr", "sysmk", "syssw", "systrace", "sysproto", } -- These ones we'll create temporary files for; generation purposes. local temp_files = { "sysaue", "sysdcl", "syscompat", "syscompatdcl", "sysent", "sysinc", "sysarg", "sysprotoend", "systracetmp", "systraceret", } -- Opened files local files = {} local function cleanup() for _, v in pairs(files) do assert(v:close()) end if cleantmp then if lfs.dir(tmpspace) then for fname in lfs.dir(tmpspace) do if fname ~= "." and fname ~= ".." then assert(os.remove(tmpspace .. "/" .. fname)) end end end if lfs.attributes(tmpspace) and not lfs.rmdir(tmpspace) then assert(io.stderr:write("Failed to clean up tmpdir: " .. tmpspace .. "\n")) end else assert(io.stderr:write("Temp files left in " .. tmpspace .. "\n")) end end local function abort(status, msg) assert(io.stderr:write(msg .. "\n")) cleanup() os.exit(status) end -- Each entry should have a value so we can represent abi flags as a bitmask -- for convenience. One may also optionally provide an expr; this gets applied -- to each argument type to indicate whether this argument is subject to ABI -- change given the configured flags. local known_abi_flags = { long_size = { value = 0x00000001, exprs = { "_Contains[a-z_]*_long_", "^long [a-z0-9_]+$", "long [*]", "size_t [*]", -- semid_t is not included because it is only used -- as an argument or written out individually and -- said writes are handled by the ksem framework. -- Technically a sign-extension issue exists for -- arguments, but because semid_t is actually a file -- descriptor negative 32-bit values are invalid -- regardless of sign-extension. }, }, time_t_size = { value = 0x00000002, exprs = { "_Contains[a-z_]*_timet_", }, }, pointer_args = { value = 0x00000004, }, pointer_size = { value = 0x00000008, exprs = { "_Contains[a-z_]*_ptr_", "[*][*]", }, }, pair_64bit = { value = 0x00000010, exprs = { "^dev_t[ ]*$", "^id_t[ ]*$", "^off_t[ ]*$", }, }, } local known_flags = { STD = 0x00000001, OBSOL = 0x00000002, RESERVED = 0x00000004, UNIMPL = 0x00000008, NODEF = 0x00000010, NOARGS = 0x00000020, NOPROTO = 0x00000040, NOSTD = 0x00000080, NOTSTATIC = 0x00000100, CAPENABLED = 0x00000200, SYSMUX = 0x00000400, -- Compat flags start from here. We have plenty of space. } -- All compat option entries should have five entries: -- definition: The preprocessor macro that will be set for this -- compatlevel: The level this compatibility should be included at. This -- generally represents the version of FreeBSD that it is compatible -- with, but ultimately it's just the level of mincompat in which it's -- included. -- flag: The name of the flag in syscalls.master. -- prefix: The prefix to use for _args and syscall prototype. This will be -- used as-is, without "_" or any other character appended. -- descr: The description of this compat option in init_sysent.c comments. -- The special "stdcompat" entry will cause the other five to be autogenerated. local compat_option_sets = { native = { { definition = "COMPAT_43", compatlevel = 3, flag = "COMPAT", prefix = "o", descr = "old", }, { stdcompat = "FREEBSD4" }, { stdcompat = "FREEBSD6" }, { stdcompat = "FREEBSD7" }, { stdcompat = "FREEBSD10" }, { stdcompat = "FREEBSD11" }, { stdcompat = "FREEBSD12" }, { stdcompat = "FREEBSD13" }, }, } -- compat_options will be resolved to a set from the configuration. local compat_options local function trim(s, char) if s == nil then return nil end if char == nil then char = "%s" end return s:gsub("^" .. char .. "+", ""):gsub(char .. "+$", "") end -- config looks like a shell script; in fact, the previous makesyscalls.sh -- script actually sourced it in. It had a pretty common format, so we should -- be fine to make various assumptions local function process_config(file) local cfg = {} local comment_line_expr = "^%s*#.*" -- We capture any whitespace padding here so we can easily advance to -- the end of the line as needed to check for any trailing bogus bits. -- Alternatively, we could drop the whitespace and instead try to -- use a pattern to strip out the meaty part of the line, but then we -- would need to sanitize the line for potentially special characters. local line_expr = "^([%w%p]+%s*)=(%s*[`\"]?[^\"`]*[`\"]?)" if not file then return nil, "No file given" end local fh = assert(io.open(file)) for nextline in fh:lines() do -- Strip any whole-line comments nextline = nextline:gsub(comment_line_expr, "") -- Parse it into key, value pairs local key, value = nextline:match(line_expr) if key ~= nil and value ~= nil then local kvp = key .. "=" .. value key = trim(key) value = trim(value) local delim = value:sub(1,1) if delim == '"' then local trailing_context -- Strip off the key/value part trailing_context = nextline:sub(kvp:len() + 1) -- Strip off any trailing comment trailing_context = trailing_context:gsub("#.*$", "") -- Strip off leading/trailing whitespace trailing_context = trim(trailing_context) if trailing_context ~= "" then print(trailing_context) abort(1, "Malformed line: " .. nextline) end value = trim(value, delim) else -- Strip off potential comments value = value:gsub("#.*$", "") -- Strip off any padding whitespace value = trim(value) if value:match("%s") then abort(1, "Malformed config line: " .. nextline) end end cfg[key] = value elseif not nextline:match("^%s*$") then -- Make sure format violations don't get overlooked -- here, but ignore blank lines. Comments are already -- stripped above. abort(1, "Malformed config line: " .. nextline) end end assert(io.close(fh)) return cfg end local function grab_capenabled(file, open_fail_ok) local capentries = {} local commentExpr = "#.*" if file == nil then print "No file" return {} end local fh = io.open(file) if fh == nil then if not open_fail_ok then abort(1, "Failed to open " .. file) end return {} end for nextline in fh:lines() do -- Strip any comments nextline = nextline:gsub(commentExpr, "") if nextline ~= "" then capentries[nextline] = true end end assert(io.close(fh)) return capentries end local function process_compat() local nval = 0 for _, v in pairs(known_flags) do if v > nval then nval = v end end nval = nval << 1 for _, v in pairs(compat_options) do if v["stdcompat"] ~= nil then local stdcompat = v["stdcompat"] v["definition"] = "COMPAT_" .. stdcompat:upper() v["compatlevel"] = tonumber(stdcompat:match("([0-9]+)$")) v["flag"] = stdcompat:gsub("FREEBSD", "COMPAT") v["prefix"] = stdcompat:lower() .. "_" v["descr"] = stdcompat:lower() end local tmpname = "sys" .. v["flag"]:lower() local dcltmpname = tmpname .. "dcl" files[tmpname] = io.tmpfile() files[dcltmpname] = io.tmpfile() v["tmp"] = tmpname v["dcltmp"] = dcltmpname known_flags[v["flag"]] = nval v["mask"] = nval nval = nval << 1 v["count"] = 0 end end local function process_abi_flags() local flags, mask = config["abi_flags"], 0 for txtflag in flags:gmatch("([^|]+)") do if known_abi_flags[txtflag] == nil then abort(1, "Unknown abi_flag: " .. txtflag) end mask = mask | known_abi_flags[txtflag]["value"] end config["abi_flags_mask"] = mask end local function process_obsol() local obsol = config["obsol"] for syscall in obsol:gmatch("([^ ]+)") do config["obsol_dict"][syscall] = true end end local function process_unimpl() local unimpl = config["unimpl"] for syscall in unimpl:gmatch("([^ ]+)") do config["unimpl_dict"][syscall] = true end end local function process_syscall_abi_change() local changes_abi = config["syscall_abi_change"] for syscall in changes_abi:gmatch("([^ ]+)") do config["sys_abi_change"][syscall] = true end local no_changes = config["syscall_no_abi_change"] for syscall in no_changes:gmatch("([^ ]+)") do config["sys_no_abi_change"][syscall] = true end end local function abi_changes(name) if known_abi_flags[name] == nil then abort(1, "abi_changes: unknown flag: " .. name) end return config["abi_flags_mask"] & known_abi_flags[name]["value"] ~= 0 end local function strip_abi_prefix(funcname) local abiprefix = config["abi_func_prefix"] local stripped_name if funcname == nil then return nil end if abiprefix ~= "" and funcname:find("^" .. abiprefix) then stripped_name = funcname:gsub("^" .. abiprefix, "") else stripped_name = funcname end return stripped_name end local function read_file(tmpfile) if files[tmpfile] == nil then print("Not found: " .. tmpfile) return end local fh = files[tmpfile] assert(fh:seek("set")) return assert(fh:read("a")) end local function write_line(tmpfile, line) if files[tmpfile] == nil then print("Not found: " .. tmpfile) return end assert(files[tmpfile]:write(line)) end local function write_line_pfile(tmppat, line) for k in pairs(files) do if k:match(tmppat) ~= nil then assert(files[k]:write(line)) end end end -- Check both literal intptr_t and the abi version because this needs -- to work both before and after the substitution local function isptrtype(type) return type:find("*") or type:find("caddr_t") or type:find("intptr_t") or type:find(config['abi_intptr_t']) end local function isptrarraytype(type) return type:find("[*][*]") or type:find("[*][ ]*const[ ]*[*]") end -- Find types that are always 64-bits wide local function is64bittype(type) return type:find("^dev_t[ ]*$") or type:find("^id_t[ ]*$") or type:find("^off_t[ ]*$") end local process_syscall_def -- These patterns are processed in order on any line that isn't empty. local pattern_table = { { pattern = "%s*$" .. config['os_id_keyword'], process = function(_, _) -- Ignore... ID tag end, }, { dump_prevline = true, pattern = "^#%s*include", process = function(line) line = line .. "\n" write_line('sysinc', line) end, }, { dump_prevline = true, pattern = "^#", process = function(line) if line:find("^#%s*if") then savesyscall = maxsyscall elseif line:find("^#%s*else") then maxsyscall = savesyscall end line = line .. "\n" write_line('sysent', line) write_line('sysdcl', line) write_line('sysarg', line) write_line_pfile('syscompat[0-9]*$', line) write_line('sysnames', line) write_line_pfile('systrace.*', line) end, }, { dump_prevline = true, pattern = "%%ABI_HEADERS%%", process = function() if config['abi_headers'] ~= "" then line = config['abi_headers'] .. "\n" write_line('sysinc', line) end end, }, { -- Buffer anything else pattern = ".+", process = function(line, prevline) local incomplete = line:find("\\$") ~= nil -- Lines that end in \ get the \ stripped -- Lines that start with a syscall number, prepend \n line = trim(line):gsub("\\$", "") if line:find("^[0-9]") and prevline then process_syscall_def(prevline) prevline = nil end prevline = (prevline or '') .. line incomplete = incomplete or prevline:find(",$") ~= nil incomplete = incomplete or prevline:find("{") ~= nil and prevline:find("}") == nil if prevline:find("^[0-9]") and not incomplete then process_syscall_def(prevline) prevline = nil end return prevline end, }, } local function process_sysfile(file) local capentries = {} local commentExpr = "^%s*;.*" if file == nil then print "No file" return {} end local fh = io.open(file) if fh == nil then print("Failed to open " .. file) return {} end local function do_match(nextline, prevline) local pattern, handler, dump for _, v in pairs(pattern_table) do pattern = v['pattern'] handler = v['process'] dump = v['dump_prevline'] if nextline:match(pattern) then if dump and prevline then process_syscall_def(prevline) prevline = nil end return handler(nextline, prevline) end end abort(1, "Failed to handle: " .. nextline) end local prevline for nextline in fh:lines() do -- Strip any comments nextline = nextline:gsub(commentExpr, "") if nextline ~= "" then prevline = do_match(nextline, prevline) end end -- Dump any remainder if prevline ~= nil and prevline:find("^[0-9]") then process_syscall_def(prevline) end assert(io.close(fh)) return capentries end local function get_mask(flags) local mask = 0 for _, v in ipairs(flags) do if known_flags[v] == nil then abort(1, "Checking for unknown flag " .. v) end mask = mask | known_flags[v] end return mask end local function get_mask_pat(pflags) local mask = 0 for k, v in pairs(known_flags) do if k:find(pflags) then mask = mask | v end end return mask end local function align_sysent_comment(col) write_line("sysent", "\t") col = col + 8 - col % 8 while col < 56 do write_line("sysent", "\t") col = col + 8 end end local function strip_arg_annotations(arg) arg = arg:gsub("_Contains_[^ ]*[_)] ?", "") arg = arg:gsub("_In[^ ]*[_)] ?", "") arg = arg:gsub("_Out[^ ]*[_)] ?", "") return trim(arg) end local function check_abi_changes(arg) for k, v in pairs(known_abi_flags) do local exprs = v["exprs"] if abi_changes(k) and exprs ~= nil then for _, e in pairs(exprs) do if arg:find(e) then return true end end end end return false end local function process_args(args) local funcargs = {} local changes_abi = false for arg in args:gmatch("([^,]+)") do local arg_abi_change = check_abi_changes(arg) changes_abi = changes_abi or arg_abi_change arg = strip_arg_annotations(arg) local argname = arg:match("([^* ]+)$") -- argtype is... everything else. local argtype = trim(arg:gsub(argname .. "$", ""), nil) if argtype == "" and argname == "void" then goto out end -- is64bittype() needs a bare type so check it after argname -- is removed changes_abi = changes_abi or (abi_changes("pair_64bit") and is64bittype(argtype)) argtype = argtype:gsub("intptr_t", config["abi_intptr_t"]) argtype = argtype:gsub("semid_t", config["abi_semid_t"]) if isptrtype(argtype) then argtype = argtype:gsub("size_t", config["abi_size_t"]) argtype = argtype:gsub("^long", config["abi_long"]); argtype = argtype:gsub("^u_long", config["abi_u_long"]); argtype = argtype:gsub("^const u_long", "const " .. config["abi_u_long"]); elseif argtype:find("^long$") then argtype = config["abi_long"] end if isptrarraytype(argtype) and config["abi_ptr_array_t"] ~= "" then -- `* const *` -> `**` argtype = argtype:gsub("[*][ ]*const[ ]*[*]", "**") -- e.g., `struct aiocb **` -> `uint32_t *` argtype = argtype:gsub("[^*]*[*]", config["abi_ptr_array_t"] .. " ", 1) end -- XX TODO: Forward declarations? See: sysstubfwd in CheriBSD if arg_abi_change then local abi_type_suffix = config["abi_type_suffix"] argtype = argtype:gsub("(struct [^ ]*)", "%1" .. abi_type_suffix) argtype = argtype:gsub("(union [^ ]*)", "%1" .. abi_type_suffix) end if abi_changes("pair_64bit") and is64bittype(argtype) then if #funcargs % 2 == 1 then funcargs[#funcargs + 1] = { type = "int", name = "_pad", } end funcargs[#funcargs + 1] = { type = "uint32_t", name = argname .. "1", } funcargs[#funcargs + 1] = { type = "uint32_t", name = argname .. "2", } else funcargs[#funcargs + 1] = { type = argtype, name = argname, } end end ::out:: return funcargs, changes_abi end local function handle_noncompat(sysnum, thr_flag, flags, sysflags, rettype, auditev, syscallret, funcname, funcalias, funcargs, argalias) local argssize if flags & known_flags["SYSMUX"] ~= 0 then argssize = "0" elseif #funcargs > 0 or flags & known_flags["NODEF"] ~= 0 then argssize = "AS(" .. argalias .. ")" else argssize = "0" end write_line("systrace", string.format([[ /* %s */ case %d: { ]], funcname, sysnum)) write_line("systracetmp", string.format([[ /* %s */ case %d: ]], funcname, sysnum)) write_line("systraceret", string.format([[ /* %s */ case %d: ]], funcname, sysnum)) if #funcargs > 0 and flags & known_flags["SYSMUX"] == 0 then write_line("systracetmp", "\t\tswitch (ndx) {\n") write_line("systrace", string.format( "\t\tstruct %s *p = params;\n", argalias)) local argtype, argname, desc, padding padding = "" for idx, arg in ipairs(funcargs) do argtype = arg["type"] argname = arg["name"] argtype = trim(argtype:gsub("__restrict$", ""), nil) if argtype == "int" and argname == "_pad" and abi_changes("pair_64bit") then write_line("systracetmp", "#ifdef PAD64_REQUIRED\n") end -- Pointer arg? if argtype:find("*") then desc = "userland " .. argtype else desc = argtype; end write_line("systracetmp", string.format( "\t\tcase %d%s:\n\t\t\tp = \"%s\";\n\t\t\tbreak;\n", idx - 1, padding, desc)) if argtype == "int" and argname == "_pad" and abi_changes("pair_64bit") then padding = " - _P_" write_line("systracetmp", "#define _P_ 0\n#else\n#define _P_ 1\n#endif\n") end if isptrtype(argtype) then write_line("systrace", string.format( "\t\tuarg[a++] = (%s)p->%s; /* %s */\n", config["ptr_intptr_t_cast"], argname, argtype)) elseif argtype == "union l_semun" then write_line("systrace", string.format( "\t\tuarg[a++] = p->%s.buf; /* %s */\n", argname, argtype)) elseif argtype:sub(1,1) == "u" or argtype == "size_t" then write_line("systrace", string.format( "\t\tuarg[a++] = p->%s; /* %s */\n", argname, argtype)) else if argtype == "int" and argname == "_pad" and abi_changes("pair_64bit") then write_line("systrace", "#ifdef PAD64_REQUIRED\n") end write_line("systrace", string.format( "\t\tiarg[a++] = p->%s; /* %s */\n", argname, argtype)) if argtype == "int" and argname == "_pad" and abi_changes("pair_64bit") then write_line("systrace", "#endif\n") end end end write_line("systracetmp", "\t\tdefault:\n\t\t\tbreak;\n\t\t};\n") if padding ~= "" then write_line("systracetmp", "#undef _P_\n\n") end write_line("systraceret", string.format([[ if (ndx == 0 || ndx == 1) p = "%s"; break; ]], syscallret)) end local n_args = #funcargs if flags & known_flags["SYSMUX"] ~= 0 then n_args = 0 end write_line("systrace", string.format( "\t\t*n_args = %d;\n\t\tbreak;\n\t}\n", n_args)) write_line("systracetmp", "\t\tbreak;\n") local nargflags = get_mask({"NOARGS", "NOPROTO", "NODEF"}) if flags & nargflags == 0 then if #funcargs > 0 then write_line("sysarg", string.format("struct %s {\n", argalias)) for _, v in ipairs(funcargs) do local argname, argtype = v["name"], v["type"] if argtype == "int" and argname == "_pad" and abi_changes("pair_64bit") then write_line("sysarg", "#ifdef PAD64_REQUIRED\n") end write_line("sysarg", string.format( "\tchar %s_l_[PADL_(%s)]; %s %s; char %s_r_[PADR_(%s)];\n", argname, argtype, argtype, argname, argname, argtype)) if argtype == "int" and argname == "_pad" and abi_changes("pair_64bit") then write_line("sysarg", "#endif\n") end end write_line("sysarg", "};\n") else write_line("sysarg", string.format( - "struct %s {\n\tregister_t dummy;\n};\n", argalias)) + "struct %s {\n\tsyscallarg_t dummy;\n};\n", argalias)) end end local protoflags = get_mask({"NOPROTO", "NODEF"}) if flags & protoflags == 0 then if funcname == "nosys" or funcname == "lkmnosys" or funcname == "sysarch" or funcname:find("^freebsd") or funcname:find("^linux") then write_line("sysdcl", string.format( "%s\t%s(struct thread *, struct %s *)", rettype, funcname, argalias)) else write_line("sysdcl", string.format( "%s\tsys_%s(struct thread *, struct %s *)", rettype, funcname, argalias)) end write_line("sysdcl", ";\n") write_line("sysaue", string.format("#define\t%sAUE_%s\t%s\n", config['syscallprefix'], funcalias, auditev)) end write_line("sysent", string.format("\t{ .sy_narg = %s, .sy_call = (sy_call_t *)", argssize)) local column = 8 + 2 + #argssize + 15 if flags & known_flags["SYSMUX"] ~= 0 then write_line("sysent", string.format( "nosys, .sy_auevent = AUE_NULL, " .. ".sy_flags = %s, .sy_thrcnt = SY_THR_STATIC },", sysflags)) column = column + #"nosys" + #"AUE_NULL" + 3 elseif flags & known_flags["NOSTD"] ~= 0 then write_line("sysent", string.format( "lkmressys, .sy_auevent = AUE_NULL, " .. ".sy_flags = %s, .sy_thrcnt = SY_THR_ABSENT },", sysflags)) column = column + #"lkmressys" + #"AUE_NULL" + 3 else if funcname == "nosys" or funcname == "lkmnosys" or funcname == "sysarch" or funcname:find("^freebsd") or funcname:find("^linux") then write_line("sysent", string.format( "%s, .sy_auevent = %s, .sy_flags = %s, .sy_thrcnt = %s },", funcname, auditev, sysflags, thr_flag)) column = column + #funcname + #auditev + #sysflags + 3 else write_line("sysent", string.format( "sys_%s, .sy_auevent = %s, .sy_flags = %s, .sy_thrcnt = %s },", funcname, auditev, sysflags, thr_flag)) column = column + #funcname + #auditev + #sysflags + 7 end end align_sysent_comment(column) write_line("sysent", string.format("/* %d = %s */\n", sysnum, funcalias)) write_line("sysnames", string.format("\t\"%s\",\t\t\t/* %d = %s */\n", funcalias, sysnum, funcalias)) if flags & known_flags["NODEF"] == 0 then write_line("syshdr", string.format("#define\t%s%s\t%d\n", config['syscallprefix'], funcalias, sysnum)) write_line("sysmk", string.format(" \\\n\t%s.o", funcalias)) end end local function handle_obsol(sysnum, funcname, comment) write_line("sysent", "\t{ .sy_narg = 0, .sy_call = (sy_call_t *)nosys, " .. ".sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT },") align_sysent_comment(34) write_line("sysent", string.format("/* %d = obsolete %s */\n", sysnum, comment)) write_line("sysnames", string.format( "\t\"obs_%s\",\t\t\t/* %d = obsolete %s */\n", funcname, sysnum, comment)) write_line("syshdr", string.format("\t\t\t\t/* %d is obsolete %s */\n", sysnum, comment)) end local function handle_compat(sysnum, thr_flag, flags, sysflags, rettype, auditev, funcname, funcalias, funcargs, argalias) local argssize, out, outdcl, wrap, prefix, descr if #funcargs > 0 or flags & known_flags["NODEF"] ~= 0 then argssize = "AS(" .. argalias .. ")" else argssize = "0" end for _, v in pairs(compat_options) do if flags & v["mask"] ~= 0 then if config["mincompat"] > v["compatlevel"] then funcname = strip_abi_prefix(funcname) funcname = v["prefix"] .. funcname return handle_obsol(sysnum, funcname, funcname) end v["count"] = v["count"] + 1 out = v["tmp"] outdcl = v["dcltmp"] wrap = v["flag"]:lower() prefix = v["prefix"] descr = v["descr"] goto compatdone end end ::compatdone:: local dprotoflags = get_mask({"NOPROTO", "NODEF"}) local nargflags = dprotoflags | known_flags["NOARGS"] if #funcargs > 0 and flags & nargflags == 0 then write_line(out, string.format("struct %s {\n", argalias)) for _, v in ipairs(funcargs) do local argname, argtype = v["name"], v["type"] write_line(out, string.format( "\tchar %s_l_[PADL_(%s)]; %s %s; char %s_r_[PADR_(%s)];\n", argname, argtype, argtype, argname, argname, argtype)) end write_line(out, "};\n") elseif flags & nargflags == 0 then write_line("sysarg", string.format( - "struct %s {\n\tregister_t dummy;\n};\n", argalias)) + "struct %s {\n\tsyscallarg_t dummy;\n};\n", argalias)) end if flags & dprotoflags == 0 then write_line(outdcl, string.format( "%s\t%s%s(struct thread *, struct %s *);\n", rettype, prefix, funcname, argalias)) write_line("sysaue", string.format( "#define\t%sAUE_%s%s\t%s\n", config['syscallprefix'], prefix, funcname, auditev)) end if flags & known_flags['NOSTD'] ~= 0 then write_line("sysent", string.format( "\t{ .sy_narg = %s, .sy_call = (sy_call_t *)%s, " .. ".sy_auevent = %s, .sy_flags = 0, " .. ".sy_thrcnt = SY_THR_ABSENT },", "0", "lkmressys", "AUE_NULL")) align_sysent_comment(8 + 2 + #"0" + 15 + #"lkmressys" + #"AUE_NULL" + 3) else write_line("sysent", string.format( "\t{ %s(%s,%s), .sy_auevent = %s, .sy_flags = %s, .sy_thrcnt = %s },", wrap, argssize, funcname, auditev, sysflags, thr_flag)) align_sysent_comment(8 + 9 + #argssize + 1 + #funcname + #auditev + #sysflags + 4) end write_line("sysent", string.format("/* %d = %s %s */\n", sysnum, descr, funcalias)) write_line("sysnames", string.format( "\t\"%s.%s\",\t\t/* %d = %s %s */\n", wrap, funcalias, sysnum, descr, funcalias)) -- Do not provide freebsdN_* symbols in libc for < FreeBSD 7 local nosymflags = get_mask({"COMPAT", "COMPAT4", "COMPAT6"}) if flags & nosymflags ~= 0 then write_line("syshdr", string.format( "\t\t\t\t/* %d is %s %s */\n", sysnum, descr, funcalias)) elseif flags & known_flags["NODEF"] == 0 then write_line("syshdr", string.format("#define\t%s%s%s\t%d\n", config['syscallprefix'], prefix, funcalias, sysnum)) write_line("sysmk", string.format(" \\\n\t%s%s.o", prefix, funcalias)) end end local function handle_unimpl(sysnum, sysstart, sysend, comment) if sysstart == nil and sysend == nil then sysstart = tonumber(sysnum) sysend = tonumber(sysnum) end sysnum = sysstart while sysnum <= sysend do write_line("sysent", string.format( "\t{ .sy_narg = 0, .sy_call = (sy_call_t *)nosys, " .. ".sy_auevent = AUE_NULL, .sy_flags = 0, " .. ".sy_thrcnt = SY_THR_ABSENT },\t\t\t/* %d = %s */\n", sysnum, comment)) write_line("sysnames", string.format( "\t\"#%d\",\t\t\t/* %d = %s */\n", sysnum, sysnum, comment)) sysnum = sysnum + 1 end end local function handle_reserved(sysnum, sysstart, sysend, comment) handle_unimpl(sysnum, sysstart, sysend, "reserved for local use") end process_syscall_def = function(line) local sysstart, sysend, flags, funcname, sysflags local thr_flag, syscallret local orig = line flags = 0 thr_flag = "SY_THR_STATIC" -- Parse out the interesting information first local initialExpr = "^([^%s]+)%s+([^%s]+)%s+([^%s]+)%s*" local sysnum, auditev, allflags = line:match(initialExpr) if sysnum == nil or auditev == nil or allflags == nil then -- XXX TODO: Better? abort(1, "Completely malformed: " .. line) end if sysnum:find("-") then sysstart, sysend = sysnum:match("^([%d]+)-([%d]+)$") if sysstart == nil or sysend == nil then abort(1, "Malformed range: " .. sysnum) end sysnum = nil sysstart = tonumber(sysstart) sysend = tonumber(sysend) if sysstart ~= maxsyscall + 1 then abort(1, "syscall number out of sync, missing " .. maxsyscall + 1) end else sysnum = tonumber(sysnum) if sysnum ~= maxsyscall + 1 then abort(1, "syscall number out of sync, missing " .. maxsyscall + 1) end end -- Split flags for flag in allflags:gmatch("([^|]+)") do if known_flags[flag] == nil then abort(1, "Unknown flag " .. flag .. " for " .. sysnum) end flags = flags | known_flags[flag] end if (flags & get_mask({"RESERVED", "UNIMPL"})) == 0 and sysnum == nil then abort(1, "Range only allowed with RESERVED and UNIMPL: " .. line) end if (flags & known_flags["NOTSTATIC"]) ~= 0 then thr_flag = "SY_THR_ABSENT" end -- Strip earlier bits out, leave declaration + alt line = line:gsub("^.+" .. allflags .. "%s*", "") local decl_fnd = line:find("^{") ~= nil if decl_fnd and line:find("}") == nil then abort(1, "Malformed, no closing brace: " .. line) end local decl, alt if decl_fnd then line = line:gsub("^{", "") decl, alt = line:match("([^}]*)}[%s]*(.*)$") else alt = line end if decl == nil and alt == nil then abort(1, "Malformed bits: " .. line) end local funcalias, funcomment, argalias, rettype, args if not decl_fnd and alt ~= nil and alt ~= "" then -- Peel off one entry for name funcname = trim(alt:match("^([^%s]+)"), nil) alt = alt:gsub("^([^%s]+)[%s]*", "") end -- Do we even need it? if flags & get_mask({"OBSOL", "UNIMPL"}) ~= 0 then local NF = 0 for _ in orig:gmatch("[^%s]+") do NF = NF + 1 end funcomment = funcname or '' if NF < 6 then funcomment = funcomment .. " " .. alt end funcomment = trim(funcomment) -- if funcname ~= nil then -- else -- funcomment = trim(alt) -- end goto skipalt end if alt ~= nil and alt ~= "" then local altExpr = "^([^%s]+)%s+([^%s]+)%s+([^%s]+)" funcalias, argalias, rettype = alt:match(altExpr) funcalias = trim(funcalias) if funcalias == nil or argalias == nil or rettype == nil then abort(1, "Malformed alt: " .. line) end end if decl_fnd then -- Don't clobber rettype set in the alt information if rettype == nil then rettype = "int" end -- Peel off the return type syscallret = line:match("([^%s]+)%s") line = line:match("[^%s]+%s(.+)") -- Pointer incoming if line:sub(1,1) == "*" then syscallret = syscallret .. " " end while line:sub(1,1) == "*" do line = line:sub(2) syscallret = syscallret .. "*" end funcname = line:match("^([^(]+)%(") if funcname == nil then abort(1, "Not a signature? " .. line) end args = line:match("^[^(]+%((.+)%)[^)]*$") args = trim(args, '[,%s]') end ::skipalt:: if funcname == nil then funcname = funcalias end funcname = trim(funcname) if config["obsol_dict"][funcname] then local compat_prefix = "" for _, v in pairs(compat_options) do if flags & v["mask"] ~= 0 then compat_prefix = v["prefix"] goto obsol_compat_done end end ::obsol_compat_done:: args = nil flags = known_flags['OBSOL'] funcomment = compat_prefix .. funcname end if config["unimpl_dict"][funcname] then flags = known_flags['UNIMPL'] funcomment = funcname end sysflags = "0" -- NODEF events do not get audited if flags & known_flags['NODEF'] ~= 0 then auditev = 'AUE_NULL' end -- If applicable; strip the ABI prefix from the name local stripped_name = strip_abi_prefix(funcname) if flags & known_flags['CAPENABLED'] ~= 0 or config["capenabled"][funcname] ~= nil or config["capenabled"][stripped_name] ~= nil then sysflags = "SYF_CAPENABLED" end local funcargs = {} local changes_abi = false if args ~= nil then funcargs, changes_abi = process_args(args) end if config["sys_no_abi_change"][funcname] then changes_abi = false end local noproto = config["abi_flags"] ~= "" and not changes_abi local argprefix = '' local funcprefix = '' if abi_changes("pointer_args") then for _, v in ipairs(funcargs) do if isptrtype(v["type"]) then if config["sys_no_abi_change"][funcname] then print("WARNING: " .. funcname .. " in syscall_no_abi_change, but pointers args are present") end changes_abi = true goto ptrfound end end ::ptrfound:: end if config["sys_abi_change"][funcname] then changes_abi = true end if changes_abi then -- argalias should be: -- COMPAT_PREFIX + ABI Prefix + funcname argprefix = config['abi_func_prefix'] funcprefix = config['abi_func_prefix'] funcalias = funcprefix .. funcname noproto = false end if funcname ~= nil then funcname = funcprefix .. funcname end if funcalias == nil or funcalias == "" then funcalias = funcname end if argalias == nil and funcname ~= nil then argalias = funcname .. "_args" for _, v in pairs(compat_options) do local mask = v["mask"] if (flags & mask) ~= 0 then -- Multiple aliases doesn't seem to make -- sense. argalias = v["prefix"] .. argalias goto out end end ::out:: elseif argalias ~= nil then argalias = argprefix .. argalias end local ncompatflags = get_mask({"STD", "NODEF", "NOARGS", "NOPROTO", "NOSTD"}) local compatflags = get_mask_pat("COMPAT.*") if noproto or flags & known_flags["SYSMUX"] ~= 0 then flags = flags | known_flags["NOPROTO"]; end if flags & known_flags["OBSOL"] ~= 0 then handle_obsol(sysnum, funcname, funcomment) elseif flags & known_flags["RESERVED"] ~= 0 then handle_reserved(sysnum, sysstart, sysend) elseif flags & known_flags["UNIMPL"] ~= 0 then handle_unimpl(sysnum, sysstart, sysend, funcomment) elseif flags & compatflags ~= 0 then if flags & known_flags['STD'] ~= 0 then abort(1, "Incompatible COMPAT/STD: " .. line) end handle_compat(sysnum, thr_flag, flags, sysflags, rettype, auditev, funcname, funcalias, funcargs, argalias) elseif flags & ncompatflags ~= 0 then handle_noncompat(sysnum, thr_flag, flags, sysflags, rettype, auditev, syscallret, funcname, funcalias, funcargs, argalias) else abort(1, "Bad flags? " .. line) end if sysend ~= nil then maxsyscall = sysend elseif sysnum ~= nil then maxsyscall = sysnum end end -- Entry point if #arg < 1 or #arg > 2 then error("usage: " .. arg[0] .. " input-file ") end local sysfile, configfile = arg[1], arg[2] -- process_config either returns nil and a message, or a -- table that we should merge into the global config if configfile ~= nil then local res = assert(process_config(configfile)) for k, v in pairs(res) do if v ~= config[k] then config[k] = v config_modified[k] = true end end end local compat_set = config['compat_set'] if compat_set ~= "" then if not compat_option_sets[compat_set] then abort(1, "Undefined compat set: " .. compat_set) end compat_options = compat_option_sets[compat_set] else compat_options = {} end -- We ignore errors here if we're relying on the default configuration. if not config_modified["capenabled"] then config["capenabled"] = grab_capenabled(config['capabilities_conf'], config_modified["capabilities_conf"] == nil) elseif config["capenabled"] ~= "" then -- Due to limitations in the config format mostly, we'll have a comma -- separated list. Parse it into lines local capenabled = {} -- print("here: " .. config["capenabled"]) for sysc in config["capenabled"]:gmatch("([^,]+)") do capenabled[sysc] = true end config["capenabled"] = capenabled end process_compat() process_abi_flags() process_syscall_abi_change() process_obsol() process_unimpl() if not lfs.mkdir(tmpspace) then error("Failed to create tempdir " .. tmpspace) end -- XXX Revisit the error handling here, we should probably move the rest of this -- into a function that we pcall() so we can catch the errors and clean up -- gracefully. for _, v in ipairs(temp_files) do local tmpname = tmpspace .. v files[v] = io.open(tmpname, "w+") -- XXX Revisit these with a pcall() + error handler if not files[v] then abort(1, "Failed to open temp file: " .. tmpname) end end for _, v in ipairs(output_files) do local tmpname = tmpspace .. v files[v] = io.open(tmpname, "w+") -- XXX Revisit these with a pcall() + error handler if not files[v] then abort(1, "Failed to open temp output file: " .. tmpname) end end -- Write out all of the preamble bits write_line("sysent", string.format([[ /* The casts are bogus but will do for now. */ struct sysent %s[] = { ]], config['switchname'])) write_line("syssw", string.format([[/* * System call switch table. * * DO NOT EDIT-- this file is automatically %s. * $%s$ */ ]], generated_tag, config['os_id_keyword'])) write_line("sysarg", string.format([[/* * System call prototypes. * * DO NOT EDIT-- this file is automatically %s. * $%s$ */ #ifndef %s #define %s #include #include #include #include #include #include #include #include #include struct proc; struct thread; -#define PAD_(t) (sizeof(register_t) <= sizeof(t) ? \ - 0 : sizeof(register_t) - sizeof(t)) +#define PAD_(t) (sizeof(syscallarg_t) <= sizeof(t) ? \ + 0 : sizeof(syscallarg_t) - sizeof(t)) #if BYTE_ORDER == LITTLE_ENDIAN #define PADL_(t) 0 #define PADR_(t) PAD_(t) #else #define PADL_(t) PAD_(t) #define PADR_(t) 0 #endif ]], generated_tag, config['os_id_keyword'], config['sysproto_h'], config['sysproto_h'])) if abi_changes("pair_64bit") then write_line("sysarg", string.format([[ #if !defined(PAD64_REQUIRED) && !defined(__amd64__) #define PAD64_REQUIRED #endif ]])) end if abi_changes("pair_64bit") then write_line("systrace", string.format([[ #if !defined(PAD64_REQUIRED) && !defined(__amd64__) #define PAD64_REQUIRED #endif ]])) end for _, v in pairs(compat_options) do write_line(v["tmp"], string.format("\n#ifdef %s\n\n", v["definition"])) end write_line("sysnames", string.format([[/* * System call names. * * DO NOT EDIT-- this file is automatically %s. * $%s$ */ const char *%s[] = { ]], generated_tag, config['os_id_keyword'], config['namesname'])) write_line("syshdr", string.format([[/* * System call numbers. * * DO NOT EDIT-- this file is automatically %s. * $%s$ */ ]], generated_tag, config['os_id_keyword'])) write_line("sysmk", string.format([[# FreeBSD system call object files. # DO NOT EDIT-- this file is automatically %s. # $%s$ MIASM = ]], generated_tag, config['os_id_keyword'])) write_line("systrace", string.format([[/* * System call argument to DTrace register array converstion. * * DO NOT EDIT-- this file is automatically %s. * $%s$ * This file is part of the DTrace syscall provider. */ static void systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) { int64_t *iarg = (int64_t *)uarg; int a = 0; switch (sysnum) { ]], generated_tag, config['os_id_keyword'])) write_line("systracetmp", [[static void systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) { const char *p = NULL; switch (sysnum) { ]]) write_line("systraceret", [[static void systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) { const char *p = NULL; switch (sysnum) { ]]) -- Processing the sysfile will parse out the preprocessor bits and put them into -- the appropriate place. Any syscall-looking lines get thrown into the sysfile -- buffer, one per line, for later processing once they're all glued together. process_sysfile(sysfile) write_line("sysinc", - "\n#define AS(name) (sizeof(struct name) / sizeof(register_t))\n") + "\n#define AS(name) (sizeof(struct name) / sizeof(syscallarg_t))\n") for _, v in pairs(compat_options) do if v["count"] > 0 then write_line("sysinc", string.format([[ #ifdef %s #define %s(n, name) .sy_narg = n, .sy_call = (sy_call_t *)__CONCAT(%s, name) #else #define %s(n, name) .sy_narg = 0, .sy_call = (sy_call_t *)nosys #endif ]], v["definition"], v["flag"]:lower(), v["prefix"], v["flag"]:lower())) end write_line(v["dcltmp"], string.format("\n#endif /* %s */\n\n", v["definition"])) end write_line("sysprotoend", string.format([[ #undef PAD_ #undef PADL_ #undef PADR_ #endif /* !%s */ ]], config["sysproto_h"])) write_line("sysmk", "\n") write_line("sysent", "};\n") write_line("sysnames", "};\n") -- maxsyscall is the highest seen; MAXSYSCALL should be one higher write_line("syshdr", string.format("#define\t%sMAXSYSCALL\t%d\n", config["syscallprefix"], maxsyscall + 1)) write_line("systrace", [[ default: *n_args = 0; break; }; } ]]) write_line("systracetmp", [[ default: break; }; if (p != NULL) strlcpy(desc, p, descsz); } ]]) write_line("systraceret", [[ default: break; }; if (p != NULL) strlcpy(desc, p, descsz); } ]]) -- Finish up; output write_line("syssw", read_file("sysinc")) write_line("syssw", read_file("sysent")) write_line("sysproto", read_file("sysarg")) write_line("sysproto", read_file("sysdcl")) for _, v in pairs(compat_options) do write_line("sysproto", read_file(v["tmp"])) write_line("sysproto", read_file(v["dcltmp"])) end write_line("sysproto", read_file("sysaue")) write_line("sysproto", read_file("sysprotoend")) write_line("systrace", read_file("systracetmp")) write_line("systrace", read_file("systraceret")) for _, v in ipairs(output_files) do local target = config[v] if target ~= "/dev/null" then local fh = assert(io.open(target, "w+")) if fh == nil then abort(1, "Failed to open '" .. target .. "'") end assert(fh:write(read_file(v))) assert(fh:close()) end end cleanup() diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 8ad049ed6d5e..db9a32d1c9bc 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -1,1699 +1,1699 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1988 University of Utah. * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$ * * @(#)vm_mmap.c 8.4 (Berkeley) 1/12/94 */ /* * Mapped file (mmap) interface to VM */ #include __FBSDID("$FreeBSD$"); #include "opt_hwpmc_hooks.h" #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__amd64__) || defined(__i386__) /* for i386_read_exec */ #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include #endif int old_mlock = 0; SYSCTL_INT(_vm, OID_AUTO, old_mlock, CTLFLAG_RWTUN, &old_mlock, 0, "Do not apply RLIMIT_MEMLOCK on mlockall"); static int mincore_mapped = 1; SYSCTL_INT(_vm, OID_AUTO, mincore_mapped, CTLFLAG_RWTUN, &mincore_mapped, 0, "mincore reports mappings, not residency"); static int imply_prot_max = 0; SYSCTL_INT(_vm, OID_AUTO, imply_prot_max, CTLFLAG_RWTUN, &imply_prot_max, 0, "Imply maximum page protections in mmap() when none are specified"); #ifdef MAP_32BIT #define MAP_32BIT_MAX_ADDR ((vm_offset_t)1 << 31) #endif _Static_assert(MAXPAGESIZES <= 4, "MINCORE_SUPER too narrow"); #ifndef _SYS_SYSPROTO_H_ struct sbrk_args { int incr; }; #endif int sys_sbrk(struct thread *td, struct sbrk_args *uap) { /* Not yet implemented */ return (EOPNOTSUPP); } #ifndef _SYS_SYSPROTO_H_ struct sstk_args { int incr; }; #endif int sys_sstk(struct thread *td, struct sstk_args *uap) { /* Not yet implemented */ return (EOPNOTSUPP); } #if defined(COMPAT_43) int ogetpagesize(struct thread *td, struct ogetpagesize_args *uap) { td->td_retval[0] = PAGE_SIZE; return (0); } #endif /* COMPAT_43 */ /* * Memory Map (mmap) system call. Note that the file offset * and address are allowed to be NOT page aligned, though if * the MAP_FIXED flag it set, both must have the same remainder * modulo the PAGE_SIZE (POSIX 1003.1b). If the address is not * page-aligned, the actual mapping starts at trunc_page(addr) * and the return value is adjusted up by the page offset. * * Generally speaking, only character devices which are themselves * memory-based, such as a video framebuffer, can be mmap'd. Otherwise * there would be no cache coherency between a descriptor and a VM mapping * both to the same character device. */ #ifndef _SYS_SYSPROTO_H_ struct mmap_args { void *addr; size_t len; int prot; int flags; int fd; long pad; off_t pos; }; #endif int sys_mmap(struct thread *td, struct mmap_args *uap) { return (kern_mmap(td, &(struct mmap_req){ .mr_hint = (uintptr_t)uap->addr, .mr_len = uap->len, .mr_prot = uap->prot, .mr_flags = uap->flags, .mr_fd = uap->fd, .mr_pos = uap->pos, })); } int kern_mmap_maxprot(struct proc *p, int prot) { if ((p->p_flag2 & P2_PROTMAX_DISABLE) != 0 || (p->p_fctl0 & NT_FREEBSD_FCTL_PROTMAX_DISABLE) != 0) return (_PROT_ALL); if (((p->p_flag2 & P2_PROTMAX_ENABLE) != 0 || imply_prot_max) && prot != PROT_NONE) return (prot); return (_PROT_ALL); } int kern_mmap(struct thread *td, const struct mmap_req *mrp) { struct vmspace *vms; struct file *fp; struct proc *p; off_t pos; vm_offset_t addr, orig_addr; vm_size_t len, pageoff, size; vm_prot_t cap_maxprot; int align, error, fd, flags, max_prot, prot; cap_rights_t rights; mmap_check_fp_fn check_fp_fn; orig_addr = addr = mrp->mr_hint; len = mrp->mr_len; prot = mrp->mr_prot; flags = mrp->mr_flags; fd = mrp->mr_fd; pos = mrp->mr_pos; check_fp_fn = mrp->mr_check_fp_fn; if ((prot & ~(_PROT_ALL | PROT_MAX(_PROT_ALL))) != 0) return (EINVAL); max_prot = PROT_MAX_EXTRACT(prot); prot = PROT_EXTRACT(prot); if (max_prot != 0 && (max_prot & prot) != prot) return (ENOTSUP); p = td->td_proc; /* * Always honor PROT_MAX if set. If not, default to all * permissions unless we're implying maximum permissions. */ if (max_prot == 0) max_prot = kern_mmap_maxprot(p, prot); vms = p->p_vmspace; fp = NULL; AUDIT_ARG_FD(fd); /* * Ignore old flags that used to be defined but did not do anything. */ flags &= ~(MAP_RESERVED0020 | MAP_RESERVED0040); /* * Enforce the constraints. * Mapping of length 0 is only allowed for old binaries. * Anonymous mapping shall specify -1 as filedescriptor and * zero position for new code. Be nice to ancient a.out * binaries and correct pos for anonymous mapping, since old * ld.so sometimes issues anonymous map requests with non-zero * pos. */ if (!SV_CURPROC_FLAG(SV_AOUT)) { if ((len == 0 && p->p_osrel >= P_OSREL_MAP_ANON) || ((flags & MAP_ANON) != 0 && (fd != -1 || pos != 0))) return (EINVAL); } else { if ((flags & MAP_ANON) != 0) pos = 0; } if (flags & MAP_STACK) { if ((fd != -1) || ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE))) return (EINVAL); flags |= MAP_ANON; pos = 0; } if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | MAP_HASSEMAPHORE | MAP_STACK | MAP_NOSYNC | MAP_ANON | MAP_EXCL | MAP_NOCORE | MAP_PREFAULT_READ | MAP_GUARD | #ifdef MAP_32BIT MAP_32BIT | #endif MAP_ALIGNMENT_MASK)) != 0) return (EINVAL); if ((flags & (MAP_EXCL | MAP_FIXED)) == MAP_EXCL) return (EINVAL); if ((flags & (MAP_SHARED | MAP_PRIVATE)) == (MAP_SHARED | MAP_PRIVATE)) return (EINVAL); if (prot != PROT_NONE && (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) != 0) return (EINVAL); if ((flags & MAP_GUARD) != 0 && (prot != PROT_NONE || fd != -1 || pos != 0 || (flags & ~(MAP_FIXED | MAP_GUARD | MAP_EXCL | #ifdef MAP_32BIT MAP_32BIT | #endif MAP_ALIGNMENT_MASK)) != 0)) return (EINVAL); /* * Align the file position to a page boundary, * and save its page offset component. */ pageoff = (pos & PAGE_MASK); pos -= pageoff; /* Compute size from len by rounding (on both ends). */ size = len + pageoff; /* low end... */ size = round_page(size); /* hi end */ /* Check for rounding up to zero. */ if (len > size) return (ENOMEM); /* Ensure alignment is at least a page and fits in a pointer. */ align = flags & MAP_ALIGNMENT_MASK; if (align != 0 && align != MAP_ALIGNED_SUPER && (align >> MAP_ALIGNMENT_SHIFT >= sizeof(void *) * NBBY || align >> MAP_ALIGNMENT_SHIFT < PAGE_SHIFT)) return (EINVAL); /* * Check for illegal addresses. Watch out for address wrap... Note * that VM_*_ADDRESS are not constants due to casts (argh). */ if (flags & MAP_FIXED) { /* * The specified address must have the same remainder * as the file offset taken modulo PAGE_SIZE, so it * should be aligned after adjustment by pageoff. */ addr -= pageoff; if (addr & PAGE_MASK) return (EINVAL); /* Address range must be all in user VM space. */ if (!vm_map_range_valid(&vms->vm_map, addr, addr + size)) return (EINVAL); #ifdef MAP_32BIT if (flags & MAP_32BIT && addr + size > MAP_32BIT_MAX_ADDR) return (EINVAL); } else if (flags & MAP_32BIT) { /* * For MAP_32BIT, override the hint if it is too high and * do not bother moving the mapping past the heap (since * the heap is usually above 2GB). */ if (addr + size > MAP_32BIT_MAX_ADDR) addr = 0; #endif } else { /* * XXX for non-fixed mappings where no hint is provided or * the hint would fall in the potential heap space, * place it after the end of the largest possible heap. * * There should really be a pmap call to determine a reasonable * location. */ if (addr == 0 || (addr >= round_page((vm_offset_t)vms->vm_taddr) && addr < round_page((vm_offset_t)vms->vm_daddr + lim_max(td, RLIMIT_DATA)))) addr = round_page((vm_offset_t)vms->vm_daddr + lim_max(td, RLIMIT_DATA)); } if (len == 0) { /* * Return success without mapping anything for old * binaries that request a page-aligned mapping of * length 0. For modern binaries, this function * returns an error earlier. */ error = 0; } else if ((flags & MAP_GUARD) != 0) { error = vm_mmap_object(&vms->vm_map, &addr, size, VM_PROT_NONE, VM_PROT_NONE, flags, NULL, pos, FALSE, td); } else if ((flags & MAP_ANON) != 0) { /* * Mapping blank space is trivial. * * This relies on VM_PROT_* matching PROT_*. */ error = vm_mmap_object(&vms->vm_map, &addr, size, prot, max_prot, flags, NULL, pos, FALSE, td); } else { /* * Mapping file, get fp for validation and don't let the * descriptor disappear on us if we block. Check capability * rights, but also return the maximum rights to be combined * with maxprot later. */ cap_rights_init_one(&rights, CAP_MMAP); if (prot & PROT_READ) cap_rights_set_one(&rights, CAP_MMAP_R); if ((flags & MAP_SHARED) != 0) { if (prot & PROT_WRITE) cap_rights_set_one(&rights, CAP_MMAP_W); } if (prot & PROT_EXEC) cap_rights_set_one(&rights, CAP_MMAP_X); error = fget_mmap(td, fd, &rights, &cap_maxprot, &fp); if (error != 0) goto done; if ((flags & (MAP_SHARED | MAP_PRIVATE)) == 0 && p->p_osrel >= P_OSREL_MAP_FSTRICT) { error = EINVAL; goto done; } if (check_fp_fn != NULL) { error = check_fp_fn(fp, prot, max_prot & cap_maxprot, flags); if (error != 0) goto done; } if (fp->f_ops == &shm_ops && shm_largepage(fp->f_data)) addr = orig_addr; /* This relies on VM_PROT_* matching PROT_*. */ error = fo_mmap(fp, &vms->vm_map, &addr, size, prot, max_prot & cap_maxprot, flags, pos, td); } if (error == 0) - td->td_retval[0] = (register_t) (addr + pageoff); + td->td_retval[0] = addr + pageoff; done: if (fp) fdrop(fp, td); return (error); } #if defined(COMPAT_FREEBSD6) int freebsd6_mmap(struct thread *td, struct freebsd6_mmap_args *uap) { return (kern_mmap(td, &(struct mmap_req){ .mr_hint = (uintptr_t)uap->addr, .mr_len = uap->len, .mr_prot = uap->prot, .mr_flags = uap->flags, .mr_fd = uap->fd, .mr_pos = uap->pos, })); } #endif #ifdef COMPAT_43 #ifndef _SYS_SYSPROTO_H_ struct ommap_args { caddr_t addr; int len; int prot; int flags; int fd; long pos; }; #endif int ommap(struct thread *td, struct ommap_args *uap) { return (kern_ommap(td, (uintptr_t)uap->addr, uap->len, uap->prot, uap->flags, uap->fd, uap->pos)); } int kern_ommap(struct thread *td, uintptr_t hint, int len, int oprot, int oflags, int fd, long pos) { static const char cvtbsdprot[8] = { 0, PROT_EXEC, PROT_WRITE, PROT_EXEC | PROT_WRITE, PROT_READ, PROT_EXEC | PROT_READ, PROT_WRITE | PROT_READ, PROT_EXEC | PROT_WRITE | PROT_READ, }; int flags, prot; if (len < 0) return (EINVAL); #define OMAP_ANON 0x0002 #define OMAP_COPY 0x0020 #define OMAP_SHARED 0x0010 #define OMAP_FIXED 0x0100 prot = cvtbsdprot[oprot & 0x7]; #if (defined(COMPAT_FREEBSD32) && defined(__amd64__)) || defined(__i386__) if (i386_read_exec && SV_PROC_FLAG(td->td_proc, SV_ILP32) && prot != 0) prot |= PROT_EXEC; #endif flags = 0; if (oflags & OMAP_ANON) flags |= MAP_ANON; if (oflags & OMAP_COPY) flags |= MAP_COPY; if (oflags & OMAP_SHARED) flags |= MAP_SHARED; else flags |= MAP_PRIVATE; if (oflags & OMAP_FIXED) flags |= MAP_FIXED; return (kern_mmap(td, &(struct mmap_req){ .mr_hint = hint, .mr_len = len, .mr_prot = prot, .mr_flags = flags, .mr_fd = fd, .mr_pos = pos, })); } #endif /* COMPAT_43 */ #ifndef _SYS_SYSPROTO_H_ struct msync_args { void *addr; size_t len; int flags; }; #endif int sys_msync(struct thread *td, struct msync_args *uap) { return (kern_msync(td, (uintptr_t)uap->addr, uap->len, uap->flags)); } int kern_msync(struct thread *td, uintptr_t addr0, size_t size, int flags) { vm_offset_t addr; vm_size_t pageoff; vm_map_t map; int rv; addr = addr0; pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; size = (vm_size_t) round_page(size); if (addr + size < addr) return (EINVAL); if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE)) return (EINVAL); map = &td->td_proc->p_vmspace->vm_map; /* * Clean the pages and interpret the return value. */ rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0, (flags & MS_INVALIDATE) != 0); switch (rv) { case KERN_SUCCESS: return (0); case KERN_INVALID_ADDRESS: return (ENOMEM); case KERN_INVALID_ARGUMENT: return (EBUSY); case KERN_FAILURE: return (EIO); default: return (EINVAL); } } #ifndef _SYS_SYSPROTO_H_ struct munmap_args { void *addr; size_t len; }; #endif int sys_munmap(struct thread *td, struct munmap_args *uap) { return (kern_munmap(td, (uintptr_t)uap->addr, uap->len)); } int kern_munmap(struct thread *td, uintptr_t addr0, size_t size) { #ifdef HWPMC_HOOKS struct pmckern_map_out pkm; vm_map_entry_t entry; bool pmc_handled; #endif vm_offset_t addr, end; vm_size_t pageoff; vm_map_t map; int rv; if (size == 0) return (EINVAL); addr = addr0; pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; size = (vm_size_t) round_page(size); end = addr + size; map = &td->td_proc->p_vmspace->vm_map; if (!vm_map_range_valid(map, addr, end)) return (EINVAL); vm_map_lock(map); #ifdef HWPMC_HOOKS pmc_handled = false; if (PMC_HOOK_INSTALLED(PMC_FN_MUNMAP)) { pmc_handled = true; /* * Inform hwpmc if the address range being unmapped contains * an executable region. */ pkm.pm_address = (uintptr_t) NULL; if (vm_map_lookup_entry(map, addr, &entry)) { for (; entry->start < end; entry = vm_map_entry_succ(entry)) { if (vm_map_check_protection(map, entry->start, entry->end, VM_PROT_EXECUTE) == TRUE) { pkm.pm_address = (uintptr_t) addr; pkm.pm_size = (size_t) size; break; } } } } #endif rv = vm_map_delete(map, addr, end); #ifdef HWPMC_HOOKS if (rv == KERN_SUCCESS && __predict_false(pmc_handled)) { /* downgrade the lock to prevent a LOR with the pmc-sx lock */ vm_map_lock_downgrade(map); if (pkm.pm_address != (uintptr_t) NULL) PMC_CALL_HOOK(td, PMC_FN_MUNMAP, (void *) &pkm); vm_map_unlock_read(map); } else #endif vm_map_unlock(map); return (vm_mmap_to_errno(rv)); } #ifndef _SYS_SYSPROTO_H_ struct mprotect_args { const void *addr; size_t len; int prot; }; #endif int sys_mprotect(struct thread *td, struct mprotect_args *uap) { return (kern_mprotect(td, (uintptr_t)uap->addr, uap->len, uap->prot)); } int kern_mprotect(struct thread *td, uintptr_t addr0, size_t size, int prot) { vm_offset_t addr; vm_size_t pageoff; int vm_error, max_prot; int flags; addr = addr0; if ((prot & ~(_PROT_ALL | PROT_MAX(_PROT_ALL))) != 0) return (EINVAL); max_prot = PROT_MAX_EXTRACT(prot); prot = PROT_EXTRACT(prot); pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; size = (vm_size_t) round_page(size); #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { if (((addr + size) & 0xffffffff) < addr) return (EINVAL); } else #endif if (addr + size < addr) return (EINVAL); flags = VM_MAP_PROTECT_SET_PROT; if (max_prot != 0) flags |= VM_MAP_PROTECT_SET_MAXPROT; vm_error = vm_map_protect(&td->td_proc->p_vmspace->vm_map, addr, addr + size, prot, max_prot, flags); switch (vm_error) { case KERN_SUCCESS: return (0); case KERN_PROTECTION_FAILURE: return (EACCES); case KERN_RESOURCE_SHORTAGE: return (ENOMEM); case KERN_OUT_OF_BOUNDS: return (ENOTSUP); } return (EINVAL); } #ifndef _SYS_SYSPROTO_H_ struct minherit_args { void *addr; size_t len; int inherit; }; #endif int sys_minherit(struct thread *td, struct minherit_args *uap) { return (kern_minherit(td, (uintptr_t)uap->addr, uap->len, uap->inherit)); } int kern_minherit(struct thread *td, uintptr_t addr0, size_t len, int inherit0) { vm_offset_t addr; vm_size_t size, pageoff; vm_inherit_t inherit; addr = (vm_offset_t)addr0; size = len; inherit = inherit0; pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; size = (vm_size_t) round_page(size); if (addr + size < addr) return (EINVAL); switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr, addr + size, inherit)) { case KERN_SUCCESS: return (0); case KERN_PROTECTION_FAILURE: return (EACCES); } return (EINVAL); } #ifndef _SYS_SYSPROTO_H_ struct madvise_args { void *addr; size_t len; int behav; }; #endif int sys_madvise(struct thread *td, struct madvise_args *uap) { return (kern_madvise(td, (uintptr_t)uap->addr, uap->len, uap->behav)); } int kern_madvise(struct thread *td, uintptr_t addr0, size_t len, int behav) { vm_map_t map; vm_offset_t addr, end, start; int flags; /* * Check for our special case, advising the swap pager we are * "immortal." */ if (behav == MADV_PROTECT) { flags = PPROT_SET; return (kern_procctl(td, P_PID, td->td_proc->p_pid, PROC_SPROTECT, &flags)); } /* * Check for illegal addresses. Watch out for address wrap... Note * that VM_*_ADDRESS are not constants due to casts (argh). */ map = &td->td_proc->p_vmspace->vm_map; addr = addr0; if (!vm_map_range_valid(map, addr, addr + len)) return (EINVAL); /* * Since this routine is only advisory, we default to conservative * behavior. */ start = trunc_page(addr); end = round_page(addr + len); /* * vm_map_madvise() checks for illegal values of behav. */ return (vm_map_madvise(map, start, end, behav)); } #ifndef _SYS_SYSPROTO_H_ struct mincore_args { const void *addr; size_t len; char *vec; }; #endif int sys_mincore(struct thread *td, struct mincore_args *uap) { return (kern_mincore(td, (uintptr_t)uap->addr, uap->len, uap->vec)); } int kern_mincore(struct thread *td, uintptr_t addr0, size_t len, char *vec) { pmap_t pmap; vm_map_t map; vm_map_entry_t current, entry; vm_object_t object; vm_offset_t addr, cend, end, first_addr; vm_paddr_t pa; vm_page_t m; vm_pindex_t pindex; int error, lastvecindex, mincoreinfo, vecindex; unsigned int timestamp; /* * Make sure that the addresses presented are valid for user * mode. */ first_addr = addr = trunc_page(addr0); end = round_page(addr0 + len); map = &td->td_proc->p_vmspace->vm_map; if (end > vm_map_max(map) || end < addr) return (ENOMEM); pmap = vmspace_pmap(td->td_proc->p_vmspace); vm_map_lock_read(map); RestartScan: timestamp = map->timestamp; if (!vm_map_lookup_entry(map, addr, &entry)) { vm_map_unlock_read(map); return (ENOMEM); } /* * Do this on a map entry basis so that if the pages are not * in the current processes address space, we can easily look * up the pages elsewhere. */ lastvecindex = -1; while (entry->start < end) { /* * check for contiguity */ current = entry; entry = vm_map_entry_succ(current); if (current->end < end && entry->start > current->end) { vm_map_unlock_read(map); return (ENOMEM); } /* * ignore submaps (for now) or null objects */ if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) || current->object.vm_object == NULL) continue; /* * limit this scan to the current map entry and the * limits for the mincore call */ if (addr < current->start) addr = current->start; cend = current->end; if (cend > end) cend = end; for (; addr < cend; addr += PAGE_SIZE) { /* * Check pmap first, it is likely faster, also * it can provide info as to whether we are the * one referencing or modifying the page. */ m = NULL; object = NULL; retry: pa = 0; mincoreinfo = pmap_mincore(pmap, addr, &pa); if (mincore_mapped) { /* * We only care about this pmap's * mapping of the page, if any. */ ; } else if (pa != 0) { /* * The page is mapped by this process but not * both accessed and modified. It is also * managed. Acquire the object lock so that * other mappings might be examined. The page's * identity may change at any point before its * object lock is acquired, so re-validate if * necessary. */ m = PHYS_TO_VM_PAGE(pa); while (object == NULL || m->object != object) { if (object != NULL) VM_OBJECT_WUNLOCK(object); object = atomic_load_ptr(&m->object); if (object == NULL) goto retry; VM_OBJECT_WLOCK(object); } if (pa != pmap_extract(pmap, addr)) goto retry; KASSERT(vm_page_all_valid(m), ("mincore: page %p is mapped but invalid", m)); } else if (mincoreinfo == 0) { /* * The page is not mapped by this process. If * the object implements managed pages, then * determine if the page is resident so that * the mappings might be examined. */ if (current->object.vm_object != object) { if (object != NULL) VM_OBJECT_WUNLOCK(object); object = current->object.vm_object; VM_OBJECT_WLOCK(object); } if (object->type == OBJT_DEFAULT || (object->flags & OBJ_SWAP) != 0 || object->type == OBJT_VNODE) { pindex = OFF_TO_IDX(current->offset + (addr - current->start)); m = vm_page_lookup(object, pindex); if (m != NULL && vm_page_none_valid(m)) m = NULL; if (m != NULL) mincoreinfo = MINCORE_INCORE; } } if (m != NULL) { VM_OBJECT_ASSERT_WLOCKED(m->object); /* Examine other mappings of the page. */ if (m->dirty == 0 && pmap_is_modified(m)) vm_page_dirty(m); if (m->dirty != 0) mincoreinfo |= MINCORE_MODIFIED_OTHER; /* * The first test for PGA_REFERENCED is an * optimization. The second test is * required because a concurrent pmap * operation could clear the last reference * and set PGA_REFERENCED before the call to * pmap_is_referenced(). */ if ((m->a.flags & PGA_REFERENCED) != 0 || pmap_is_referenced(m) || (m->a.flags & PGA_REFERENCED) != 0) mincoreinfo |= MINCORE_REFERENCED_OTHER; } if (object != NULL) VM_OBJECT_WUNLOCK(object); /* * subyte may page fault. In case it needs to modify * the map, we release the lock. */ vm_map_unlock_read(map); /* * calculate index into user supplied byte vector */ vecindex = atop(addr - first_addr); /* * If we have skipped map entries, we need to make sure that * the byte vector is zeroed for those skipped entries. */ while ((lastvecindex + 1) < vecindex) { ++lastvecindex; error = subyte(vec + lastvecindex, 0); if (error) { error = EFAULT; goto done2; } } /* * Pass the page information to the user */ error = subyte(vec + vecindex, mincoreinfo); if (error) { error = EFAULT; goto done2; } /* * If the map has changed, due to the subyte, the previous * output may be invalid. */ vm_map_lock_read(map); if (timestamp != map->timestamp) goto RestartScan; lastvecindex = vecindex; } } /* * subyte may page fault. In case it needs to modify * the map, we release the lock. */ vm_map_unlock_read(map); /* * Zero the last entries in the byte vector. */ vecindex = atop(end - first_addr); while ((lastvecindex + 1) < vecindex) { ++lastvecindex; error = subyte(vec + lastvecindex, 0); if (error) { error = EFAULT; goto done2; } } /* * If the map has changed, due to the subyte, the previous * output may be invalid. */ vm_map_lock_read(map); if (timestamp != map->timestamp) goto RestartScan; vm_map_unlock_read(map); done2: return (error); } #ifndef _SYS_SYSPROTO_H_ struct mlock_args { const void *addr; size_t len; }; #endif int sys_mlock(struct thread *td, struct mlock_args *uap) { return (kern_mlock(td->td_proc, td->td_ucred, __DECONST(uintptr_t, uap->addr), uap->len)); } int kern_mlock(struct proc *proc, struct ucred *cred, uintptr_t addr0, size_t len) { vm_offset_t addr, end, last, start; vm_size_t npages, size; vm_map_t map; unsigned long nsize; int error; error = priv_check_cred(cred, PRIV_VM_MLOCK); if (error) return (error); addr = addr0; size = len; last = addr + size; start = trunc_page(addr); end = round_page(last); if (last < addr || end < addr) return (EINVAL); npages = atop(end - start); if (npages > vm_page_max_user_wired) return (ENOMEM); map = &proc->p_vmspace->vm_map; PROC_LOCK(proc); nsize = ptoa(npages + pmap_wired_count(map->pmap)); if (nsize > lim_cur_proc(proc, RLIMIT_MEMLOCK)) { PROC_UNLOCK(proc); return (ENOMEM); } PROC_UNLOCK(proc); #ifdef RACCT if (racct_enable) { PROC_LOCK(proc); error = racct_set(proc, RACCT_MEMLOCK, nsize); PROC_UNLOCK(proc); if (error != 0) return (ENOMEM); } #endif error = vm_map_wire(map, start, end, VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); #ifdef RACCT if (racct_enable && error != KERN_SUCCESS) { PROC_LOCK(proc); racct_set(proc, RACCT_MEMLOCK, ptoa(pmap_wired_count(map->pmap))); PROC_UNLOCK(proc); } #endif switch (error) { case KERN_SUCCESS: return (0); case KERN_INVALID_ARGUMENT: return (EINVAL); default: return (ENOMEM); } } #ifndef _SYS_SYSPROTO_H_ struct mlockall_args { int how; }; #endif int sys_mlockall(struct thread *td, struct mlockall_args *uap) { vm_map_t map; int error; map = &td->td_proc->p_vmspace->vm_map; error = priv_check(td, PRIV_VM_MLOCK); if (error) return (error); if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0)) return (EINVAL); /* * If wiring all pages in the process would cause it to exceed * a hard resource limit, return ENOMEM. */ if (!old_mlock && uap->how & MCL_CURRENT) { if (map->size > lim_cur(td, RLIMIT_MEMLOCK)) return (ENOMEM); } #ifdef RACCT if (racct_enable) { PROC_LOCK(td->td_proc); error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size); PROC_UNLOCK(td->td_proc); if (error != 0) return (ENOMEM); } #endif if (uap->how & MCL_FUTURE) { vm_map_lock(map); vm_map_modflags(map, MAP_WIREFUTURE, 0); vm_map_unlock(map); error = 0; } if (uap->how & MCL_CURRENT) { /* * P1003.1-2001 mandates that all currently mapped pages * will be memory resident and locked (wired) upon return * from mlockall(). vm_map_wire() will wire pages, by * calling vm_fault_wire() for each page in the region. */ error = vm_map_wire(map, vm_map_min(map), vm_map_max(map), VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK); if (error == KERN_SUCCESS) error = 0; else if (error == KERN_RESOURCE_SHORTAGE) error = ENOMEM; else error = EAGAIN; } #ifdef RACCT if (racct_enable && error != KERN_SUCCESS) { PROC_LOCK(td->td_proc); racct_set(td->td_proc, RACCT_MEMLOCK, ptoa(pmap_wired_count(map->pmap))); PROC_UNLOCK(td->td_proc); } #endif return (error); } #ifndef _SYS_SYSPROTO_H_ struct munlockall_args { register_t dummy; }; #endif int sys_munlockall(struct thread *td, struct munlockall_args *uap) { vm_map_t map; int error; map = &td->td_proc->p_vmspace->vm_map; error = priv_check(td, PRIV_VM_MUNLOCK); if (error) return (error); /* Clear the MAP_WIREFUTURE flag from this vm_map. */ vm_map_lock(map); vm_map_modflags(map, 0, MAP_WIREFUTURE); vm_map_unlock(map); /* Forcibly unwire all pages. */ error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map), VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK); #ifdef RACCT if (racct_enable && error == KERN_SUCCESS) { PROC_LOCK(td->td_proc); racct_set(td->td_proc, RACCT_MEMLOCK, 0); PROC_UNLOCK(td->td_proc); } #endif return (error); } #ifndef _SYS_SYSPROTO_H_ struct munlock_args { const void *addr; size_t len; }; #endif int sys_munlock(struct thread *td, struct munlock_args *uap) { return (kern_munlock(td, (uintptr_t)uap->addr, uap->len)); } int kern_munlock(struct thread *td, uintptr_t addr0, size_t size) { vm_offset_t addr, end, last, start; #ifdef RACCT vm_map_t map; #endif int error; error = priv_check(td, PRIV_VM_MUNLOCK); if (error) return (error); addr = addr0; last = addr + size; start = trunc_page(addr); end = round_page(last); if (last < addr || end < addr) return (EINVAL); error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end, VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); #ifdef RACCT if (racct_enable && error == KERN_SUCCESS) { PROC_LOCK(td->td_proc); map = &td->td_proc->p_vmspace->vm_map; racct_set(td->td_proc, RACCT_MEMLOCK, ptoa(pmap_wired_count(map->pmap))); PROC_UNLOCK(td->td_proc); } #endif return (error == KERN_SUCCESS ? 0 : ENOMEM); } /* * vm_mmap_vnode() * * Helper function for vm_mmap. Perform sanity check specific for mmap * operations on vnodes. */ int vm_mmap_vnode(struct thread *td, vm_size_t objsize, vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp, struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp, boolean_t *writecounted) { struct vattr va; vm_object_t obj; vm_ooffset_t foff; struct ucred *cred; int error, flags; bool writex; cred = td->td_ucred; writex = (*maxprotp & VM_PROT_WRITE) != 0 && (*flagsp & MAP_SHARED) != 0; if ((error = vget(vp, LK_SHARED)) != 0) return (error); AUDIT_ARG_VNODE1(vp); foff = *foffp; flags = *flagsp; obj = vp->v_object; if (vp->v_type == VREG) { /* * Get the proper underlying object */ if (obj == NULL) { error = EINVAL; goto done; } if (obj->type == OBJT_VNODE && obj->handle != vp) { vput(vp); vp = (struct vnode *)obj->handle; /* * Bypass filesystems obey the mpsafety of the * underlying fs. Tmpfs never bypasses. */ error = vget(vp, LK_SHARED); if (error != 0) return (error); } if (writex) { *writecounted = TRUE; vm_pager_update_writecount(obj, 0, objsize); } } else { error = EINVAL; goto done; } if ((error = VOP_GETATTR(vp, &va, cred))) goto done; #ifdef MAC /* This relies on VM_PROT_* matching PROT_*. */ error = mac_vnode_check_mmap(cred, vp, (int)prot, flags); if (error != 0) goto done; #endif if ((flags & MAP_SHARED) != 0) { if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) { if (prot & VM_PROT_WRITE) { error = EPERM; goto done; } *maxprotp &= ~VM_PROT_WRITE; } } /* * If it is a regular file without any references * we do not need to sync it. * Adjust object size to be the size of actual file. */ objsize = round_page(va.va_size); if (va.va_nlink == 0) flags |= MAP_NOSYNC; if (obj->type == OBJT_VNODE) { obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff, cred); if (obj == NULL) { error = ENOMEM; goto done; } } else { KASSERT(obj->type == OBJT_DEFAULT || (obj->flags & OBJ_SWAP) != 0, ("wrong object type")); vm_object_reference(obj); #if VM_NRESERVLEVEL > 0 if ((obj->flags & OBJ_COLORED) == 0) { VM_OBJECT_WLOCK(obj); vm_object_color(obj, 0); VM_OBJECT_WUNLOCK(obj); } #endif } *objp = obj; *flagsp = flags; VOP_MMAPPED(vp); done: if (error != 0 && *writecounted) { *writecounted = FALSE; vm_pager_update_writecount(obj, objsize, 0); } vput(vp); return (error); } /* * vm_mmap_cdev() * * Helper function for vm_mmap. Perform sanity check specific for mmap * operations on cdevs. */ int vm_mmap_cdev(struct thread *td, vm_size_t objsize, vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp, struct cdev *cdev, struct cdevsw *dsw, vm_ooffset_t *foff, vm_object_t *objp) { vm_object_t obj; int error, flags; flags = *flagsp; if (dsw->d_flags & D_MMAP_ANON) { *objp = NULL; *foff = 0; *maxprotp = VM_PROT_ALL; *flagsp |= MAP_ANON; return (0); } /* * cdevs do not provide private mappings of any kind. */ if ((*maxprotp & VM_PROT_WRITE) == 0 && (prot & VM_PROT_WRITE) != 0) return (EACCES); if (flags & (MAP_PRIVATE|MAP_COPY)) return (EINVAL); /* * Force device mappings to be shared. */ flags |= MAP_SHARED; #ifdef MAC_XXX error = mac_cdev_check_mmap(td->td_ucred, cdev, (int)prot); if (error != 0) return (error); #endif /* * First, try d_mmap_single(). If that is not implemented * (returns ENODEV), fall back to using the device pager. * Note that d_mmap_single() must return a reference to the * object (it needs to bump the reference count of the object * it returns somehow). * * XXX assumes VM_PROT_* == PROT_* */ error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot); if (error != ENODEV) return (error); obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff, td->td_ucred); if (obj == NULL) return (EINVAL); *objp = obj; *flagsp = flags; return (0); } /* * vm_mmap() * * Internal version of mmap used by exec, sys5 shared memory, and * various device drivers. Handle is either a vnode pointer, a * character device, or NULL for MAP_ANON. */ int vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, vm_prot_t maxprot, int flags, objtype_t handle_type, void *handle, vm_ooffset_t foff) { vm_object_t object; struct thread *td = curthread; int error; boolean_t writecounted; if (size == 0) return (EINVAL); size = round_page(size); object = NULL; writecounted = FALSE; /* * Lookup/allocate object. */ switch (handle_type) { case OBJT_DEVICE: { struct cdevsw *dsw; struct cdev *cdev; int ref; cdev = handle; dsw = dev_refthread(cdev, &ref); if (dsw == NULL) return (ENXIO); error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, cdev, dsw, &foff, &object); dev_relthread(cdev, ref); break; } case OBJT_VNODE: error = vm_mmap_vnode(td, size, prot, &maxprot, &flags, handle, &foff, &object, &writecounted); break; case OBJT_DEFAULT: if (handle == NULL) { error = 0; break; } /* FALLTHROUGH */ default: error = EINVAL; break; } if (error) return (error); error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object, foff, writecounted, td); if (error != 0 && object != NULL) { /* * If this mapping was accounted for in the vnode's * writecount, then undo that now. */ if (writecounted) vm_pager_release_writecount(object, 0, size); vm_object_deallocate(object); } return (error); } int kern_mmap_racct_check(struct thread *td, vm_map_t map, vm_size_t size) { int error; RACCT_PROC_LOCK(td->td_proc); if (map->size + size > lim_cur(td, RLIMIT_VMEM)) { RACCT_PROC_UNLOCK(td->td_proc); return (ENOMEM); } if (racct_set(td->td_proc, RACCT_VMEM, map->size + size)) { RACCT_PROC_UNLOCK(td->td_proc); return (ENOMEM); } if (!old_mlock && map->flags & MAP_WIREFUTURE) { if (ptoa(pmap_wired_count(map->pmap)) + size > lim_cur(td, RLIMIT_MEMLOCK)) { racct_set_force(td->td_proc, RACCT_VMEM, map->size); RACCT_PROC_UNLOCK(td->td_proc); return (ENOMEM); } error = racct_set(td->td_proc, RACCT_MEMLOCK, ptoa(pmap_wired_count(map->pmap)) + size); if (error != 0) { racct_set_force(td->td_proc, RACCT_VMEM, map->size); RACCT_PROC_UNLOCK(td->td_proc); return (error); } } RACCT_PROC_UNLOCK(td->td_proc); return (0); } /* * Internal version of mmap that maps a specific VM object into an * map. Called by mmap for MAP_ANON, vm_mmap, shm_mmap, and vn_mmap. */ int vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, vm_prot_t maxprot, int flags, vm_object_t object, vm_ooffset_t foff, boolean_t writecounted, struct thread *td) { vm_offset_t max_addr; int docow, error, findspace, rv; bool curmap, fitit; curmap = map == &td->td_proc->p_vmspace->vm_map; if (curmap) { error = kern_mmap_racct_check(td, map, size); if (error != 0) return (error); } /* * We currently can only deal with page aligned file offsets. * The mmap() system call already enforces this by subtracting * the page offset from the file offset, but checking here * catches errors in device drivers (e.g. d_single_mmap() * callbacks) and other internal mapping requests (such as in * exec). */ if (foff & PAGE_MASK) return (EINVAL); if ((flags & MAP_FIXED) == 0) { fitit = TRUE; *addr = round_page(*addr); } else { if (*addr != trunc_page(*addr)) return (EINVAL); fitit = FALSE; } if (flags & MAP_ANON) { if (object != NULL || foff != 0) return (EINVAL); docow = 0; } else if (flags & MAP_PREFAULT_READ) docow = MAP_PREFAULT; else docow = MAP_PREFAULT_PARTIAL; if ((flags & (MAP_ANON|MAP_SHARED)) == 0) docow |= MAP_COPY_ON_WRITE; if (flags & MAP_NOSYNC) docow |= MAP_DISABLE_SYNCER; if (flags & MAP_NOCORE) docow |= MAP_DISABLE_COREDUMP; /* Shared memory is also shared with children. */ if (flags & MAP_SHARED) docow |= MAP_INHERIT_SHARE; if (writecounted) docow |= MAP_WRITECOUNT; if (flags & MAP_STACK) { if (object != NULL) return (EINVAL); docow |= MAP_STACK_GROWS_DOWN; } if ((flags & MAP_EXCL) != 0) docow |= MAP_CHECK_EXCL; if ((flags & MAP_GUARD) != 0) docow |= MAP_CREATE_GUARD; if (fitit) { if ((flags & MAP_ALIGNMENT_MASK) == MAP_ALIGNED_SUPER) findspace = VMFS_SUPER_SPACE; else if ((flags & MAP_ALIGNMENT_MASK) != 0) findspace = VMFS_ALIGNED_SPACE(flags >> MAP_ALIGNMENT_SHIFT); else findspace = VMFS_OPTIMAL_SPACE; max_addr = 0; #ifdef MAP_32BIT if ((flags & MAP_32BIT) != 0) max_addr = MAP_32BIT_MAX_ADDR; #endif if (curmap) { rv = vm_map_find_min(map, object, foff, addr, size, round_page((vm_offset_t)td->td_proc->p_vmspace-> vm_daddr + lim_max(td, RLIMIT_DATA)), max_addr, findspace, prot, maxprot, docow); } else { rv = vm_map_find(map, object, foff, addr, size, max_addr, findspace, prot, maxprot, docow); } } else { rv = vm_map_fixed(map, object, foff, *addr, size, prot, maxprot, docow); } if (rv == KERN_SUCCESS) { /* * If the process has requested that all future mappings * be wired, then heed this. */ if ((map->flags & MAP_WIREFUTURE) != 0) { vm_map_lock(map); if ((map->flags & MAP_WIREFUTURE) != 0) (void)vm_map_wire_locked(map, *addr, *addr + size, VM_MAP_WIRE_USER | ((flags & MAP_STACK) ? VM_MAP_WIRE_HOLESOK : VM_MAP_WIRE_NOHOLES)); vm_map_unlock(map); } } return (vm_mmap_to_errno(rv)); } /* * Translate a Mach VM return code to zero on success or the appropriate errno * on failure. */ int vm_mmap_to_errno(int rv) { switch (rv) { case KERN_SUCCESS: return (0); case KERN_INVALID_ADDRESS: case KERN_NO_SPACE: return (ENOMEM); case KERN_PROTECTION_FAILURE: return (EACCES); default: return (EINVAL); } } diff --git a/usr.bin/truss/setup.c b/usr.bin/truss/setup.c index b5a1d4e32d2b..db5361a88e86 100644 --- a/usr.bin/truss/setup.c +++ b/usr.bin/truss/setup.c @@ -1,834 +1,834 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright 1997 Sean Eric Fagan * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Sean Eric Fagan * 4. Neither the name of the author may be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Various setup functions for truss. Not the cleanest-written code, * I'm afraid. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "truss.h" #include "syscall.h" #include "extern.h" struct procabi_table { const char *name; struct procabi *abi; }; static sig_atomic_t detaching; static void enter_syscall(struct trussinfo *, struct threadinfo *, struct ptrace_lwpinfo *); static void new_proc(struct trussinfo *, pid_t, lwpid_t); static struct procabi freebsd = { .type = "FreeBSD", .abi = SYSDECODE_ABI_FREEBSD, .pointer_size = sizeof(void *), .extra_syscalls = STAILQ_HEAD_INITIALIZER(freebsd.extra_syscalls), .syscalls = { NULL } }; #if !defined(__SIZEOF_POINTER__) #error "Use a modern compiler." #endif #if __SIZEOF_POINTER__ > 4 static struct procabi freebsd32 = { .type = "FreeBSD32", .abi = SYSDECODE_ABI_FREEBSD32, .pointer_size = sizeof(uint32_t), .compat_prefix = "freebsd32_", .extra_syscalls = STAILQ_HEAD_INITIALIZER(freebsd32.extra_syscalls), .syscalls = { NULL } }; #endif static struct procabi linux = { .type = "Linux", .abi = SYSDECODE_ABI_LINUX, .pointer_size = sizeof(void *), .extra_syscalls = STAILQ_HEAD_INITIALIZER(linux.extra_syscalls), .syscalls = { NULL } }; #if __SIZEOF_POINTER__ > 4 static struct procabi linux32 = { .type = "Linux32", .abi = SYSDECODE_ABI_LINUX32, .pointer_size = sizeof(uint32_t), .extra_syscalls = STAILQ_HEAD_INITIALIZER(linux32.extra_syscalls), .syscalls = { NULL } }; #endif static struct procabi_table abis[] = { #if __SIZEOF_POINTER__ == 4 { "FreeBSD ELF32", &freebsd }, #elif __SIZEOF_POINTER__ == 8 { "FreeBSD ELF64", &freebsd }, { "FreeBSD ELF32", &freebsd32 }, #else #error "Unsupported pointer size" #endif #if defined(__powerpc64__) { "FreeBSD ELF64 V2", &freebsd }, #endif #if defined(__amd64__) { "FreeBSD a.out", &freebsd32 }, #endif #if defined(__i386__) { "FreeBSD a.out", &freebsd }, #endif #if __SIZEOF_POINTER__ >= 8 { "Linux ELF64", &linux }, { "Linux ELF32", &linux32 }, #else { "Linux ELF32", &linux }, #endif }; /* * setup_and_wait() is called to start a process. All it really does * is fork(), enable tracing in the child, and then exec the given * command. At that point, the child process stops, and the parent * can wake up and deal with it. */ void setup_and_wait(struct trussinfo *info, char *command[]) { pid_t pid; pid = vfork(); if (pid == -1) err(1, "fork failed"); if (pid == 0) { /* Child */ ptrace(PT_TRACE_ME, 0, 0, 0); execvp(command[0], command); err(1, "execvp %s", command[0]); } /* Only in the parent here */ if (waitpid(pid, NULL, 0) < 0) err(1, "unexpect stop in waitpid"); new_proc(info, pid, 0); } /* * start_tracing is called to attach to an existing process. */ void start_tracing(struct trussinfo *info, pid_t pid) { int ret, retry; retry = 10; do { ret = ptrace(PT_ATTACH, pid, NULL, 0); usleep(200); } while (ret && retry-- > 0); if (ret) err(1, "can not attach to target process"); if (waitpid(pid, NULL, 0) < 0) err(1, "Unexpect stop in waitpid"); new_proc(info, pid, 0); } /* * Restore a process back to it's pre-truss state. * Called for SIGINT, SIGTERM, SIGQUIT. This only * applies if truss was told to monitor an already-existing * process. */ void restore_proc(int signo __unused) { detaching = 1; } static void detach_proc(pid_t pid) { int sig, status; /* * Stop the child so that we can detach. Filter out possible * lingering SIGTRAP events buffered in the threads. */ kill(pid, SIGSTOP); for (;;) { if (waitpid(pid, &status, 0) < 0) err(1, "Unexpected error in waitpid"); sig = WIFSTOPPED(status) ? WSTOPSIG(status) : 0; if (sig == SIGSTOP) break; if (sig == SIGTRAP) sig = 0; if (ptrace(PT_CONTINUE, pid, (caddr_t)1, sig) < 0) err(1, "Can not continue for detach"); } if (ptrace(PT_DETACH, pid, (caddr_t)1, 0) < 0) err(1, "Can not detach the process"); kill(pid, SIGCONT); } /* * Determine the ABI. This is called after every exec, and when * a process is first monitored. */ static struct procabi * find_abi(pid_t pid) { size_t len; unsigned int i; int error; int mib[4]; char progt[32]; len = sizeof(progt); mib[0] = CTL_KERN; mib[1] = KERN_PROC; mib[2] = KERN_PROC_SV_NAME; mib[3] = pid; error = sysctl(mib, 4, progt, &len, NULL, 0); if (error != 0) err(2, "can not get sysvec name"); for (i = 0; i < nitems(abis); i++) { if (strcmp(abis[i].name, progt) == 0) return (abis[i].abi); } warnx("ABI %s for pid %ld is not supported", progt, (long)pid); return (NULL); } static struct threadinfo * new_thread(struct procinfo *p, lwpid_t lwpid) { struct threadinfo *nt; /* * If this happens it means there is a bug in truss. Unfortunately * this will kill any processes truss is attached to. */ LIST_FOREACH(nt, &p->threadlist, entries) { if (nt->tid == lwpid) errx(1, "Duplicate thread for LWP %ld", (long)lwpid); } nt = calloc(1, sizeof(struct threadinfo)); if (nt == NULL) err(1, "calloc() failed"); nt->proc = p; nt->tid = lwpid; LIST_INSERT_HEAD(&p->threadlist, nt, entries); return (nt); } static void free_thread(struct threadinfo *t) { LIST_REMOVE(t, entries); free(t); } static void add_threads(struct trussinfo *info, struct procinfo *p) { struct ptrace_lwpinfo pl; struct threadinfo *t; lwpid_t *lwps; int i, nlwps; nlwps = ptrace(PT_GETNUMLWPS, p->pid, NULL, 0); if (nlwps == -1) err(1, "Unable to fetch number of LWPs"); assert(nlwps > 0); lwps = calloc(nlwps, sizeof(*lwps)); nlwps = ptrace(PT_GETLWPLIST, p->pid, (caddr_t)lwps, nlwps); if (nlwps == -1) err(1, "Unable to fetch LWP list"); for (i = 0; i < nlwps; i++) { t = new_thread(p, lwps[i]); if (ptrace(PT_LWPINFO, lwps[i], (caddr_t)&pl, sizeof(pl)) == -1) err(1, "ptrace(PT_LWPINFO)"); if (pl.pl_flags & PL_FLAG_SCE) { info->curthread = t; enter_syscall(info, t, &pl); } } free(lwps); } static void new_proc(struct trussinfo *info, pid_t pid, lwpid_t lwpid) { struct procinfo *np; /* * If this happens it means there is a bug in truss. Unfortunately * this will kill any processes truss is attached to. */ LIST_FOREACH(np, &info->proclist, entries) { if (np->pid == pid) errx(1, "Duplicate process for pid %ld", (long)pid); } if (info->flags & FOLLOWFORKS) if (ptrace(PT_FOLLOW_FORK, pid, NULL, 1) == -1) err(1, "Unable to follow forks for pid %ld", (long)pid); if (ptrace(PT_LWP_EVENTS, pid, NULL, 1) == -1) err(1, "Unable to enable LWP events for pid %ld", (long)pid); np = calloc(1, sizeof(struct procinfo)); np->pid = pid; np->abi = find_abi(pid); LIST_INIT(&np->threadlist); LIST_INSERT_HEAD(&info->proclist, np, entries); if (lwpid != 0) new_thread(np, lwpid); else add_threads(info, np); } static void free_proc(struct procinfo *p) { struct threadinfo *t, *t2; LIST_FOREACH_SAFE(t, &p->threadlist, entries, t2) { free(t); } LIST_REMOVE(p, entries); free(p); } static void detach_all_procs(struct trussinfo *info) { struct procinfo *p, *p2; LIST_FOREACH_SAFE(p, &info->proclist, entries, p2) { detach_proc(p->pid); free_proc(p); } } static struct procinfo * find_proc(struct trussinfo *info, pid_t pid) { struct procinfo *np; LIST_FOREACH(np, &info->proclist, entries) { if (np->pid == pid) return (np); } return (NULL); } /* * Change curthread member based on (pid, lwpid). */ static void find_thread(struct trussinfo *info, pid_t pid, lwpid_t lwpid) { struct procinfo *np; struct threadinfo *nt; np = find_proc(info, pid); assert(np != NULL); LIST_FOREACH(nt, &np->threadlist, entries) { if (nt->tid == lwpid) { info->curthread = nt; return; } } errx(1, "could not find thread"); } /* * When a process exits, it should have exactly one thread left. * All of the other threads should have reported thread exit events. */ static void find_exit_thread(struct trussinfo *info, pid_t pid) { struct procinfo *p; p = find_proc(info, pid); assert(p != NULL); info->curthread = LIST_FIRST(&p->threadlist); assert(info->curthread != NULL); assert(LIST_NEXT(info->curthread, entries) == NULL); } static void alloc_syscall(struct threadinfo *t, struct ptrace_lwpinfo *pl) { u_int i; assert(t->in_syscall == 0); assert(t->cs.number == 0); assert(t->cs.sc == NULL); assert(t->cs.nargs == 0); for (i = 0; i < nitems(t->cs.s_args); i++) assert(t->cs.s_args[i] == NULL); memset(t->cs.args, 0, sizeof(t->cs.args)); t->cs.number = pl->pl_syscall_code; t->in_syscall = 1; } static void free_syscall(struct threadinfo *t) { u_int i; for (i = 0; i < t->cs.nargs; i++) free(t->cs.s_args[i]); memset(&t->cs, 0, sizeof(t->cs)); t->in_syscall = 0; } static void enter_syscall(struct trussinfo *info, struct threadinfo *t, struct ptrace_lwpinfo *pl) { struct syscall *sc; u_int i, narg; alloc_syscall(t, pl); narg = MIN(pl->pl_syscall_narg, nitems(t->cs.args)); if (narg != 0 && ptrace(PT_GET_SC_ARGS, t->tid, (caddr_t)t->cs.args, sizeof(t->cs.args)) != 0) { free_syscall(t); return; } sc = get_syscall(t, t->cs.number, narg); if (sc->unknown) fprintf(info->outfile, "-- UNKNOWN %s SYSCALL %d --\n", t->proc->abi->type, t->cs.number); t->cs.nargs = sc->decode.nargs; assert(sc->decode.nargs <= nitems(t->cs.s_args)); t->cs.sc = sc; /* * At this point, we set up the system call arguments. * We ignore any OUT ones, however -- those are arguments that * are set by the system call, and so are probably meaningless * now. This doesn't currently support arguments that are * passed in *and* out, however. */ #if DEBUG fprintf(stderr, "syscall %s(", sc->name); #endif for (i = 0; i < t->cs.nargs; i++) { #if DEBUG fprintf(stderr, "0x%lx%s", t->cs.args[sc->decode.args[i].offset], i < (t->cs.nargs - 1) ? "," : ""); #endif if (!(sc->decode.args[i].type & OUT)) { t->cs.s_args[i] = print_arg(&sc->decode.args[i], t->cs.args, NULL, info); } } #if DEBUG fprintf(stderr, ")\n"); #endif clock_gettime(CLOCK_REALTIME, &t->before); } /* * When a thread exits voluntarily (including when a thread calls * exit() to trigger a process exit), the thread's internal state * holds the arguments passed to the exit system call. When the * thread's exit is reported, log that system call without a return * value. */ static void thread_exit_syscall(struct trussinfo *info) { struct threadinfo *t; t = info->curthread; if (!t->in_syscall) return; clock_gettime(CLOCK_REALTIME, &t->after); print_syscall_ret(info, 0, NULL); free_syscall(t); } static void exit_syscall(struct trussinfo *info, struct ptrace_lwpinfo *pl) { struct threadinfo *t; struct procinfo *p; struct syscall *sc; struct ptrace_sc_ret psr; u_int i; t = info->curthread; if (!t->in_syscall) return; clock_gettime(CLOCK_REALTIME, &t->after); p = t->proc; if (ptrace(PT_GET_SC_RET, t->tid, (caddr_t)&psr, sizeof(psr)) != 0) { free_syscall(t); return; } sc = t->cs.sc; /* * Here, we only look for arguments that have OUT masked in -- * otherwise, they were handled in enter_syscall(). */ for (i = 0; i < sc->decode.nargs; i++) { char *temp; if (sc->decode.args[i].type & OUT) { /* * If an error occurred, then don't bother * getting the data; it may not be valid. */ if (psr.sr_error != 0) { asprintf(&temp, "0x%lx", - t->cs.args[sc->decode.args[i].offset]); + (long)t->cs.args[sc->decode.args[i].offset]); } else { temp = print_arg(&sc->decode.args[i], t->cs.args, psr.sr_retval, info); } t->cs.s_args[i] = temp; } } print_syscall_ret(info, psr.sr_error, psr.sr_retval); free_syscall(t); /* * If the process executed a new image, check the ABI. If the * new ABI isn't supported, stop tracing this process. */ if (pl->pl_flags & PL_FLAG_EXEC) { assert(LIST_NEXT(LIST_FIRST(&p->threadlist), entries) == NULL); p->abi = find_abi(p->pid); if (p->abi == NULL) { if (ptrace(PT_DETACH, p->pid, (caddr_t)1, 0) < 0) err(1, "Can not detach the process"); free_proc(p); } } } int print_line_prefix(struct trussinfo *info) { struct timespec timediff; struct threadinfo *t; int len; len = 0; t = info->curthread; if (info->flags & (FOLLOWFORKS | DISPLAYTIDS)) { if (info->flags & FOLLOWFORKS) len += fprintf(info->outfile, "%5d", t->proc->pid); if ((info->flags & (FOLLOWFORKS | DISPLAYTIDS)) == (FOLLOWFORKS | DISPLAYTIDS)) len += fprintf(info->outfile, " "); if (info->flags & DISPLAYTIDS) len += fprintf(info->outfile, "%6d", t->tid); len += fprintf(info->outfile, ": "); } if (info->flags & ABSOLUTETIMESTAMPS) { timespecsub(&t->after, &info->start_time, &timediff); len += fprintf(info->outfile, "%jd.%09ld ", (intmax_t)timediff.tv_sec, timediff.tv_nsec); } if (info->flags & RELATIVETIMESTAMPS) { timespecsub(&t->after, &t->before, &timediff); len += fprintf(info->outfile, "%jd.%09ld ", (intmax_t)timediff.tv_sec, timediff.tv_nsec); } return (len); } static void report_thread_death(struct trussinfo *info) { struct threadinfo *t; t = info->curthread; clock_gettime(CLOCK_REALTIME, &t->after); print_line_prefix(info); fprintf(info->outfile, "\n", (long)t->tid); } static void report_thread_birth(struct trussinfo *info) { struct threadinfo *t; t = info->curthread; clock_gettime(CLOCK_REALTIME, &t->after); t->before = t->after; print_line_prefix(info); fprintf(info->outfile, "\n", (long)t->tid); } static void report_exit(struct trussinfo *info, siginfo_t *si) { struct threadinfo *t; t = info->curthread; clock_gettime(CLOCK_REALTIME, &t->after); print_line_prefix(info); if (si->si_code == CLD_EXITED) fprintf(info->outfile, "process exit, rval = %u\n", si->si_status); else fprintf(info->outfile, "process killed, signal = %u%s\n", si->si_status, si->si_code == CLD_DUMPED ? " (core dumped)" : ""); } static void report_new_child(struct trussinfo *info) { struct threadinfo *t; t = info->curthread; clock_gettime(CLOCK_REALTIME, &t->after); t->before = t->after; print_line_prefix(info); fprintf(info->outfile, "\n"); } void decode_siginfo(FILE *fp, siginfo_t *si) { const char *str; fprintf(fp, " code="); str = sysdecode_sigcode(si->si_signo, si->si_code); if (str == NULL) fprintf(fp, "%d", si->si_code); else fprintf(fp, "%s", str); switch (si->si_code) { case SI_NOINFO: break; case SI_QUEUE: fprintf(fp, " value=%p", si->si_value.sival_ptr); /* FALLTHROUGH */ case SI_USER: case SI_LWP: fprintf(fp, " pid=%jd uid=%jd", (intmax_t)si->si_pid, (intmax_t)si->si_uid); break; case SI_TIMER: fprintf(fp, " value=%p", si->si_value.sival_ptr); fprintf(fp, " timerid=%d", si->si_timerid); fprintf(fp, " overrun=%d", si->si_overrun); if (si->si_errno != 0) fprintf(fp, " errno=%d", si->si_errno); break; case SI_ASYNCIO: fprintf(fp, " value=%p", si->si_value.sival_ptr); break; case SI_MESGQ: fprintf(fp, " value=%p", si->si_value.sival_ptr); fprintf(fp, " mqd=%d", si->si_mqd); break; default: switch (si->si_signo) { case SIGILL: case SIGFPE: case SIGSEGV: case SIGBUS: fprintf(fp, " trapno=%d", si->si_trapno); fprintf(fp, " addr=%p", si->si_addr); break; case SIGCHLD: fprintf(fp, " pid=%jd uid=%jd", (intmax_t)si->si_pid, (intmax_t)si->si_uid); fprintf(fp, " status=%d", si->si_status); break; } } } static void report_signal(struct trussinfo *info, siginfo_t *si, struct ptrace_lwpinfo *pl) { struct threadinfo *t; const char *signame; t = info->curthread; clock_gettime(CLOCK_REALTIME, &t->after); print_line_prefix(info); signame = sysdecode_signal(si->si_status); if (signame == NULL) signame = "?"; fprintf(info->outfile, "SIGNAL %u (%s)", si->si_status, signame); if (pl->pl_event == PL_EVENT_SIGNAL && pl->pl_flags & PL_FLAG_SI) decode_siginfo(info->outfile, &pl->pl_siginfo); fprintf(info->outfile, "\n"); } /* * Wait for events until all the processes have exited or truss has been * asked to stop. */ void eventloop(struct trussinfo *info) { struct ptrace_lwpinfo pl; siginfo_t si; int pending_signal; while (!LIST_EMPTY(&info->proclist)) { if (detaching) { detach_all_procs(info); return; } if (waitid(P_ALL, 0, &si, WTRAPPED | WEXITED) == -1) { if (errno == EINTR) continue; err(1, "Unexpected error from waitid"); } assert(si.si_signo == SIGCHLD); switch (si.si_code) { case CLD_EXITED: case CLD_KILLED: case CLD_DUMPED: find_exit_thread(info, si.si_pid); if ((info->flags & COUNTONLY) == 0) { if (si.si_code == CLD_EXITED) thread_exit_syscall(info); report_exit(info, &si); } free_proc(info->curthread->proc); info->curthread = NULL; break; case CLD_TRAPPED: if (ptrace(PT_LWPINFO, si.si_pid, (caddr_t)&pl, sizeof(pl)) == -1) err(1, "ptrace(PT_LWPINFO)"); if (pl.pl_flags & PL_FLAG_CHILD) { new_proc(info, si.si_pid, pl.pl_lwpid); assert(LIST_FIRST(&info->proclist)->abi != NULL); } else if (pl.pl_flags & PL_FLAG_BORN) new_thread(find_proc(info, si.si_pid), pl.pl_lwpid); find_thread(info, si.si_pid, pl.pl_lwpid); if (si.si_status == SIGTRAP && (pl.pl_flags & (PL_FLAG_BORN|PL_FLAG_EXITED| PL_FLAG_SCE|PL_FLAG_SCX)) != 0) { if (pl.pl_flags & PL_FLAG_BORN) { if ((info->flags & COUNTONLY) == 0) report_thread_birth(info); } else if (pl.pl_flags & PL_FLAG_EXITED) { if ((info->flags & COUNTONLY) == 0) report_thread_death(info); free_thread(info->curthread); info->curthread = NULL; } else if (pl.pl_flags & PL_FLAG_SCE) enter_syscall(info, info->curthread, &pl); else if (pl.pl_flags & PL_FLAG_SCX) exit_syscall(info, &pl); pending_signal = 0; } else if (pl.pl_flags & PL_FLAG_CHILD) { if ((info->flags & COUNTONLY) == 0) report_new_child(info); pending_signal = 0; } else { if ((info->flags & NOSIGS) == 0) report_signal(info, &si, &pl); pending_signal = si.si_status; } ptrace(PT_SYSCALL, si.si_pid, (caddr_t)1, pending_signal); break; case CLD_STOPPED: errx(1, "waitid reported CLD_STOPPED"); case CLD_CONTINUED: break; } } } diff --git a/usr.bin/truss/syscall.h b/usr.bin/truss/syscall.h index 53a1fd6ee8d7..c16aed732230 100644 --- a/usr.bin/truss/syscall.h +++ b/usr.bin/truss/syscall.h @@ -1,276 +1,276 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright 1997 Sean Eric Fagan * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Sean Eric Fagan * 4. Neither the name of the author may be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * System call arguments come in several flavours: * Hex -- values that should be printed in hex (addresses) * Octal -- Same as above, but octal * Int -- normal integer values (file descriptors, for example) * LongHex -- long value that should be printed in hex * Name -- pointer to a NULL-terminated string. * BinString -- pointer to an array of chars, printed via strvisx(). * Ptr -- pointer to some unspecified structure. Just print as hex for now. * Stat -- a pointer to a stat buffer. Prints a couple fields. * Stat11 -- a pointer to a freebsd 11 stat buffer. Prints a couple fields. * StatFs -- a pointer to a statfs buffer. Prints a few fields. * Ioctl -- an ioctl command. Woefully limited. * Quad -- a double-word value. e.g., lseek(int, offset_t, int) * Signal -- a signal number. Prints the signal name (SIGxxx) * Sockaddr -- a pointer to a struct sockaddr. Prints symbolic AF, and IP:Port * StringArray -- a pointer to an array of string pointers. * Timespec -- a pointer to a struct timespec. Prints both elements. * Timeval -- a pointer to a struct timeval. Prints both elements. * Timeval2 -- a pointer to two struct timevals. Prints both elements of both. * Itimerval -- a pointer to a struct itimerval. Prints all elements. * Pollfd -- a pointer to an array of struct pollfd. Prints .fd and .events. * Fd_set -- a pointer to an array of fd_set. Prints the fds that are set. * Sigaction -- a pointer to a struct sigaction. Prints all elements. * Sigset -- a pointer to a sigset_t. Prints the signals that are set. * Sigprocmask -- the first argument to sigprocmask(). Prints the name. * Kevent -- a pointer to an array of struct kevents. Prints all elements. * Pathconf -- the 2nd argument of pathconf(). * Utrace -- utrace(2) buffer. * CapRights -- a pointer to a cap_rights_t. Prints all set capabilities. * * In addition, the pointer types (String, Ptr) may have OUT masked in -- * this means that the data is set on *return* from the system call -- or * IN (meaning that the data is passed *into* the system call). */ enum Argtype { None = 1, /* Scalar integers. */ Socklent, Octal, Int, UInt, Hex, Long, LongHex, Sizet, Quad, QuadHex, /* Encoded scalar values. */ Accessmode, Acltype, AiofsyncOp, Atfd, Atflags, CapFcntlRights, Closerangeflags, Extattrnamespace, Fadvice, Fcntl, Fcntlflag, FileFlags, Flockop, Getfsstatmode, Idtype, Ioctl, Kldsymcmd, Kldunloadflags, LioMode, Madvice, Minherit, Msgflags, Mlockall, Mmapflags, Mountflags, Mprot, Msync, Open, Pathconf, Pipe2, Procctl, Priowhich, Ptraceop, Sendfileflags, Sendfilehdtr, Quotactlcmd, Reboothowto, Resource, Rforkflags, Rtpriofunc, RusageWho, Schedpolicy, ShmFlags, Shutdown, Signal, Sigprocmask, Sockdomain, Sockoptlevel, Sockoptname, Sockprotocol, Socktype, Sysarch, Sysctl, Umtxop, Waitoptions, Whence, /* Pointers to non-structures. */ Ptr, AiocbArray, AiocbPointer, BinString, CapRights, ExecArgs, ExecEnv, ExitStatus, Fd_set, IntArray, Iovec, Name, PipeFds, PSig, PQuadHex, PUInt, Readlinkres, ShmName, StringArray, /* Pointers to structures. */ Aiocb, Itimerval, Kevent, Kevent11, LinuxSockArgs, Msghdr, Pollfd, Rlimit, Rusage, Schedparam, Sctpsndrcvinfo, Sigaction, Sigevent, Siginfo, Sigset, Sockaddr, Stat, Stat11, StatFs, Timespec, Timespec2, Timeval, Timeval2, Utrace, MAX_ARG_TYPE, }; #define ARG_MASK 0xff #define OUT 0x100 #define IN /*0x20*/0 _Static_assert(ARG_MASK > MAX_ARG_TYPE, "ARG_MASK overlaps with Argtype values"); struct syscall_arg { enum Argtype type; int offset; }; struct syscall_decode { const char *name; /* Name for calling convention lookup. */ /* * Syscall return type: * 0: no return value (e.g. exit) * 1: normal return value (a single int/long/pointer) * 2: off_t return value (two values for 32-bit ABIs) */ u_int ret_type; u_int nargs; /* number of meaningful arguments */ struct syscall_arg args[10]; /* Hopefully no syscalls with > 10 args */ }; struct syscall { STAILQ_ENTRY(syscall) entries; const char *name; /* Name to be displayed, might be malloc()'d */ struct syscall_decode decode; struct timespec time; /* Time spent for this call */ int ncalls; /* Number of calls */ int nerror; /* Number of calls that returned with error */ bool unknown; /* Unknown system call */ }; struct syscall *get_syscall(struct threadinfo *, u_int, u_int); -char *print_arg(struct syscall_arg *, unsigned long *, register_t *, +char *print_arg(struct syscall_arg *, syscallarg_t *, syscallarg_t *, struct trussinfo *); /* * Linux Socket defines */ #define LINUX_SOCKET 1 #define LINUX_BIND 2 #define LINUX_CONNECT 3 #define LINUX_LISTEN 4 #define LINUX_ACCEPT 5 #define LINUX_GETSOCKNAME 6 #define LINUX_GETPEERNAME 7 #define LINUX_SOCKETPAIR 8 #define LINUX_SEND 9 #define LINUX_RECV 10 #define LINUX_SENDTO 11 #define LINUX_RECVFROM 12 #define LINUX_SHUTDOWN 13 #define LINUX_SETSOCKOPT 14 #define LINUX_GETSOCKOPT 15 #define LINUX_SENDMSG 16 #define LINUX_RECVMSG 17 -#define PAD_(t) (sizeof(register_t) <= sizeof(t) ? \ - 0 : sizeof(register_t) - sizeof(t)) +#define PAD_(t) (sizeof(syscallarg_t) <= sizeof(t) ? \ + 0 : sizeof(syscallarg_t) - sizeof(t)) #if BYTE_ORDER == LITTLE_ENDIAN #define PADL_(t) 0 #define PADR_(t) PAD_(t) #else #define PADL_(t) PAD_(t) #define PADR_(t) 0 #endif typedef int l_int; typedef uint32_t l_ulong; struct linux_socketcall_args { char what_l_[PADL_(l_int)]; l_int what; char what_r_[PADR_(l_int)]; char args_l_[PADL_(l_ulong)]; l_ulong args; char args_r_[PADR_(l_ulong)]; }; void print_syscall(struct trussinfo *); -void print_syscall_ret(struct trussinfo *, int, register_t *); +void print_syscall_ret(struct trussinfo *, int, syscallarg_t *); void print_summary(struct trussinfo *trussinfo); diff --git a/usr.bin/truss/syscalls.c b/usr.bin/truss/syscalls.c index d611f3f73471..bd055fb687ed 100644 --- a/usr.bin/truss/syscalls.c +++ b/usr.bin/truss/syscalls.c @@ -1,2814 +1,2814 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright 1997 Sean Eric Fagan * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Sean Eric Fagan * 4. Neither the name of the author may be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * This file has routines used to print out system calls and their * arguments. */ #include #include #include #define _WANT_FREEBSD11_KEVENT #include #include #include #include #include #include #include #include #include #include #define _WANT_FREEBSD11_STAT #include #include #include #include #include #include #include #include #include #include #include #define _WANT_KERNEL_ERRNO #include #include #include #include #include #include #include #include #include #include #include #include "truss.h" #include "extern.h" #include "syscall.h" /* * This should probably be in its own file, sorted alphabetically. * * Note: We only scan this table on the initial syscall number to calling * convention lookup, i.e. once each time a new syscall is encountered. This * is unlikely to be a performance issue, but if it is we could sort this array * and use a binary search instead. */ static const struct syscall_decode decoded_syscalls[] = { /* Native ABI */ { .name = "__acl_aclcheck_fd", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_aclcheck_file", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_aclcheck_link", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_delete_fd", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Acltype, 1 } } }, { .name = "__acl_delete_file", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Acltype, 1 } } }, { .name = "__acl_delete_link", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Acltype, 1 } } }, { .name = "__acl_get_fd", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_get_file", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_get_link", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_set_fd", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_set_file", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_set_link", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__cap_rights_get", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Int, 1 }, { CapRights | OUT, 2 } } }, { .name = "__getcwd", .ret_type = 1, .nargs = 2, .args = { { Name | OUT, 0 }, { Int, 1 } } }, { .name = "__realpathat", .ret_type = 1, .nargs = 5, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Name | OUT, 2 }, { Sizet, 3 }, { Int, 4} } }, { .name = "_umtx_op", .ret_type = 1, .nargs = 5, .args = { { Ptr, 0 }, { Umtxop, 1 }, { LongHex, 2 }, { Ptr, 3 }, { Ptr, 4 } } }, { .name = "accept", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | OUT, 1 }, { Ptr | OUT, 2 } } }, { .name = "access", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Accessmode, 1 } } }, { .name = "aio_cancel", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Aiocb, 1 } } }, { .name = "aio_error", .ret_type = 1, .nargs = 1, .args = { { Aiocb, 0 } } }, { .name = "aio_fsync", .ret_type = 1, .nargs = 2, .args = { { AiofsyncOp, 0 }, { Aiocb, 1 } } }, { .name = "aio_mlock", .ret_type = 1, .nargs = 1, .args = { { Aiocb, 0 } } }, { .name = "aio_read", .ret_type = 1, .nargs = 1, .args = { { Aiocb, 0 } } }, { .name = "aio_return", .ret_type = 1, .nargs = 1, .args = { { Aiocb, 0 } } }, { .name = "aio_suspend", .ret_type = 1, .nargs = 3, .args = { { AiocbArray, 0 }, { Int, 1 }, { Timespec, 2 } } }, { .name = "aio_waitcomplete", .ret_type = 1, .nargs = 2, .args = { { AiocbPointer | OUT, 0 }, { Timespec, 1 } } }, { .name = "aio_write", .ret_type = 1, .nargs = 1, .args = { { Aiocb, 0 } } }, { .name = "bind", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | IN, 1 }, { Socklent, 2 } } }, { .name = "bindat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Int, 1 }, { Sockaddr | IN, 2 }, { Int, 3 } } }, { .name = "break", .ret_type = 1, .nargs = 1, .args = { { Ptr, 0 } } }, { .name = "cap_fcntls_get", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { CapFcntlRights | OUT, 1 } } }, { .name = "cap_fcntls_limit", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { CapFcntlRights, 1 } } }, { .name = "cap_getmode", .ret_type = 1, .nargs = 1, .args = { { PUInt | OUT, 0 } } }, { .name = "cap_rights_limit", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { CapRights, 1 } } }, { .name = "chdir", .ret_type = 1, .nargs = 1, .args = { { Name, 0 } } }, { .name = "chflags", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { FileFlags, 1 } } }, { .name = "chflagsat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { FileFlags, 2 }, { Atflags, 3 } } }, { .name = "chmod", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Octal, 1 } } }, { .name = "chown", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "chroot", .ret_type = 1, .nargs = 1, .args = { { Name, 0 } } }, { .name = "clock_gettime", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Timespec | OUT, 1 } } }, { .name = "close", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "closefrom", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "close_range", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Int, 1 }, { Closerangeflags, 2 } } }, { .name = "compat11.fstat", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Stat11 | OUT, 1 } } }, { .name = "compat11.fstatat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Stat11 | OUT, 2 }, { Atflags, 3 } } }, { .name = "compat11.kevent", .ret_type = 1, .nargs = 6, .args = { { Int, 0 }, { Kevent11, 1 }, { Int, 2 }, { Kevent11 | OUT, 3 }, { Int, 4 }, { Timespec, 5 } } }, { .name = "compat11.lstat", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Stat11 | OUT, 1 } } }, { .name = "compat11.mknod", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Octal, 1 }, { Int, 2 } } }, { .name = "compat11.mknodat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name, 1 }, { Octal, 2 }, { Int, 3 } } }, { .name = "compat11.stat", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Stat11 | OUT, 1 } } }, { .name = "connect", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | IN, 1 }, { Socklent, 2 } } }, { .name = "connectat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Int, 1 }, { Sockaddr | IN, 2 }, { Int, 3 } } }, { .name = "dup", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "dup2", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Int, 1 } } }, { .name = "eaccess", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Accessmode, 1 } } }, { .name = "execve", .ret_type = 1, .nargs = 3, .args = { { Name | IN, 0 }, { ExecArgs | IN, 1 }, { ExecEnv | IN, 2 } } }, { .name = "exit", .ret_type = 0, .nargs = 1, .args = { { Hex, 0 } } }, { .name = "extattr_delete_fd", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Extattrnamespace, 1 }, { Name, 2 } } }, { .name = "extattr_delete_file", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 } } }, { .name = "extattr_delete_link", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 } } }, { .name = "extattr_get_fd", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | OUT, 3 }, { Sizet, 4 } } }, { .name = "extattr_get_file", .ret_type = 1, .nargs = 5, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | OUT, 3 }, { Sizet, 4 } } }, { .name = "extattr_get_link", .ret_type = 1, .nargs = 5, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | OUT, 3 }, { Sizet, 4 } } }, { .name = "extattr_list_fd", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { Extattrnamespace, 1 }, { BinString | OUT, 2 }, { Sizet, 3 } } }, { .name = "extattr_list_file", .ret_type = 1, .nargs = 4, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { BinString | OUT, 2 }, { Sizet, 3 } } }, { .name = "extattr_list_link", .ret_type = 1, .nargs = 4, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { BinString | OUT, 2 }, { Sizet, 3 } } }, { .name = "extattr_set_fd", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | IN, 3 }, { Sizet, 4 } } }, { .name = "extattr_set_file", .ret_type = 1, .nargs = 5, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | IN, 3 }, { Sizet, 4 } } }, { .name = "extattr_set_link", .ret_type = 1, .nargs = 5, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | IN, 3 }, { Sizet, 4 } } }, { .name = "extattrctl", .ret_type = 1, .nargs = 5, .args = { { Name, 0 }, { Hex, 1 }, { Name, 2 }, { Extattrnamespace, 3 }, { Name, 4 } } }, { .name = "faccessat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Accessmode, 2 }, { Atflags, 3 } } }, { .name = "fchflags", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { FileFlags, 1 } } }, { .name = "fchmod", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Octal, 1 } } }, { .name = "fchmodat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name, 1 }, { Octal, 2 }, { Atflags, 3 } } }, { .name = "fchown", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "fchownat", .ret_type = 1, .nargs = 5, .args = { { Atfd, 0 }, { Name, 1 }, { Int, 2 }, { Int, 3 }, { Atflags, 4 } } }, { .name = "fcntl", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Fcntl, 1 }, { Fcntlflag, 2 } } }, { .name = "fdatasync", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "flock", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Flockop, 1 } } }, { .name = "fstat", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Stat | OUT, 1 } } }, { .name = "fstatat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Stat | OUT, 2 }, { Atflags, 3 } } }, { .name = "fstatfs", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { StatFs | OUT, 1 } } }, { .name = "fsync", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "ftruncate", .ret_type = 1, .nargs = 2, .args = { { Int | IN, 0 }, { QuadHex | IN, 1 } } }, { .name = "futimens", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Timespec2 | IN, 1 } } }, { .name = "futimes", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Timeval2 | IN, 1 } } }, { .name = "futimesat", .ret_type = 1, .nargs = 3, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Timeval2 | IN, 2 } } }, { .name = "getdirentries", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { BinString | OUT, 1 }, { Int, 2 }, { PQuadHex | OUT, 3 } } }, { .name = "getfsstat", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Long, 1 }, { Getfsstatmode, 2 } } }, { .name = "getitimer", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Itimerval | OUT, 2 } } }, { .name = "getpeername", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | OUT, 1 }, { Ptr | OUT, 2 } } }, { .name = "getpgid", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "getpriority", .ret_type = 1, .nargs = 2, .args = { { Priowhich, 0 }, { Int, 1 } } }, { .name = "getrandom", .ret_type = 1, .nargs = 3, .args = { { BinString | OUT, 0 }, { Sizet, 1 }, { UInt, 2 } } }, { .name = "getrlimit", .ret_type = 1, .nargs = 2, .args = { { Resource, 0 }, { Rlimit | OUT, 1 } } }, { .name = "getrusage", .ret_type = 1, .nargs = 2, .args = { { RusageWho, 0 }, { Rusage | OUT, 1 } } }, { .name = "getsid", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "getsockname", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | OUT, 1 }, { Ptr | OUT, 2 } } }, { .name = "getsockopt", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Sockoptlevel, 1 }, { Sockoptname, 2 }, { Ptr | OUT, 3 }, { Ptr | OUT, 4 } } }, { .name = "gettimeofday", .ret_type = 1, .nargs = 2, .args = { { Timeval | OUT, 0 }, { Ptr, 1 } } }, { .name = "ioctl", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Ioctl, 1 }, { Ptr, 2 } } }, { .name = "kevent", .ret_type = 1, .nargs = 6, .args = { { Int, 0 }, { Kevent, 1 }, { Int, 2 }, { Kevent | OUT, 3 }, { Int, 4 }, { Timespec, 5 } } }, { .name = "kill", .ret_type = 1, .nargs = 2, .args = { { Int | IN, 0 }, { Signal | IN, 1 } } }, { .name = "kldfind", .ret_type = 1, .nargs = 1, .args = { { Name | IN, 0 } } }, { .name = "kldfirstmod", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "kldload", .ret_type = 1, .nargs = 1, .args = { { Name | IN, 0 } } }, { .name = "kldnext", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "kldstat", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Ptr, 1 } } }, { .name = "kldsym", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Kldsymcmd, 1 }, { Ptr, 2 } } }, { .name = "kldunload", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "kldunloadf", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Kldunloadflags, 1 } } }, { .name = "kse_release", .ret_type = 0, .nargs = 1, .args = { { Timespec, 0 } } }, { .name = "lchflags", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { FileFlags, 1 } } }, { .name = "lchmod", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Octal, 1 } } }, { .name = "lchown", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "link", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Name, 1 } } }, { .name = "linkat", .ret_type = 1, .nargs = 5, .args = { { Atfd, 0 }, { Name, 1 }, { Atfd, 2 }, { Name, 3 }, { Atflags, 4 } } }, { .name = "lio_listio", .ret_type = 1, .nargs = 4, .args = { { LioMode, 0 }, { AiocbArray, 1 }, { Int, 2 }, { Sigevent, 3 } } }, { .name = "listen", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Int, 1 } } }, { .name = "lseek", .ret_type = 2, .nargs = 3, .args = { { Int, 0 }, { QuadHex, 1 }, { Whence, 2 } } }, { .name = "lstat", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Stat | OUT, 1 } } }, { .name = "lutimes", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Timeval2 | IN, 1 } } }, { .name = "madvise", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Sizet, 1 }, { Madvice, 2 } } }, { .name = "minherit", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Sizet, 1 }, { Minherit, 2 } } }, { .name = "mkdir", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Octal, 1 } } }, { .name = "mkdirat", .ret_type = 1, .nargs = 3, .args = { { Atfd, 0 }, { Name, 1 }, { Octal, 2 } } }, { .name = "mkfifo", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Octal, 1 } } }, { .name = "mkfifoat", .ret_type = 1, .nargs = 3, .args = { { Atfd, 0 }, { Name, 1 }, { Octal, 2 } } }, { .name = "mknod", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Octal, 1 }, { Quad, 2 } } }, { .name = "mknodat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name, 1 }, { Octal, 2 }, { Quad, 3 } } }, { .name = "mlock", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Sizet, 1 } } }, { .name = "mlockall", .ret_type = 1, .nargs = 1, .args = { { Mlockall, 0 } } }, { .name = "mmap", .ret_type = 1, .nargs = 6, .args = { { Ptr, 0 }, { Sizet, 1 }, { Mprot, 2 }, { Mmapflags, 3 }, { Int, 4 }, { QuadHex, 5 } } }, { .name = "modfind", .ret_type = 1, .nargs = 1, .args = { { Name | IN, 0 } } }, { .name = "mount", .ret_type = 1, .nargs = 4, .args = { { Name, 0 }, { Name, 1 }, { Mountflags, 2 }, { Ptr, 3 } } }, { .name = "mprotect", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Sizet, 1 }, { Mprot, 2 } } }, { .name = "msync", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Sizet, 1 }, { Msync, 2 } } }, { .name = "munlock", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Sizet, 1 } } }, { .name = "munmap", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Sizet, 1 } } }, { .name = "nanosleep", .ret_type = 1, .nargs = 1, .args = { { Timespec, 0 } } }, { .name = "nmount", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { UInt, 1 }, { Mountflags, 2 } } }, { .name = "open", .ret_type = 1, .nargs = 3, .args = { { Name | IN, 0 }, { Open, 1 }, { Octal, 2 } } }, { .name = "openat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Open, 2 }, { Octal, 3 } } }, { .name = "pathconf", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Pathconf, 1 } } }, { .name = "pipe", .ret_type = 1, .nargs = 1, .args = { { PipeFds | OUT, 0 } } }, { .name = "pipe2", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Pipe2, 1 } } }, { .name = "poll", .ret_type = 1, .nargs = 3, .args = { { Pollfd, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "posix_fadvise", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { QuadHex, 1 }, { QuadHex, 2 }, { Fadvice, 3 } } }, { .name = "posix_openpt", .ret_type = 1, .nargs = 1, .args = { { Open, 0 } } }, { .name = "pread", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { BinString | OUT, 1 }, { Sizet, 2 }, { QuadHex, 3 } } }, { .name = "procctl", .ret_type = 1, .nargs = 4, .args = { { Idtype, 0 }, { Quad, 1 }, { Procctl, 2 }, { Ptr, 3 } } }, { .name = "ptrace", .ret_type = 1, .nargs = 4, .args = { { Ptraceop, 0 }, { Int, 1 }, { Ptr, 2 }, { Int, 3 } } }, { .name = "pwrite", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { BinString | IN, 1 }, { Sizet, 2 }, { QuadHex, 3 } } }, { .name = "quotactl", .ret_type = 1, .nargs = 4, .args = { { Name, 0 }, { Quotactlcmd, 1 }, { Int, 2 }, { Ptr, 3 } } }, { .name = "read", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { BinString | OUT, 1 }, { Sizet, 2 } } }, { .name = "readlink", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Readlinkres | OUT, 1 }, { Sizet, 2 } } }, { .name = "readlinkat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name, 1 }, { Readlinkres | OUT, 2 }, { Sizet, 3 } } }, { .name = "readv", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Iovec | OUT, 1 }, { Int, 2 } } }, { .name = "reboot", .ret_type = 1, .nargs = 1, .args = { { Reboothowto, 0 } } }, { .name = "recvfrom", .ret_type = 1, .nargs = 6, .args = { { Int, 0 }, { BinString | OUT, 1 }, { Sizet, 2 }, { Msgflags, 3 }, { Sockaddr | OUT, 4 }, { Ptr | OUT, 5 } } }, { .name = "recvmsg", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Msghdr | OUT, 1 }, { Msgflags, 2 } } }, { .name = "rename", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Name, 1 } } }, { .name = "renameat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name, 1 }, { Atfd, 2 }, { Name, 3 } } }, { .name = "rfork", .ret_type = 1, .nargs = 1, .args = { { Rforkflags, 0 } } }, { .name = "rmdir", .ret_type = 1, .nargs = 1, .args = { { Name, 0 } } }, { .name = "rtprio", .ret_type = 1, .nargs = 3, .args = { { Rtpriofunc, 0 }, { Int, 1 }, { Ptr, 2 } } }, { .name = "rtprio_thread", .ret_type = 1, .nargs = 3, .args = { { Rtpriofunc, 0 }, { Int, 1 }, { Ptr, 2 } } }, { .name = "sched_get_priority_max", .ret_type = 1, .nargs = 1, .args = { { Schedpolicy, 0 } } }, { .name = "sched_get_priority_min", .ret_type = 1, .nargs = 1, .args = { { Schedpolicy, 0 } } }, { .name = "sched_getparam", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Schedparam | OUT, 1 } } }, { .name = "sched_getscheduler", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "sched_rr_get_interval", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Timespec | OUT, 1 } } }, { .name = "sched_setparam", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Schedparam, 1 } } }, { .name = "sched_setscheduler", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Schedpolicy, 1 }, { Schedparam, 2 } } }, { .name = "sctp_generic_recvmsg", .ret_type = 1, .nargs = 7, .args = { { Int, 0 }, { Iovec | OUT, 1 }, { Int, 2 }, { Sockaddr | OUT, 3 }, { Ptr | OUT, 4 }, { Sctpsndrcvinfo | OUT, 5 }, { Ptr | OUT, 6 } } }, { .name = "sctp_generic_sendmsg", .ret_type = 1, .nargs = 7, .args = { { Int, 0 }, { BinString | IN, 1 }, { Int, 2 }, { Sockaddr | IN, 3 }, { Socklent, 4 }, { Sctpsndrcvinfo | IN, 5 }, { Msgflags, 6 } } }, { .name = "sctp_generic_sendmsg_iov", .ret_type = 1, .nargs = 7, .args = { { Int, 0 }, { Iovec | IN, 1 }, { Int, 2 }, { Sockaddr | IN, 3 }, { Socklent, 4 }, { Sctpsndrcvinfo | IN, 5 }, { Msgflags, 6 } } }, { .name = "sendfile", .ret_type = 1, .nargs = 7, .args = { { Int, 0 }, { Int, 1 }, { QuadHex, 2 }, { Sizet, 3 }, { Sendfilehdtr, 4 }, { QuadHex | OUT, 5 }, { Sendfileflags, 6 } } }, { .name = "select", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Fd_set, 1 }, { Fd_set, 2 }, { Fd_set, 3 }, { Timeval, 4 } } }, { .name = "sendmsg", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Msghdr | IN, 1 }, { Msgflags, 2 } } }, { .name = "sendto", .ret_type = 1, .nargs = 6, .args = { { Int, 0 }, { BinString | IN, 1 }, { Sizet, 2 }, { Msgflags, 3 }, { Sockaddr | IN, 4 }, { Socklent | IN, 5 } } }, { .name = "setitimer", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Itimerval, 1 }, { Itimerval | OUT, 2 } } }, { .name = "setpriority", .ret_type = 1, .nargs = 3, .args = { { Priowhich, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "setrlimit", .ret_type = 1, .nargs = 2, .args = { { Resource, 0 }, { Rlimit | IN, 1 } } }, { .name = "setsockopt", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Sockoptlevel, 1 }, { Sockoptname, 2 }, { Ptr | IN, 3 }, { Socklent, 4 } } }, { .name = "shm_open", .ret_type = 1, .nargs = 3, .args = { { ShmName | IN, 0 }, { Open, 1 }, { Octal, 2 } } }, { .name = "shm_open2", .ret_type = 1, .nargs = 5, .args = { { ShmName | IN, 0 }, { Open, 1 }, { Octal, 2 }, { ShmFlags, 3 }, { Name | IN, 4 } } }, { .name = "shm_rename", .ret_type = 1, .nargs = 3, .args = { { Name | IN, 0 }, { Name | IN, 1 }, { Hex, 2 } } }, { .name = "shm_unlink", .ret_type = 1, .nargs = 1, .args = { { Name | IN, 0 } } }, { .name = "shutdown", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Shutdown, 1 } } }, { .name = "sigaction", .ret_type = 1, .nargs = 3, .args = { { Signal, 0 }, { Sigaction | IN, 1 }, { Sigaction | OUT, 2 } } }, { .name = "sigpending", .ret_type = 1, .nargs = 1, .args = { { Sigset | OUT, 0 } } }, { .name = "sigprocmask", .ret_type = 1, .nargs = 3, .args = { { Sigprocmask, 0 }, { Sigset, 1 }, { Sigset | OUT, 2 } } }, { .name = "sigqueue", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Signal, 1 }, { LongHex, 2 } } }, { .name = "sigreturn", .ret_type = 1, .nargs = 1, .args = { { Ptr, 0 } } }, { .name = "sigsuspend", .ret_type = 1, .nargs = 1, .args = { { Sigset | IN, 0 } } }, { .name = "sigtimedwait", .ret_type = 1, .nargs = 3, .args = { { Sigset | IN, 0 }, { Siginfo | OUT, 1 }, { Timespec | IN, 2 } } }, { .name = "sigwait", .ret_type = 1, .nargs = 2, .args = { { Sigset | IN, 0 }, { PSig | OUT, 1 } } }, { .name = "sigwaitinfo", .ret_type = 1, .nargs = 2, .args = { { Sigset | IN, 0 }, { Siginfo | OUT, 1 } } }, { .name = "socket", .ret_type = 1, .nargs = 3, .args = { { Sockdomain, 0 }, { Socktype, 1 }, { Sockprotocol, 2 } } }, { .name = "stat", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Stat | OUT, 1 } } }, { .name = "statfs", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { StatFs | OUT, 1 } } }, { .name = "symlink", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Name, 1 } } }, { .name = "symlinkat", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Atfd, 1 }, { Name, 2 } } }, { .name = "sysarch", .ret_type = 1, .nargs = 2, .args = { { Sysarch, 0 }, { Ptr, 1 } } }, { .name = "__sysctl", .ret_type = 1, .nargs = 6, .args = { { Sysctl, 0 }, { Sizet, 1 }, { Ptr, 2 }, { Ptr, 3 }, { Ptr, 4 }, { Sizet, 5 } } }, { .name = "__sysctlbyname", .ret_type = 1, .nargs = 6, .args = { { Name, 0 }, { Sizet, 1 }, { Ptr, 2 }, { Ptr, 3 }, { Ptr, 4}, { Sizet, 5 } } }, { .name = "thr_kill", .ret_type = 1, .nargs = 2, .args = { { Long, 0 }, { Signal, 1 } } }, { .name = "thr_self", .ret_type = 1, .nargs = 1, .args = { { Ptr, 0 } } }, { .name = "thr_set_name", .ret_type = 1, .nargs = 2, .args = { { Long, 0 }, { Name, 1 } } }, { .name = "truncate", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { QuadHex | IN, 1 } } }, #if 0 /* Does not exist */ { .name = "umount", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Int, 2 } } }, #endif { .name = "unlink", .ret_type = 1, .nargs = 1, .args = { { Name, 0 } } }, { .name = "unlinkat", .ret_type = 1, .nargs = 3, .args = { { Atfd, 0 }, { Name, 1 }, { Atflags, 2 } } }, { .name = "unmount", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Mountflags, 1 } } }, { .name = "utimensat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Timespec2 | IN, 2 }, { Atflags, 3 } } }, { .name = "utimes", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Timeval2 | IN, 1 } } }, { .name = "utrace", .ret_type = 1, .nargs = 1, .args = { { Utrace, 0 } } }, { .name = "wait4", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { ExitStatus | OUT, 1 }, { Waitoptions, 2 }, { Rusage | OUT, 3 } } }, { .name = "wait6", .ret_type = 1, .nargs = 6, .args = { { Idtype, 0 }, { Quad, 1 }, { ExitStatus | OUT, 2 }, { Waitoptions, 3 }, { Rusage | OUT, 4 }, { Siginfo | OUT, 5 } } }, { .name = "write", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { BinString | IN, 1 }, { Sizet, 2 } } }, { .name = "writev", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Iovec | IN, 1 }, { Int, 2 } } }, /* Linux ABI */ { .name = "linux_access", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Accessmode, 1 } } }, { .name = "linux_execve", .ret_type = 1, .nargs = 3, .args = { { Name | IN, 0 }, { ExecArgs | IN, 1 }, { ExecEnv | IN, 2 } } }, { .name = "linux_lseek", .ret_type = 2, .nargs = 3, .args = { { Int, 0 }, { Int, 1 }, { Whence, 2 } } }, { .name = "linux_mkdir", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Int, 1 } } }, { .name = "linux_newfstat", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Ptr | OUT, 1 } } }, { .name = "linux_newstat", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Ptr | OUT, 1 } } }, { .name = "linux_open", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Hex, 1 }, { Octal, 2 } } }, { .name = "linux_readlink", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Name | OUT, 1 }, { Sizet, 2 } } }, { .name = "linux_socketcall", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { LinuxSockArgs, 1 } } }, { .name = "linux_stat64", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Ptr | OUT, 1 } } }, }; static STAILQ_HEAD(, syscall) seen_syscalls; /* Xlat idea taken from strace */ struct xlat { int val; const char *str; }; #define X(a) { a, #a }, #define XEND { 0, NULL } static struct xlat poll_flags[] = { X(POLLSTANDARD) X(POLLIN) X(POLLPRI) X(POLLOUT) X(POLLERR) X(POLLHUP) X(POLLNVAL) X(POLLRDNORM) X(POLLRDBAND) X(POLLWRBAND) X(POLLINIGNEOF) X(POLLRDHUP) XEND }; static struct xlat sigaction_flags[] = { X(SA_ONSTACK) X(SA_RESTART) X(SA_RESETHAND) X(SA_NOCLDSTOP) X(SA_NODEFER) X(SA_NOCLDWAIT) X(SA_SIGINFO) XEND }; static struct xlat linux_socketcall_ops[] = { X(LINUX_SOCKET) X(LINUX_BIND) X(LINUX_CONNECT) X(LINUX_LISTEN) X(LINUX_ACCEPT) X(LINUX_GETSOCKNAME) X(LINUX_GETPEERNAME) X(LINUX_SOCKETPAIR) X(LINUX_SEND) X(LINUX_RECV) X(LINUX_SENDTO) X(LINUX_RECVFROM) X(LINUX_SHUTDOWN) X(LINUX_SETSOCKOPT) X(LINUX_GETSOCKOPT) X(LINUX_SENDMSG) X(LINUX_RECVMSG) XEND }; static struct xlat lio_modes[] = { X(LIO_WAIT) X(LIO_NOWAIT) XEND }; static struct xlat lio_opcodes[] = { X(LIO_WRITE) X(LIO_READ) X(LIO_READV) X(LIO_WRITEV) X(LIO_NOP) XEND }; static struct xlat aio_fsync_ops[] = { X(O_SYNC) XEND }; #undef X #undef XEND /* * Searches an xlat array for a value, and returns it if found. Otherwise * return a string representation. */ static const char * lookup(struct xlat *xlat, int val, int base) { static char tmp[16]; for (; xlat->str != NULL; xlat++) if (xlat->val == val) return (xlat->str); switch (base) { case 8: sprintf(tmp, "0%o", val); break; case 16: sprintf(tmp, "0x%x", val); break; case 10: sprintf(tmp, "%u", val); break; default: errx(1, "Unknown lookup base"); } return (tmp); } static const char * xlookup(struct xlat *xlat, int val) { return (lookup(xlat, val, 16)); } /* * Searches an xlat array containing bitfield values. Remaining bits * set after removing the known ones are printed at the end: * IN|0x400. */ static char * xlookup_bits(struct xlat *xlat, int val) { int len, rem; static char str[512]; len = 0; rem = val; for (; xlat->str != NULL; xlat++) { if ((xlat->val & rem) == xlat->val) { /* * Don't print the "all-bits-zero" string unless all * bits are really zero. */ if (xlat->val == 0 && val != 0) continue; len += sprintf(str + len, "%s|", xlat->str); rem &= ~(xlat->val); } } /* * If we have leftover bits or didn't match anything, print * the remainder. */ if (rem || len == 0) len += sprintf(str + len, "0x%x", rem); if (len && str[len - 1] == '|') len--; str[len] = 0; return (str); } static void print_integer_arg(const char *(*decoder)(int), FILE *fp, int value) { const char *str; str = decoder(value); if (str != NULL) fputs(str, fp); else fprintf(fp, "%d", value); } static bool print_mask_arg_part(bool (*decoder)(FILE *, int, int *), FILE *fp, int value, int *rem) { return (decoder(fp, value, rem)); } static void print_mask_arg(bool (*decoder)(FILE *, int, int *), FILE *fp, int value) { int rem; if (!print_mask_arg_part(decoder, fp, value, &rem)) fprintf(fp, "0x%x", rem); else if (rem != 0) fprintf(fp, "|0x%x", rem); } static void print_mask_arg32(bool (*decoder)(FILE *, uint32_t, uint32_t *), FILE *fp, uint32_t value) { uint32_t rem; if (!decoder(fp, value, &rem)) fprintf(fp, "0x%x", rem); else if (rem != 0) fprintf(fp, "|0x%x", rem); } /* * Add argument padding to subsequent system calls after Quad * syscall arguments as needed. This used to be done by hand in the * decoded_syscalls table which was ugly and error prone. It is * simpler to do the fixup of offsets at initialization time than when * decoding arguments. */ static void quad_fixup(struct syscall_decode *sc) { int offset, prev; u_int i; offset = 0; prev = -1; for (i = 0; i < sc->nargs; i++) { /* This arg type is a dummy that doesn't use offset. */ if ((sc->args[i].type & ARG_MASK) == PipeFds) continue; assert(prev < sc->args[i].offset); prev = sc->args[i].offset; sc->args[i].offset += offset; switch (sc->args[i].type & ARG_MASK) { case Quad: case QuadHex: #if defined(__powerpc__) || defined(__arm__) || defined(__aarch64__) /* * 64-bit arguments on 32-bit powerpc and arm must be * 64-bit aligned. If the current offset is * not aligned, the calling convention inserts * a 32-bit pad argument that should be skipped. */ if (sc->args[i].offset % 2 == 1) { sc->args[i].offset++; offset++; } #endif offset++; default: break; } } } static struct syscall * find_syscall(struct procabi *abi, u_int number) { struct extra_syscall *es; if (number < nitems(abi->syscalls)) return (abi->syscalls[number]); STAILQ_FOREACH(es, &abi->extra_syscalls, entries) { if (es->number == number) return (es->sc); } return (NULL); } static void add_syscall(struct procabi *abi, u_int number, struct syscall *sc) { struct extra_syscall *es; /* * quad_fixup() is currently needed for all 32-bit ABIs. * TODO: This should probably be a function pointer inside struct * procabi instead. */ if (abi->pointer_size == 4) quad_fixup(&sc->decode); if (number < nitems(abi->syscalls)) { assert(abi->syscalls[number] == NULL); abi->syscalls[number] = sc; } else { es = malloc(sizeof(*es)); es->sc = sc; es->number = number; STAILQ_INSERT_TAIL(&abi->extra_syscalls, es, entries); } STAILQ_INSERT_HEAD(&seen_syscalls, sc, entries); } /* * If/when the list gets big, it might be desirable to do it * as a hash table or binary search. */ struct syscall * get_syscall(struct threadinfo *t, u_int number, u_int nargs) { struct syscall *sc; struct procabi *procabi; const char *sysdecode_name; const char *lookup_name; const char *name; u_int i; procabi = t->proc->abi; sc = find_syscall(procabi, number); if (sc != NULL) return (sc); /* Memory is not explicitly deallocated, it's released on exit(). */ sysdecode_name = sysdecode_syscallname(procabi->abi, number); if (sysdecode_name == NULL) asprintf(__DECONST(char **, &name), "#%d", number); else name = sysdecode_name; sc = calloc(1, sizeof(*sc)); sc->name = name; /* Also decode compat syscalls arguments by stripping the prefix. */ lookup_name = name; if (procabi->compat_prefix != NULL && strncmp(procabi->compat_prefix, name, strlen(procabi->compat_prefix)) == 0) lookup_name += strlen(procabi->compat_prefix); for (i = 0; i < nitems(decoded_syscalls); i++) { if (strcmp(lookup_name, decoded_syscalls[i].name) == 0) { sc->decode = decoded_syscalls[i]; add_syscall(t->proc->abi, number, sc); return (sc); } } /* It is unknown. Add it into the list. */ #if DEBUG fprintf(stderr, "unknown syscall %s -- setting args to %d\n", name, nargs); #endif sc->unknown = sysdecode_name == NULL; sc->decode.ret_type = 1; /* Assume 1 return value. */ sc->decode.nargs = nargs; for (i = 0; i < nargs; i++) { sc->decode.args[i].offset = i; /* Treat all unknown arguments as LongHex. */ sc->decode.args[i].type = LongHex; } add_syscall(t->proc->abi, number, sc); return (sc); } /* * Copy a fixed amount of bytes from the process. */ static int get_struct(pid_t pid, psaddr_t offset, void *buf, size_t len) { struct ptrace_io_desc iorequest; iorequest.piod_op = PIOD_READ_D; iorequest.piod_offs = (void *)(uintptr_t)offset; iorequest.piod_addr = buf; iorequest.piod_len = len; if (ptrace(PT_IO, pid, (caddr_t)&iorequest, 0) < 0) return (-1); return (0); } #define MAXSIZE 4096 /* * Copy a string from the process. Note that it is * expected to be a C string, but if max is set, it will * only get that much. */ static char * get_string(pid_t pid, psaddr_t addr, int max) { struct ptrace_io_desc iorequest; char *buf, *nbuf; size_t offset, size, totalsize; offset = 0; if (max) size = max + 1; else { /* Read up to the end of the current page. */ size = PAGE_SIZE - (addr % PAGE_SIZE); if (size > MAXSIZE) size = MAXSIZE; } totalsize = size; buf = malloc(totalsize); if (buf == NULL) return (NULL); for (;;) { iorequest.piod_op = PIOD_READ_D; iorequest.piod_offs = (void *)((uintptr_t)addr + offset); iorequest.piod_addr = buf + offset; iorequest.piod_len = size; if (ptrace(PT_IO, pid, (caddr_t)&iorequest, 0) < 0) { free(buf); return (NULL); } if (memchr(buf + offset, '\0', size) != NULL) return (buf); offset += size; if (totalsize < MAXSIZE && max == 0) { size = MAXSIZE - totalsize; if (size > PAGE_SIZE) size = PAGE_SIZE; nbuf = realloc(buf, totalsize + size); if (nbuf == NULL) { buf[totalsize - 1] = '\0'; return (buf); } buf = nbuf; totalsize += size; } else { buf[totalsize - 1] = '\0'; return (buf); } } } static const char * strsig2(int sig) { static char tmp[32]; const char *signame; signame = sysdecode_signal(sig); if (signame == NULL) { snprintf(tmp, sizeof(tmp), "%d", sig); signame = tmp; } return (signame); } static void print_kevent(FILE *fp, struct kevent *ke) { switch (ke->filter) { case EVFILT_READ: case EVFILT_WRITE: case EVFILT_VNODE: case EVFILT_PROC: case EVFILT_TIMER: case EVFILT_PROCDESC: case EVFILT_EMPTY: fprintf(fp, "%ju", (uintmax_t)ke->ident); break; case EVFILT_SIGNAL: fputs(strsig2(ke->ident), fp); break; default: fprintf(fp, "%p", (void *)ke->ident); } fprintf(fp, ","); print_integer_arg(sysdecode_kevent_filter, fp, ke->filter); fprintf(fp, ","); print_mask_arg(sysdecode_kevent_flags, fp, ke->flags); fprintf(fp, ","); sysdecode_kevent_fflags(fp, ke->filter, ke->fflags, 16); fprintf(fp, ",%#jx,%p", (uintmax_t)ke->data, ke->udata); } static void print_utrace(FILE *fp, void *utrace_addr, size_t len) { unsigned char *utrace_buffer; fprintf(fp, "{ "); if (sysdecode_utrace(fp, utrace_addr, len)) { fprintf(fp, " }"); return; } utrace_buffer = utrace_addr; fprintf(fp, "%zu:", len); while (len--) fprintf(fp, " %02x", *utrace_buffer++); fprintf(fp, " }"); } static void print_pointer(FILE *fp, uintptr_t arg) { fprintf(fp, "%p", (void *)arg); } static void print_sockaddr(FILE *fp, struct trussinfo *trussinfo, uintptr_t arg, socklen_t len) { char addr[64]; struct sockaddr_in *lsin; struct sockaddr_in6 *lsin6; struct sockaddr_un *sun; struct sockaddr *sa; u_char *q; pid_t pid = trussinfo->curthread->proc->pid; if (arg == 0) { fputs("NULL", fp); return; } /* If the length is too small, just bail. */ if (len < sizeof(*sa)) { print_pointer(fp, arg); return; } sa = calloc(1, len); if (get_struct(pid, arg, sa, len) == -1) { free(sa); print_pointer(fp, arg); return; } switch (sa->sa_family) { case AF_INET: if (len < sizeof(*lsin)) goto sockaddr_short; lsin = (struct sockaddr_in *)(void *)sa; inet_ntop(AF_INET, &lsin->sin_addr, addr, sizeof(addr)); fprintf(fp, "{ AF_INET %s:%d }", addr, htons(lsin->sin_port)); break; case AF_INET6: if (len < sizeof(*lsin6)) goto sockaddr_short; lsin6 = (struct sockaddr_in6 *)(void *)sa; inet_ntop(AF_INET6, &lsin6->sin6_addr, addr, sizeof(addr)); fprintf(fp, "{ AF_INET6 [%s]:%d }", addr, htons(lsin6->sin6_port)); break; case AF_UNIX: sun = (struct sockaddr_un *)sa; fprintf(fp, "{ AF_UNIX \"%.*s\" }", (int)(len - offsetof(struct sockaddr_un, sun_path)), sun->sun_path); break; default: sockaddr_short: fprintf(fp, "{ sa_len = %d, sa_family = %d, sa_data = {", (int)sa->sa_len, (int)sa->sa_family); for (q = (u_char *)sa->sa_data; q < (u_char *)sa + len; q++) fprintf(fp, "%s 0x%02x", q == (u_char *)sa->sa_data ? "" : ",", *q); fputs(" } }", fp); } free(sa); } #define IOV_LIMIT 16 static void print_iovec(FILE *fp, struct trussinfo *trussinfo, uintptr_t arg, int iovcnt) { struct iovec iov[IOV_LIMIT]; size_t max_string = trussinfo->strsize; char tmp2[max_string + 1], *tmp3; size_t len; pid_t pid = trussinfo->curthread->proc->pid; int i; bool buf_truncated, iov_truncated; if (iovcnt <= 0) { print_pointer(fp, arg); return; } if (iovcnt > IOV_LIMIT) { iovcnt = IOV_LIMIT; iov_truncated = true; } else { iov_truncated = false; } if (get_struct(pid, arg, &iov, iovcnt * sizeof(struct iovec)) == -1) { print_pointer(fp, arg); return; } fputs("[", fp); for (i = 0; i < iovcnt; i++) { len = iov[i].iov_len; if (len > max_string) { len = max_string; buf_truncated = true; } else { buf_truncated = false; } fprintf(fp, "%s{", (i > 0) ? "," : ""); if (len && get_struct(pid, (uintptr_t)iov[i].iov_base, &tmp2, len) != -1) { tmp3 = malloc(len * 4 + 1); while (len) { if (strvisx(tmp3, tmp2, len, VIS_CSTYLE|VIS_TAB|VIS_NL) <= (int)max_string) break; len--; buf_truncated = true; } fprintf(fp, "\"%s\"%s", tmp3, buf_truncated ? "..." : ""); free(tmp3); } else { print_pointer(fp, (uintptr_t)iov[i].iov_base); } fprintf(fp, ",%zu}", iov[i].iov_len); } fprintf(fp, "%s%s", iov_truncated ? ",..." : "", "]"); } static void print_sigval(FILE *fp, union sigval *sv) { fprintf(fp, "{ %d, %p }", sv->sival_int, sv->sival_ptr); } static void print_sigevent(FILE *fp, struct sigevent *se) { fputs("{ sigev_notify=", fp); switch (se->sigev_notify) { case SIGEV_NONE: fputs("SIGEV_NONE", fp); break; case SIGEV_SIGNAL: fprintf(fp, "SIGEV_SIGNAL, sigev_signo=%s, sigev_value=", strsig2(se->sigev_signo)); print_sigval(fp, &se->sigev_value); break; case SIGEV_THREAD: fputs("SIGEV_THREAD, sigev_value=", fp); print_sigval(fp, &se->sigev_value); break; case SIGEV_KEVENT: fprintf(fp, "SIGEV_KEVENT, sigev_notify_kqueue=%d, sigev_notify_kevent_flags=", se->sigev_notify_kqueue); print_mask_arg(sysdecode_kevent_flags, fp, se->sigev_notify_kevent_flags); break; case SIGEV_THREAD_ID: fprintf(fp, "SIGEV_THREAD_ID, sigev_notify_thread_id=%d, sigev_signo=%s, sigev_value=", se->sigev_notify_thread_id, strsig2(se->sigev_signo)); print_sigval(fp, &se->sigev_value); break; default: fprintf(fp, "%d", se->sigev_notify); break; } fputs(" }", fp); } static void print_aiocb(FILE *fp, struct aiocb *cb) { fprintf(fp, "{ %d,%jd,%p,%zu,%s,", cb->aio_fildes, cb->aio_offset, cb->aio_buf, cb->aio_nbytes, xlookup(lio_opcodes, cb->aio_lio_opcode)); print_sigevent(fp, &cb->aio_sigevent); fputs(" }", fp); } static void print_gen_cmsg(FILE *fp, struct cmsghdr *cmsghdr) { u_char *q; fputs("{", fp); for (q = CMSG_DATA(cmsghdr); q < (u_char *)cmsghdr + cmsghdr->cmsg_len; q++) { fprintf(fp, "%s0x%02x", q == CMSG_DATA(cmsghdr) ? "" : ",", *q); } fputs("}", fp); } static void print_sctp_initmsg(FILE *fp, struct sctp_initmsg *init) { fprintf(fp, "{out=%u,", init->sinit_num_ostreams); fprintf(fp, "in=%u,", init->sinit_max_instreams); fprintf(fp, "max_rtx=%u,", init->sinit_max_attempts); fprintf(fp, "max_rto=%u}", init->sinit_max_init_timeo); } static void print_sctp_sndrcvinfo(FILE *fp, bool receive, struct sctp_sndrcvinfo *info) { fprintf(fp, "{sid=%u,", info->sinfo_stream); if (receive) { fprintf(fp, "ssn=%u,", info->sinfo_ssn); } fputs("flgs=", fp); sysdecode_sctp_sinfo_flags(fp, info->sinfo_flags); fprintf(fp, ",ppid=%u,", ntohl(info->sinfo_ppid)); if (!receive) { fprintf(fp, "ctx=%u,", info->sinfo_context); fprintf(fp, "ttl=%u,", info->sinfo_timetolive); } if (receive) { fprintf(fp, "tsn=%u,", info->sinfo_tsn); fprintf(fp, "cumtsn=%u,", info->sinfo_cumtsn); } fprintf(fp, "id=%u}", info->sinfo_assoc_id); } static void print_sctp_sndinfo(FILE *fp, struct sctp_sndinfo *info) { fprintf(fp, "{sid=%u,", info->snd_sid); fputs("flgs=", fp); print_mask_arg(sysdecode_sctp_snd_flags, fp, info->snd_flags); fprintf(fp, ",ppid=%u,", ntohl(info->snd_ppid)); fprintf(fp, "ctx=%u,", info->snd_context); fprintf(fp, "id=%u}", info->snd_assoc_id); } static void print_sctp_rcvinfo(FILE *fp, struct sctp_rcvinfo *info) { fprintf(fp, "{sid=%u,", info->rcv_sid); fprintf(fp, "ssn=%u,", info->rcv_ssn); fputs("flgs=", fp); print_mask_arg(sysdecode_sctp_rcv_flags, fp, info->rcv_flags); fprintf(fp, ",ppid=%u,", ntohl(info->rcv_ppid)); fprintf(fp, "tsn=%u,", info->rcv_tsn); fprintf(fp, "cumtsn=%u,", info->rcv_cumtsn); fprintf(fp, "ctx=%u,", info->rcv_context); fprintf(fp, "id=%u}", info->rcv_assoc_id); } static void print_sctp_nxtinfo(FILE *fp, struct sctp_nxtinfo *info) { fprintf(fp, "{sid=%u,", info->nxt_sid); fputs("flgs=", fp); print_mask_arg(sysdecode_sctp_nxt_flags, fp, info->nxt_flags); fprintf(fp, ",ppid=%u,", ntohl(info->nxt_ppid)); fprintf(fp, "len=%u,", info->nxt_length); fprintf(fp, "id=%u}", info->nxt_assoc_id); } static void print_sctp_prinfo(FILE *fp, struct sctp_prinfo *info) { fputs("{pol=", fp); print_integer_arg(sysdecode_sctp_pr_policy, fp, info->pr_policy); fprintf(fp, ",val=%u}", info->pr_value); } static void print_sctp_authinfo(FILE *fp, struct sctp_authinfo *info) { fprintf(fp, "{num=%u}", info->auth_keynumber); } static void print_sctp_ipv4_addr(FILE *fp, struct in_addr *addr) { char buf[INET_ADDRSTRLEN]; const char *s; s = inet_ntop(AF_INET, addr, buf, INET_ADDRSTRLEN); if (s != NULL) fprintf(fp, "{addr=%s}", s); else fputs("{addr=???}", fp); } static void print_sctp_ipv6_addr(FILE *fp, struct in6_addr *addr) { char buf[INET6_ADDRSTRLEN]; const char *s; s = inet_ntop(AF_INET6, addr, buf, INET6_ADDRSTRLEN); if (s != NULL) fprintf(fp, "{addr=%s}", s); else fputs("{addr=???}", fp); } static void print_sctp_cmsg(FILE *fp, bool receive, struct cmsghdr *cmsghdr) { void *data; socklen_t len; len = cmsghdr->cmsg_len; data = CMSG_DATA(cmsghdr); switch (cmsghdr->cmsg_type) { case SCTP_INIT: if (len == CMSG_LEN(sizeof(struct sctp_initmsg))) print_sctp_initmsg(fp, (struct sctp_initmsg *)data); else print_gen_cmsg(fp, cmsghdr); break; case SCTP_SNDRCV: if (len == CMSG_LEN(sizeof(struct sctp_sndrcvinfo))) print_sctp_sndrcvinfo(fp, receive, (struct sctp_sndrcvinfo *)data); else print_gen_cmsg(fp, cmsghdr); break; #if 0 case SCTP_EXTRCV: if (len == CMSG_LEN(sizeof(struct sctp_extrcvinfo))) print_sctp_extrcvinfo(fp, (struct sctp_extrcvinfo *)data); else print_gen_cmsg(fp, cmsghdr); break; #endif case SCTP_SNDINFO: if (len == CMSG_LEN(sizeof(struct sctp_sndinfo))) print_sctp_sndinfo(fp, (struct sctp_sndinfo *)data); else print_gen_cmsg(fp, cmsghdr); break; case SCTP_RCVINFO: if (len == CMSG_LEN(sizeof(struct sctp_rcvinfo))) print_sctp_rcvinfo(fp, (struct sctp_rcvinfo *)data); else print_gen_cmsg(fp, cmsghdr); break; case SCTP_NXTINFO: if (len == CMSG_LEN(sizeof(struct sctp_nxtinfo))) print_sctp_nxtinfo(fp, (struct sctp_nxtinfo *)data); else print_gen_cmsg(fp, cmsghdr); break; case SCTP_PRINFO: if (len == CMSG_LEN(sizeof(struct sctp_prinfo))) print_sctp_prinfo(fp, (struct sctp_prinfo *)data); else print_gen_cmsg(fp, cmsghdr); break; case SCTP_AUTHINFO: if (len == CMSG_LEN(sizeof(struct sctp_authinfo))) print_sctp_authinfo(fp, (struct sctp_authinfo *)data); else print_gen_cmsg(fp, cmsghdr); break; case SCTP_DSTADDRV4: if (len == CMSG_LEN(sizeof(struct in_addr))) print_sctp_ipv4_addr(fp, (struct in_addr *)data); else print_gen_cmsg(fp, cmsghdr); break; case SCTP_DSTADDRV6: if (len == CMSG_LEN(sizeof(struct in6_addr))) print_sctp_ipv6_addr(fp, (struct in6_addr *)data); else print_gen_cmsg(fp, cmsghdr); break; default: print_gen_cmsg(fp, cmsghdr); } } static void print_cmsgs(FILE *fp, pid_t pid, bool receive, struct msghdr *msghdr) { struct cmsghdr *cmsghdr; char *cmsgbuf; const char *temp; socklen_t len; int level, type; bool first; len = msghdr->msg_controllen; if (len == 0) { fputs("{}", fp); return; } cmsgbuf = calloc(1, len); if (get_struct(pid, (uintptr_t)msghdr->msg_control, cmsgbuf, len) == -1) { print_pointer(fp, (uintptr_t)msghdr->msg_control); free(cmsgbuf); return; } msghdr->msg_control = cmsgbuf; first = true; fputs("{", fp); for (cmsghdr = CMSG_FIRSTHDR(msghdr); cmsghdr != NULL; cmsghdr = CMSG_NXTHDR(msghdr, cmsghdr)) { level = cmsghdr->cmsg_level; type = cmsghdr->cmsg_type; len = cmsghdr->cmsg_len; fprintf(fp, "%s{level=", first ? "" : ","); print_integer_arg(sysdecode_sockopt_level, fp, level); fputs(",type=", fp); temp = sysdecode_cmsg_type(level, type); if (temp) { fputs(temp, fp); } else { fprintf(fp, "%d", type); } fputs(",data=", fp); switch (level) { case IPPROTO_SCTP: print_sctp_cmsg(fp, receive, cmsghdr); break; default: print_gen_cmsg(fp, cmsghdr); break; } fputs("}", fp); first = false; } fputs("}", fp); free(cmsgbuf); } static void print_sysctl_oid(FILE *fp, int *oid, size_t len) { size_t i; bool first; first = true; fprintf(fp, "{ "); for (i = 0; i < len; i++) { fprintf(fp, "%s%d", first ? "" : ".", oid[i]); first = false; } fprintf(fp, " }"); } static void print_sysctl(FILE *fp, int *oid, size_t len) { char name[BUFSIZ]; int qoid[CTL_MAXNAME + 2]; size_t i; qoid[0] = CTL_SYSCTL; qoid[1] = CTL_SYSCTL_NAME; memcpy(qoid + 2, oid, len * sizeof(int)); i = sizeof(name); if (sysctl(qoid, len + 2, name, &i, 0, 0) == -1) print_sysctl_oid(fp, oid, len); else fprintf(fp, "%s", name); } /* * Convert a 32-bit user-space pointer to psaddr_t. Currently, this * sign-extends on MIPS and zero-extends on all other architectures. */ static psaddr_t user_ptr32_to_psaddr(int32_t user_pointer) { #if defined(__mips__) return ((psaddr_t)(intptr_t)user_pointer); #else return ((psaddr_t)(uintptr_t)user_pointer); #endif } /* * Converts a syscall argument into a string. Said string is * allocated via malloc(), so needs to be free()'d. sc is * a pointer to the syscall description (see above); args is * an array of all of the system call arguments. */ char * -print_arg(struct syscall_arg *sc, unsigned long *args, register_t *retval, +print_arg(struct syscall_arg *sc, syscallarg_t *args, syscallarg_t *retval, struct trussinfo *trussinfo) { FILE *fp; char *tmp; size_t tmplen; pid_t pid; fp = open_memstream(&tmp, &tmplen); pid = trussinfo->curthread->proc->pid; switch (sc->type & ARG_MASK) { case Hex: fprintf(fp, "0x%x", (int)args[sc->offset]); break; case Octal: fprintf(fp, "0%o", (int)args[sc->offset]); break; case Int: fprintf(fp, "%d", (int)args[sc->offset]); break; case UInt: fprintf(fp, "%u", (unsigned int)args[sc->offset]); break; case PUInt: { unsigned int val; if (get_struct(pid, args[sc->offset], &val, sizeof(val)) == 0) fprintf(fp, "{ %u }", val); else print_pointer(fp, args[sc->offset]); break; } case LongHex: - fprintf(fp, "0x%lx", args[sc->offset]); + fprintf(fp, "0x%lx", (long)args[sc->offset]); break; case Long: - fprintf(fp, "%ld", args[sc->offset]); + fprintf(fp, "%ld", (long)args[sc->offset]); break; case Sizet: fprintf(fp, "%zu", (size_t)args[sc->offset]); break; case ShmName: /* Handle special SHM_ANON value. */ if ((char *)(uintptr_t)args[sc->offset] == SHM_ANON) { fprintf(fp, "SHM_ANON"); break; } /* FALLTHROUGH */ case Name: { /* NULL-terminated string. */ char *tmp2; tmp2 = get_string(pid, args[sc->offset], 0); fprintf(fp, "\"%s\"", tmp2); free(tmp2); break; } case BinString: { /* * Binary block of data that might have printable characters. * XXX If type|OUT, assume that the length is the syscall's * return value. Otherwise, assume that the length of the block * is in the next syscall argument. */ int max_string = trussinfo->strsize; char tmp2[max_string + 1], *tmp3; int len; int truncated = 0; if (sc->type & OUT) len = retval[0]; else len = args[sc->offset + 1]; /* * Don't print more than max_string characters, to avoid word * wrap. If we have to truncate put some ... after the string. */ if (len > max_string) { len = max_string; truncated = 1; } if (len && get_struct(pid, args[sc->offset], &tmp2, len) != -1) { tmp3 = malloc(len * 4 + 1); while (len) { if (strvisx(tmp3, tmp2, len, VIS_CSTYLE|VIS_TAB|VIS_NL) <= max_string) break; len--; truncated = 1; } fprintf(fp, "\"%s\"%s", tmp3, truncated ? "..." : ""); free(tmp3); } else { print_pointer(fp, args[sc->offset]); } break; } case ExecArgs: case ExecEnv: case StringArray: { psaddr_t addr; union { int32_t strarray32[PAGE_SIZE / sizeof(int32_t)]; int64_t strarray64[PAGE_SIZE / sizeof(int64_t)]; char buf[PAGE_SIZE]; } u; char *string; size_t len; u_int first, i; size_t pointer_size = trussinfo->curthread->proc->abi->pointer_size; /* * Only parse argv[] and environment arrays from exec calls * if requested. */ if (((sc->type & ARG_MASK) == ExecArgs && (trussinfo->flags & EXECVEARGS) == 0) || ((sc->type & ARG_MASK) == ExecEnv && (trussinfo->flags & EXECVEENVS) == 0)) { print_pointer(fp, args[sc->offset]); break; } /* * Read a page of pointers at a time. Punt if the top-level * pointer is not aligned. Note that the first read is of * a partial page. */ addr = args[sc->offset]; if (!__is_aligned(addr, pointer_size)) { print_pointer(fp, args[sc->offset]); break; } len = PAGE_SIZE - (addr & PAGE_MASK); if (get_struct(pid, addr, u.buf, len) == -1) { print_pointer(fp, args[sc->offset]); break; } assert(len > 0); fputc('[', fp); first = 1; i = 0; for (;;) { psaddr_t straddr; if (pointer_size == 4) { straddr = user_ptr32_to_psaddr(u.strarray32[i]); } else if (pointer_size == 8) { straddr = (psaddr_t)u.strarray64[i]; } else { errx(1, "Unsupported pointer size: %zu", pointer_size); } /* Stop once we read the first NULL pointer. */ if (straddr == 0) break; string = get_string(pid, straddr, 0); fprintf(fp, "%s \"%s\"", first ? "" : ",", string); free(string); first = 0; i++; if (i == len / pointer_size) { addr += len; len = PAGE_SIZE; if (get_struct(pid, addr, u.buf, len) == -1) { fprintf(fp, ", "); break; } i = 0; } } fputs(" ]", fp); break; } case Quad: case QuadHex: { uint64_t value; size_t pointer_size = trussinfo->curthread->proc->abi->pointer_size; if (pointer_size == 4) { #if _BYTE_ORDER == _LITTLE_ENDIAN value = (uint64_t)args[sc->offset + 1] << 32 | args[sc->offset]; #else value = (uint64_t)args[sc->offset] << 32 | args[sc->offset + 1]; #endif } else { value = (uint64_t)args[sc->offset]; } if ((sc->type & ARG_MASK) == Quad) fprintf(fp, "%jd", (intmax_t)value); else fprintf(fp, "0x%jx", (intmax_t)value); break; } case PQuadHex: { uint64_t val; if (get_struct(pid, args[sc->offset], &val, sizeof(val)) == 0) fprintf(fp, "{ 0x%jx }", (uintmax_t)val); else print_pointer(fp, args[sc->offset]); break; } case Ptr: print_pointer(fp, args[sc->offset]); break; case Readlinkres: { char *tmp2; if (retval[0] == -1) break; tmp2 = get_string(pid, args[sc->offset], retval[0]); fprintf(fp, "\"%s\"", tmp2); free(tmp2); break; } case Ioctl: { const char *temp; unsigned long cmd; cmd = args[sc->offset]; temp = sysdecode_ioctlname(cmd); if (temp) fputs(temp, fp); else { fprintf(fp, "0x%lx { IO%s%s 0x%lx('%c'), %lu, %lu }", cmd, cmd & IOC_OUT ? "R" : "", cmd & IOC_IN ? "W" : "", IOCGROUP(cmd), isprint(IOCGROUP(cmd)) ? (char)IOCGROUP(cmd) : '?', cmd & 0xFF, IOCPARM_LEN(cmd)); } break; } case Timespec: { struct timespec ts; if (get_struct(pid, args[sc->offset], &ts, sizeof(ts)) != -1) fprintf(fp, "{ %jd.%09ld }", (intmax_t)ts.tv_sec, ts.tv_nsec); else print_pointer(fp, args[sc->offset]); break; } case Timespec2: { struct timespec ts[2]; const char *sep; unsigned int i; if (get_struct(pid, args[sc->offset], &ts, sizeof(ts)) != -1) { fputs("{ ", fp); sep = ""; for (i = 0; i < nitems(ts); i++) { fputs(sep, fp); sep = ", "; switch (ts[i].tv_nsec) { case UTIME_NOW: fprintf(fp, "UTIME_NOW"); break; case UTIME_OMIT: fprintf(fp, "UTIME_OMIT"); break; default: fprintf(fp, "%jd.%09ld", (intmax_t)ts[i].tv_sec, ts[i].tv_nsec); break; } } fputs(" }", fp); } else print_pointer(fp, args[sc->offset]); break; } case Timeval: { struct timeval tv; if (get_struct(pid, args[sc->offset], &tv, sizeof(tv)) != -1) fprintf(fp, "{ %jd.%06ld }", (intmax_t)tv.tv_sec, tv.tv_usec); else print_pointer(fp, args[sc->offset]); break; } case Timeval2: { struct timeval tv[2]; if (get_struct(pid, args[sc->offset], &tv, sizeof(tv)) != -1) fprintf(fp, "{ %jd.%06ld, %jd.%06ld }", (intmax_t)tv[0].tv_sec, tv[0].tv_usec, (intmax_t)tv[1].tv_sec, tv[1].tv_usec); else print_pointer(fp, args[sc->offset]); break; } case Itimerval: { struct itimerval itv; if (get_struct(pid, args[sc->offset], &itv, sizeof(itv)) != -1) fprintf(fp, "{ %jd.%06ld, %jd.%06ld }", (intmax_t)itv.it_interval.tv_sec, itv.it_interval.tv_usec, (intmax_t)itv.it_value.tv_sec, itv.it_value.tv_usec); else print_pointer(fp, args[sc->offset]); break; } case LinuxSockArgs: { struct linux_socketcall_args largs; if (get_struct(pid, args[sc->offset], (void *)&largs, sizeof(largs)) != -1) fprintf(fp, "{ %s, 0x%lx }", lookup(linux_socketcall_ops, largs.what, 10), (long unsigned int)largs.args); else print_pointer(fp, args[sc->offset]); break; } case Pollfd: { /* * XXX: A Pollfd argument expects the /next/ syscall argument * to be the number of fds in the array. This matches the poll * syscall. */ struct pollfd *pfd; int numfds = args[sc->offset + 1]; size_t bytes = sizeof(struct pollfd) * numfds; int i; if ((pfd = malloc(bytes)) == NULL) err(1, "Cannot malloc %zu bytes for pollfd array", bytes); if (get_struct(pid, args[sc->offset], pfd, bytes) != -1) { fputs("{", fp); for (i = 0; i < numfds; i++) { fprintf(fp, " %d/%s", pfd[i].fd, xlookup_bits(poll_flags, pfd[i].events)); } fputs(" }", fp); } else { print_pointer(fp, args[sc->offset]); } free(pfd); break; } case Fd_set: { /* * XXX: A Fd_set argument expects the /first/ syscall argument * to be the number of fds in the array. This matches the * select syscall. */ fd_set *fds; int numfds = args[0]; size_t bytes = _howmany(numfds, _NFDBITS) * _NFDBITS; int i; if ((fds = malloc(bytes)) == NULL) err(1, "Cannot malloc %zu bytes for fd_set array", bytes); if (get_struct(pid, args[sc->offset], fds, bytes) != -1) { fputs("{", fp); for (i = 0; i < numfds; i++) { if (FD_ISSET(i, fds)) fprintf(fp, " %d", i); } fputs(" }", fp); } else print_pointer(fp, args[sc->offset]); free(fds); break; } case Signal: fputs(strsig2(args[sc->offset]), fp); break; case Sigset: { sigset_t ss; int i, first; if (get_struct(pid, args[sc->offset], (void *)&ss, sizeof(ss)) == -1) { print_pointer(fp, args[sc->offset]); break; } fputs("{ ", fp); first = 1; for (i = 1; i < sys_nsig; i++) { if (sigismember(&ss, i)) { fprintf(fp, "%s%s", !first ? "|" : "", strsig2(i)); first = 0; } } if (!first) fputc(' ', fp); fputc('}', fp); break; } case Sigprocmask: print_integer_arg(sysdecode_sigprocmask_how, fp, args[sc->offset]); break; case Fcntlflag: /* XXX: Output depends on the value of the previous argument. */ if (sysdecode_fcntl_arg_p(args[sc->offset - 1])) sysdecode_fcntl_arg(fp, args[sc->offset - 1], args[sc->offset], 16); break; case Open: print_mask_arg(sysdecode_open_flags, fp, args[sc->offset]); break; case Fcntl: print_integer_arg(sysdecode_fcntl_cmd, fp, args[sc->offset]); break; case Closerangeflags: print_mask_arg(sysdecode_close_range_flags, fp, args[sc->offset]); break; case Mprot: print_mask_arg(sysdecode_mmap_prot, fp, args[sc->offset]); break; case Mmapflags: print_mask_arg(sysdecode_mmap_flags, fp, args[sc->offset]); break; case Whence: print_integer_arg(sysdecode_whence, fp, args[sc->offset]); break; case ShmFlags: print_mask_arg(sysdecode_shmflags, fp, args[sc->offset]); break; case Sockdomain: print_integer_arg(sysdecode_socketdomain, fp, args[sc->offset]); break; case Socktype: print_mask_arg(sysdecode_socket_type, fp, args[sc->offset]); break; case Shutdown: print_integer_arg(sysdecode_shutdown_how, fp, args[sc->offset]); break; case Resource: print_integer_arg(sysdecode_rlimit, fp, args[sc->offset]); break; case RusageWho: print_integer_arg(sysdecode_getrusage_who, fp, args[sc->offset]); break; case Pathconf: print_integer_arg(sysdecode_pathconf_name, fp, args[sc->offset]); break; case Rforkflags: print_mask_arg(sysdecode_rfork_flags, fp, args[sc->offset]); break; case Sockaddr: { socklen_t len; if (args[sc->offset] == 0) { fputs("NULL", fp); break; } /* * Extract the address length from the next argument. If * this is an output sockaddr (OUT is set), then the * next argument is a pointer to a socklen_t. Otherwise * the next argument contains a socklen_t by value. */ if (sc->type & OUT) { if (get_struct(pid, args[sc->offset + 1], &len, sizeof(len)) == -1) { print_pointer(fp, args[sc->offset]); break; } } else len = args[sc->offset + 1]; print_sockaddr(fp, trussinfo, args[sc->offset], len); break; } case Sigaction: { struct sigaction sa; if (get_struct(pid, args[sc->offset], &sa, sizeof(sa)) != -1) { fputs("{ ", fp); if (sa.sa_handler == SIG_DFL) fputs("SIG_DFL", fp); else if (sa.sa_handler == SIG_IGN) fputs("SIG_IGN", fp); else fprintf(fp, "%p", sa.sa_handler); fprintf(fp, " %s ss_t }", xlookup_bits(sigaction_flags, sa.sa_flags)); } else print_pointer(fp, args[sc->offset]); break; } case Sigevent: { struct sigevent se; if (get_struct(pid, args[sc->offset], &se, sizeof(se)) != -1) print_sigevent(fp, &se); else print_pointer(fp, args[sc->offset]); break; } case Kevent: { /* * XXX XXX: The size of the array is determined by either the * next syscall argument, or by the syscall return value, * depending on which argument number we are. This matches the * kevent syscall, but luckily that's the only syscall that uses * them. */ struct kevent *ke; int numevents = -1; size_t bytes; int i; if (sc->offset == 1) numevents = args[sc->offset+1]; else if (sc->offset == 3 && retval[0] != -1) numevents = retval[0]; if (numevents >= 0) { bytes = sizeof(struct kevent) * numevents; if ((ke = malloc(bytes)) == NULL) err(1, "Cannot malloc %zu bytes for kevent array", bytes); } else ke = NULL; if (numevents >= 0 && get_struct(pid, args[sc->offset], ke, bytes) != -1) { fputc('{', fp); for (i = 0; i < numevents; i++) { fputc(' ', fp); print_kevent(fp, &ke[i]); } fputs(" }", fp); } else { print_pointer(fp, args[sc->offset]); } free(ke); break; } case Kevent11: { struct freebsd11_kevent *ke11; struct kevent ke; int numevents = -1; size_t bytes; int i; if (sc->offset == 1) numevents = args[sc->offset+1]; else if (sc->offset == 3 && retval[0] != -1) numevents = retval[0]; if (numevents >= 0) { bytes = sizeof(struct freebsd11_kevent) * numevents; if ((ke11 = malloc(bytes)) == NULL) err(1, "Cannot malloc %zu bytes for kevent array", bytes); } else ke11 = NULL; memset(&ke, 0, sizeof(ke)); if (numevents >= 0 && get_struct(pid, args[sc->offset], ke11, bytes) != -1) { fputc('{', fp); for (i = 0; i < numevents; i++) { fputc(' ', fp); ke.ident = ke11[i].ident; ke.filter = ke11[i].filter; ke.flags = ke11[i].flags; ke.fflags = ke11[i].fflags; ke.data = ke11[i].data; ke.udata = ke11[i].udata; print_kevent(fp, &ke); } fputs(" }", fp); } else { print_pointer(fp, args[sc->offset]); } free(ke11); break; } case Stat: { struct stat st; if (get_struct(pid, args[sc->offset], &st, sizeof(st)) != -1) { char mode[12]; strmode(st.st_mode, mode); fprintf(fp, "{ mode=%s,inode=%ju,size=%jd,blksize=%ld }", mode, (uintmax_t)st.st_ino, (intmax_t)st.st_size, (long)st.st_blksize); } else { print_pointer(fp, args[sc->offset]); } break; } case Stat11: { struct freebsd11_stat st; if (get_struct(pid, args[sc->offset], &st, sizeof(st)) != -1) { char mode[12]; strmode(st.st_mode, mode); fprintf(fp, "{ mode=%s,inode=%ju,size=%jd,blksize=%ld }", mode, (uintmax_t)st.st_ino, (intmax_t)st.st_size, (long)st.st_blksize); } else { print_pointer(fp, args[sc->offset]); } break; } case StatFs: { unsigned int i; struct statfs buf; if (get_struct(pid, args[sc->offset], &buf, sizeof(buf)) != -1) { char fsid[17]; bzero(fsid, sizeof(fsid)); if (buf.f_fsid.val[0] != 0 || buf.f_fsid.val[1] != 0) { for (i = 0; i < sizeof(buf.f_fsid); i++) snprintf(&fsid[i*2], sizeof(fsid) - (i*2), "%02x", ((u_char *)&buf.f_fsid)[i]); } fprintf(fp, "{ fstypename=%s,mntonname=%s,mntfromname=%s," "fsid=%s }", buf.f_fstypename, buf.f_mntonname, buf.f_mntfromname, fsid); } else print_pointer(fp, args[sc->offset]); break; } case Rusage: { struct rusage ru; if (get_struct(pid, args[sc->offset], &ru, sizeof(ru)) != -1) { fprintf(fp, "{ u=%jd.%06ld,s=%jd.%06ld,in=%ld,out=%ld }", (intmax_t)ru.ru_utime.tv_sec, ru.ru_utime.tv_usec, (intmax_t)ru.ru_stime.tv_sec, ru.ru_stime.tv_usec, ru.ru_inblock, ru.ru_oublock); } else print_pointer(fp, args[sc->offset]); break; } case Rlimit: { struct rlimit rl; if (get_struct(pid, args[sc->offset], &rl, sizeof(rl)) != -1) { fprintf(fp, "{ cur=%ju,max=%ju }", rl.rlim_cur, rl.rlim_max); } else print_pointer(fp, args[sc->offset]); break; } case ExitStatus: { int status; if (get_struct(pid, args[sc->offset], &status, sizeof(status)) != -1) { fputs("{ ", fp); if (WIFCONTINUED(status)) fputs("CONTINUED", fp); else if (WIFEXITED(status)) fprintf(fp, "EXITED,val=%d", WEXITSTATUS(status)); else if (WIFSIGNALED(status)) fprintf(fp, "SIGNALED,sig=%s%s", strsig2(WTERMSIG(status)), WCOREDUMP(status) ? ",cored" : ""); else fprintf(fp, "STOPPED,sig=%s", strsig2(WTERMSIG(status))); fputs(" }", fp); } else print_pointer(fp, args[sc->offset]); break; } case Waitoptions: print_mask_arg(sysdecode_wait6_options, fp, args[sc->offset]); break; case Idtype: print_integer_arg(sysdecode_idtype, fp, args[sc->offset]); break; case Procctl: print_integer_arg(sysdecode_procctl_cmd, fp, args[sc->offset]); break; case Umtxop: { int rem; if (print_mask_arg_part(sysdecode_umtx_op_flags, fp, args[sc->offset], &rem)) fprintf(fp, "|"); print_integer_arg(sysdecode_umtx_op, fp, rem); break; } case Atfd: print_integer_arg(sysdecode_atfd, fp, args[sc->offset]); break; case Atflags: print_mask_arg(sysdecode_atflags, fp, args[sc->offset]); break; case Accessmode: print_mask_arg(sysdecode_access_mode, fp, args[sc->offset]); break; case Sysarch: print_integer_arg(sysdecode_sysarch_number, fp, args[sc->offset]); break; case Sysctl: { char name[BUFSIZ]; int oid[CTL_MAXNAME + 2]; size_t len; memset(name, 0, sizeof(name)); len = args[sc->offset + 1]; if (get_struct(pid, args[sc->offset], oid, len * sizeof(oid[0])) != -1) { fprintf(fp, "\""); if (oid[0] == CTL_SYSCTL) { fprintf(fp, "sysctl."); switch (oid[1]) { case CTL_SYSCTL_DEBUG: fprintf(fp, "debug"); break; case CTL_SYSCTL_NAME: fprintf(fp, "name "); print_sysctl_oid(fp, oid + 2, len - 2); break; case CTL_SYSCTL_NEXT: fprintf(fp, "next"); break; case CTL_SYSCTL_NAME2OID: fprintf(fp, "name2oid %s", get_string(pid, args[sc->offset + 4], args[sc->offset + 5])); break; case CTL_SYSCTL_OIDFMT: fprintf(fp, "oidfmt "); print_sysctl(fp, oid + 2, len - 2); break; case CTL_SYSCTL_OIDDESCR: fprintf(fp, "oiddescr "); print_sysctl(fp, oid + 2, len - 2); break; case CTL_SYSCTL_OIDLABEL: fprintf(fp, "oidlabel "); print_sysctl(fp, oid + 2, len - 2); break; case CTL_SYSCTL_NEXTNOSKIP: fprintf(fp, "nextnoskip"); break; default: print_sysctl(fp, oid + 1, len - 1); } } else { print_sysctl(fp, oid, len); } fprintf(fp, "\""); } break; } case PipeFds: /* * The pipe() system call in the kernel returns its * two file descriptors via return values. However, * the interface exposed by libc is that pipe() * accepts a pointer to an array of descriptors. * Format the output to match the libc API by printing * the returned file descriptors as a fake argument. * * Overwrite the first retval to signal a successful * return as well. */ fprintf(fp, "{ %d, %d }", (int)retval[0], (int)retval[1]); retval[0] = 0; break; case Utrace: { size_t len; void *utrace_addr; len = args[sc->offset + 1]; utrace_addr = calloc(1, len); if (get_struct(pid, args[sc->offset], (void *)utrace_addr, len) != -1) print_utrace(fp, utrace_addr, len); else print_pointer(fp, args[sc->offset]); free(utrace_addr); break; } case IntArray: { int descriptors[16]; unsigned long i, ndescriptors; bool truncated; ndescriptors = args[sc->offset + 1]; truncated = false; if (ndescriptors > nitems(descriptors)) { ndescriptors = nitems(descriptors); truncated = true; } if (get_struct(pid, args[sc->offset], descriptors, ndescriptors * sizeof(descriptors[0])) != -1) { fprintf(fp, "{"); for (i = 0; i < ndescriptors; i++) fprintf(fp, i == 0 ? " %d" : ", %d", descriptors[i]); fprintf(fp, truncated ? ", ... }" : " }"); } else print_pointer(fp, args[sc->offset]); break; } case Pipe2: print_mask_arg(sysdecode_pipe2_flags, fp, args[sc->offset]); break; case CapFcntlRights: { uint32_t rights; if (sc->type & OUT) { if (get_struct(pid, args[sc->offset], &rights, sizeof(rights)) == -1) { print_pointer(fp, args[sc->offset]); break; } } else rights = args[sc->offset]; print_mask_arg32(sysdecode_cap_fcntlrights, fp, rights); break; } case Fadvice: print_integer_arg(sysdecode_fadvice, fp, args[sc->offset]); break; case FileFlags: { fflags_t rem; if (!sysdecode_fileflags(fp, args[sc->offset], &rem)) fprintf(fp, "0x%x", rem); else if (rem != 0) fprintf(fp, "|0x%x", rem); break; } case Flockop: print_mask_arg(sysdecode_flock_operation, fp, args[sc->offset]); break; case Getfsstatmode: print_integer_arg(sysdecode_getfsstat_mode, fp, args[sc->offset]); break; case Kldsymcmd: print_integer_arg(sysdecode_kldsym_cmd, fp, args[sc->offset]); break; case Kldunloadflags: print_integer_arg(sysdecode_kldunload_flags, fp, args[sc->offset]); break; case AiofsyncOp: fputs(xlookup(aio_fsync_ops, args[sc->offset]), fp); break; case LioMode: fputs(xlookup(lio_modes, args[sc->offset]), fp); break; case Madvice: print_integer_arg(sysdecode_madvice, fp, args[sc->offset]); break; case Socklent: fprintf(fp, "%u", (socklen_t)args[sc->offset]); break; case Sockprotocol: { const char *temp; int domain, protocol; domain = args[sc->offset - 2]; protocol = args[sc->offset]; if (protocol == 0) { fputs("0", fp); } else { temp = sysdecode_socket_protocol(domain, protocol); if (temp) { fputs(temp, fp); } else { fprintf(fp, "%d", protocol); } } break; } case Sockoptlevel: print_integer_arg(sysdecode_sockopt_level, fp, args[sc->offset]); break; case Sockoptname: { const char *temp; int level, name; level = args[sc->offset - 1]; name = args[sc->offset]; temp = sysdecode_sockopt_name(level, name); if (temp) { fputs(temp, fp); } else { fprintf(fp, "%d", name); } break; } case Msgflags: print_mask_arg(sysdecode_msg_flags, fp, args[sc->offset]); break; case CapRights: { cap_rights_t rights; if (get_struct(pid, args[sc->offset], &rights, sizeof(rights)) != -1) { fputs("{ ", fp); sysdecode_cap_rights(fp, &rights); fputs(" }", fp); } else print_pointer(fp, args[sc->offset]); break; } case Acltype: print_integer_arg(sysdecode_acltype, fp, args[sc->offset]); break; case Extattrnamespace: print_integer_arg(sysdecode_extattrnamespace, fp, args[sc->offset]); break; case Minherit: print_integer_arg(sysdecode_minherit_inherit, fp, args[sc->offset]); break; case Mlockall: print_mask_arg(sysdecode_mlockall_flags, fp, args[sc->offset]); break; case Mountflags: print_mask_arg(sysdecode_mount_flags, fp, args[sc->offset]); break; case Msync: print_mask_arg(sysdecode_msync_flags, fp, args[sc->offset]); break; case Priowhich: print_integer_arg(sysdecode_prio_which, fp, args[sc->offset]); break; case Ptraceop: print_integer_arg(sysdecode_ptrace_request, fp, args[sc->offset]); break; case Sendfileflags: print_mask_arg(sysdecode_sendfile_flags, fp, args[sc->offset]); break; case Sendfilehdtr: { struct sf_hdtr hdtr; if (get_struct(pid, args[sc->offset], &hdtr, sizeof(hdtr)) != -1) { fprintf(fp, "{"); print_iovec(fp, trussinfo, (uintptr_t)hdtr.headers, hdtr.hdr_cnt); print_iovec(fp, trussinfo, (uintptr_t)hdtr.trailers, hdtr.trl_cnt); fprintf(fp, "}"); } else print_pointer(fp, args[sc->offset]); break; } case Quotactlcmd: if (!sysdecode_quotactl_cmd(fp, args[sc->offset])) fprintf(fp, "%#x", (int)args[sc->offset]); break; case Reboothowto: print_mask_arg(sysdecode_reboot_howto, fp, args[sc->offset]); break; case Rtpriofunc: print_integer_arg(sysdecode_rtprio_function, fp, args[sc->offset]); break; case Schedpolicy: print_integer_arg(sysdecode_scheduler_policy, fp, args[sc->offset]); break; case Schedparam: { struct sched_param sp; if (get_struct(pid, args[sc->offset], &sp, sizeof(sp)) != -1) fprintf(fp, "{ %d }", sp.sched_priority); else print_pointer(fp, args[sc->offset]); break; } case PSig: { int sig; if (get_struct(pid, args[sc->offset], &sig, sizeof(sig)) == 0) fprintf(fp, "{ %s }", strsig2(sig)); else print_pointer(fp, args[sc->offset]); break; } case Siginfo: { siginfo_t si; if (get_struct(pid, args[sc->offset], &si, sizeof(si)) != -1) { fprintf(fp, "{ signo=%s", strsig2(si.si_signo)); decode_siginfo(fp, &si); fprintf(fp, " }"); } else print_pointer(fp, args[sc->offset]); break; } case Iovec: /* * Print argument as an array of struct iovec, where the next * syscall argument is the number of elements of the array. */ print_iovec(fp, trussinfo, args[sc->offset], (int)args[sc->offset + 1]); break; case Aiocb: { struct aiocb cb; if (get_struct(pid, args[sc->offset], &cb, sizeof(cb)) != -1) print_aiocb(fp, &cb); else print_pointer(fp, args[sc->offset]); break; } case AiocbArray: { /* * Print argment as an array of pointers to struct aiocb, where * the next syscall argument is the number of elements. */ uintptr_t cbs[16]; unsigned int nent; bool truncated; nent = args[sc->offset + 1]; truncated = false; if (nent > nitems(cbs)) { nent = nitems(cbs); truncated = true; } if (get_struct(pid, args[sc->offset], cbs, sizeof(uintptr_t) * nent) != -1) { unsigned int i; fputs("[", fp); for (i = 0; i < nent; ++i) { struct aiocb cb; if (i > 0) fputc(',', fp); if (get_struct(pid, cbs[i], &cb, sizeof(cb)) != -1) print_aiocb(fp, &cb); else print_pointer(fp, cbs[i]); } if (truncated) fputs(",...", fp); fputs("]", fp); } else print_pointer(fp, args[sc->offset]); break; } case AiocbPointer: { /* * aio_waitcomplete(2) assigns a pointer to a pointer to struct * aiocb, so we need to handle the extra layer of indirection. */ uintptr_t cbp; struct aiocb cb; if (get_struct(pid, args[sc->offset], &cbp, sizeof(cbp)) != -1) { if (get_struct(pid, cbp, &cb, sizeof(cb)) != -1) print_aiocb(fp, &cb); else print_pointer(fp, cbp); } else print_pointer(fp, args[sc->offset]); break; } case Sctpsndrcvinfo: { struct sctp_sndrcvinfo info; if (get_struct(pid, args[sc->offset], &info, sizeof(struct sctp_sndrcvinfo)) == -1) { print_pointer(fp, args[sc->offset]); break; } print_sctp_sndrcvinfo(fp, sc->type & OUT, &info); break; } case Msghdr: { struct msghdr msghdr; if (get_struct(pid, args[sc->offset], &msghdr, sizeof(struct msghdr)) == -1) { print_pointer(fp, args[sc->offset]); break; } fputs("{", fp); print_sockaddr(fp, trussinfo, (uintptr_t)msghdr.msg_name, msghdr.msg_namelen); fprintf(fp, ",%d,", msghdr.msg_namelen); print_iovec(fp, trussinfo, (uintptr_t)msghdr.msg_iov, msghdr.msg_iovlen); fprintf(fp, ",%d,", msghdr.msg_iovlen); print_cmsgs(fp, pid, sc->type & OUT, &msghdr); fprintf(fp, ",%u,", msghdr.msg_controllen); print_mask_arg(sysdecode_msg_flags, fp, msghdr.msg_flags); fputs("}", fp); break; } default: errx(1, "Invalid argument type %d\n", sc->type & ARG_MASK); } fclose(fp); return (tmp); } /* * Print (to outfile) the system call and its arguments. */ void print_syscall(struct trussinfo *trussinfo) { struct threadinfo *t; const char *name; char **s_args; int i, len, nargs; t = trussinfo->curthread; name = t->cs.sc->name; nargs = t->cs.nargs; s_args = t->cs.s_args; len = print_line_prefix(trussinfo); len += fprintf(trussinfo->outfile, "%s(", name); for (i = 0; i < nargs; i++) { if (s_args[i] != NULL) len += fprintf(trussinfo->outfile, "%s", s_args[i]); else len += fprintf(trussinfo->outfile, ""); len += fprintf(trussinfo->outfile, "%s", i < (nargs - 1) ? "," : ""); } len += fprintf(trussinfo->outfile, ")"); for (i = 0; i < 6 - (len / 8); i++) fprintf(trussinfo->outfile, "\t"); } void -print_syscall_ret(struct trussinfo *trussinfo, int error, register_t *retval) +print_syscall_ret(struct trussinfo *trussinfo, int error, syscallarg_t *retval) { struct timespec timediff; struct threadinfo *t; struct syscall *sc; t = trussinfo->curthread; sc = t->cs.sc; if (trussinfo->flags & COUNTONLY) { timespecsub(&t->after, &t->before, &timediff); timespecadd(&sc->time, &timediff, &sc->time); sc->ncalls++; if (error != 0) sc->nerror++; return; } print_syscall(trussinfo); fflush(trussinfo->outfile); if (retval == NULL) { /* * This system call resulted in the current thread's exit, * so there is no return value or error to display. */ fprintf(trussinfo->outfile, "\n"); return; } if (error == ERESTART) fprintf(trussinfo->outfile, " ERESTART\n"); else if (error == EJUSTRETURN) fprintf(trussinfo->outfile, " EJUSTRETURN\n"); else if (error != 0) { fprintf(trussinfo->outfile, " ERR#%d '%s'\n", sysdecode_freebsd_to_abi_errno(t->proc->abi->abi, error), strerror(error)); } else if (sc->decode.ret_type == 2 && t->proc->abi->pointer_size == 4) { off_t off; #if _BYTE_ORDER == _LITTLE_ENDIAN off = (off_t)retval[1] << 32 | retval[0]; #else off = (off_t)retval[0] << 32 | retval[1]; #endif fprintf(trussinfo->outfile, " = %jd (0x%jx)\n", (intmax_t)off, (intmax_t)off); } else { fprintf(trussinfo->outfile, " = %jd (0x%jx)\n", (intmax_t)retval[0], (intmax_t)retval[0]); } } void print_summary(struct trussinfo *trussinfo) { struct timespec total = {0, 0}; struct syscall *sc; int ncall, nerror; fprintf(trussinfo->outfile, "%-20s%15s%8s%8s\n", "syscall", "seconds", "calls", "errors"); ncall = nerror = 0; STAILQ_FOREACH(sc, &seen_syscalls, entries) { if (sc->ncalls) { fprintf(trussinfo->outfile, "%-20s%5jd.%09ld%8d%8d\n", sc->name, (intmax_t)sc->time.tv_sec, sc->time.tv_nsec, sc->ncalls, sc->nerror); timespecadd(&total, &sc->time, &total); ncall += sc->ncalls; nerror += sc->nerror; } } fprintf(trussinfo->outfile, "%20s%15s%8s%8s\n", "", "-------------", "-------", "-------"); fprintf(trussinfo->outfile, "%-20s%5jd.%09ld%8d%8d\n", "", (intmax_t)total.tv_sec, total.tv_nsec, ncall, nerror); } diff --git a/usr.bin/truss/truss.h b/usr.bin/truss/truss.h index a3ce8f27d953..4d2680cd4913 100644 --- a/usr.bin/truss/truss.h +++ b/usr.bin/truss/truss.h @@ -1,118 +1,118 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 2001 Jamey Wood * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #define FOLLOWFORKS 0x00000001 #define RELATIVETIMESTAMPS 0x00000002 #define ABSOLUTETIMESTAMPS 0x00000004 #define NOSIGS 0x00000008 #define EXECVEARGS 0x00000010 #define EXECVEENVS 0x00000020 #define COUNTONLY 0x00000040 #define DISPLAYTIDS 0x00000080 struct procinfo; struct syscall; struct trussinfo; /* * The lookup of normal system calls are optimized by using a fixed * array for the first 1024 system calls that can be indexed directly. * Unknown system calls with other IDs are stored in a linked list. */ #define SYSCALL_NORMAL_COUNT 1024 struct extra_syscall { STAILQ_ENTRY(extra_syscall) entries; struct syscall *sc; u_int number; }; struct procabi { const char *type; enum sysdecode_abi abi; size_t pointer_size; const char *compat_prefix; STAILQ_HEAD(, extra_syscall) extra_syscalls; struct syscall *syscalls[SYSCALL_NORMAL_COUNT]; }; /* * This is confusingly named. It holds per-thread state about the * currently executing system call. syscall.h defines a struct * syscall that holds metadata used to format system call arguments. * * NB: args[] stores the raw argument values (e.g. from registers) * passed to the system call. s_args[] stores a string representation * of a system call's arguments. These do not necessarily map one to * one. A system call description may omit individual arguments * (padding) or combine adjacent arguments (e.g. when passing an off_t * argument on a 32-bit system). The nargs member contains the count * of valid pointers in s_args[], not args[]. */ struct current_syscall { struct syscall *sc; unsigned int number; unsigned int nargs; - unsigned long args[10]; + syscallarg_t args[10]; char *s_args[10]; /* the printable arguments */ }; struct threadinfo { LIST_ENTRY(threadinfo) entries; struct procinfo *proc; lwpid_t tid; int in_syscall; struct current_syscall cs; struct timespec before; struct timespec after; }; struct procinfo { LIST_ENTRY(procinfo) entries; pid_t pid; struct procabi *abi; LIST_HEAD(, threadinfo) threadlist; }; struct trussinfo { int flags; int strsize; FILE *outfile; struct timespec start_time; struct threadinfo *curthread; LIST_HEAD(, procinfo) proclist; };