Index: head/sys/amd64/amd64/trap.c =================================================================== --- head/sys/amd64/amd64/trap.c (revision 366204) +++ head/sys/amd64/amd64/trap.c (revision 366205) @@ -1,1196 +1,1196 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 1994, David Greenman * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 */ #include __FBSDID("$FreeBSD$"); /* * AMD64 Trap and System call handling */ #include "opt_clock.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_hwpmc_hooks.h" #include "opt_isa.h" #include "opt_kdb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include PMC_SOFT_DEFINE( , , page_fault, all); PMC_SOFT_DEFINE( , , page_fault, read); PMC_SOFT_DEFINE( , , page_fault, write); #endif #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #ifdef KDTRACE_HOOKS #include #endif extern inthand_t IDTVEC(bpt), IDTVEC(bpt_pti), IDTVEC(dbg), IDTVEC(fast_syscall), IDTVEC(fast_syscall_pti), IDTVEC(fast_syscall32), IDTVEC(int0x80_syscall_pti), IDTVEC(int0x80_syscall); void __noinline trap(struct trapframe *frame); void trap_check(struct trapframe *frame); void dblfault_handler(struct trapframe *frame); static int trap_pfault(struct trapframe *, bool, int *, int *); static void trap_fatal(struct trapframe *, vm_offset_t); #ifdef KDTRACE_HOOKS static bool trap_user_dtrace(struct trapframe *, int (**hook)(struct trapframe *)); #endif static const char UNKNOWN[] = "unknown"; static const char *const trap_msg[] = { [0] = UNKNOWN, /* unused */ [T_PRIVINFLT] = "privileged instruction fault", [2] = UNKNOWN, /* unused */ [T_BPTFLT] = "breakpoint instruction fault", [4] = UNKNOWN, /* unused */ [5] = UNKNOWN, /* unused */ [T_ARITHTRAP] = "arithmetic trap", [7] = UNKNOWN, /* unused */ [8] = UNKNOWN, /* unused */ [T_PROTFLT] = "general protection fault", [T_TRCTRAP] = "debug exception", [11] = UNKNOWN, /* unused */ [T_PAGEFLT] = "page fault", [13] = UNKNOWN, /* unused */ [T_ALIGNFLT] = "alignment fault", [15] = UNKNOWN, /* unused */ [16] = UNKNOWN, /* unused */ [17] = UNKNOWN, /* unused */ [T_DIVIDE] = "integer divide fault", [T_NMI] = "non-maskable interrupt trap", [T_OFLOW] = "overflow trap", [T_BOUND] = "FPU bounds check fault", [T_DNA] = "FPU device not available", [T_DOUBLEFLT] = "double fault", [T_FPOPFLT] = "FPU operand fetch fault", [T_TSSFLT] = "invalid TSS fault", [T_SEGNPFLT] = "segment not present fault", [T_STKFLT] = "stack fault", [T_MCHK] = "machine check trap", [T_XMMFLT] = "SIMD floating-point exception", [T_RESERVED] = "reserved (unknown) fault", [31] = UNKNOWN, /* reserved */ [T_DTRACE_RET] = "DTrace pid return trap", }; static int uprintf_signal; SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RWTUN, &uprintf_signal, 0, "Print debugging information on trap signal to ctty"); /* * Control L1D flush on return from NMI. * * Tunable can be set to the following values: * 0 - only enable flush on return from NMI if required by vmm.ko (default) * >1 - always flush on return from NMI. * * Post-boot, the sysctl indicates if flushing is currently enabled. */ int nmi_flush_l1d_sw; SYSCTL_INT(_machdep, OID_AUTO, nmi_flush_l1d_sw, CTLFLAG_RWTUN, &nmi_flush_l1d_sw, 0, "Flush L1 Data Cache on NMI exit, software bhyve L1TF mitigation assist"); /* * Exception, fault, and trap interface to the FreeBSD kernel. * This common code is called from assembly language IDT gate entry * routines that prepare a suitable stack frame, and restore this * frame after the exception has been processed. */ void trap(struct trapframe *frame) { ksiginfo_t ksi; struct thread *td; struct proc *p; register_t addr, dr6; int pf, signo, ucode; u_int type; td = curthread; p = td->td_proc; dr6 = 0; VM_CNT_INC(v_trap); type = frame->tf_trapno; #ifdef SMP /* Handler for NMI IPIs used for stopping CPUs. */ if (type == T_NMI && ipi_nmi_handler() == 0) return; #endif #ifdef KDB if (kdb_active) { kdb_reenter(); return; } #endif if (type == T_RESERVED) { trap_fatal(frame, 0); return; } if (type == T_NMI) { #ifdef HWPMC_HOOKS /* * CPU PMCs interrupt using an NMI. If the PMC module is * active, pass the 'rip' value to the PMC module's interrupt * handler. A non-zero return value from the handler means that * the NMI was consumed by it and we can return immediately. */ if (pmc_intr != NULL && (*pmc_intr)(frame) != 0) return; #endif } if ((frame->tf_rflags & PSL_I) == 0) { /* * Buggy application or kernel code has disabled * interrupts and then trapped. Enabling interrupts * now is wrong, but it is better than running with * interrupts disabled until they are accidentally * enabled later. */ if (TRAPF_USERMODE(frame)) uprintf( "pid %ld (%s): trap %d with interrupts disabled\n", (long)curproc->p_pid, curthread->td_name, type); else if (type != T_NMI && type != T_BPTFLT && type != T_TRCTRAP) { /* * XXX not quite right, since this may be for a * multiple fault in user mode. */ printf("kernel trap %d with interrupts disabled\n", type); /* * We shouldn't enable interrupts while holding a * spin lock. */ if (td->td_md.md_spinlock_count == 0) enable_intr(); } } if (TRAPF_USERMODE(frame)) { /* user trap */ td->td_pticks = 0; td->td_frame = frame; addr = frame->tf_rip; if (td->td_cowgen != p->p_cowgen) thread_cow_update(td); switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ signo = SIGILL; ucode = ILL_PRVOPC; break; case T_BPTFLT: /* bpt instruction fault */ #ifdef KDTRACE_HOOKS if (trap_user_dtrace(frame, &dtrace_pid_probe_ptr)) return; #else enable_intr(); #endif signo = SIGTRAP; ucode = TRAP_BRKPT; break; case T_TRCTRAP: /* debug exception */ enable_intr(); signo = SIGTRAP; ucode = TRAP_TRACE; dr6 = rdr6(); if ((dr6 & DBREG_DR6_BS) != 0) { PROC_LOCK(td->td_proc); if ((td->td_dbgflags & TDB_STEP) != 0) { td->td_frame->tf_rflags &= ~PSL_T; td->td_dbgflags &= ~TDB_STEP; } PROC_UNLOCK(td->td_proc); } break; case T_ARITHTRAP: /* arithmetic trap */ ucode = fputrap_x87(); if (ucode == -1) return; signo = SIGFPE; break; case T_PROTFLT: /* general protection fault */ signo = SIGBUS; ucode = BUS_OBJERR; break; case T_STKFLT: /* stack fault */ case T_SEGNPFLT: /* segment not present fault */ signo = SIGBUS; ucode = BUS_ADRERR; break; case T_TSSFLT: /* invalid TSS fault */ signo = SIGBUS; ucode = BUS_OBJERR; break; case T_ALIGNFLT: signo = SIGBUS; ucode = BUS_ADRALN; break; case T_DOUBLEFLT: /* double fault */ default: signo = SIGBUS; ucode = BUS_OBJERR; break; case T_PAGEFLT: /* page fault */ /* * Can emulator handle this trap? */ if (*p->p_sysent->sv_trap != NULL && (*p->p_sysent->sv_trap)(td) == 0) return; pf = trap_pfault(frame, true, &signo, &ucode); if (pf == -1) return; if (pf == 0) goto userret; addr = frame->tf_addr; break; case T_DIVIDE: /* integer divide fault */ ucode = FPE_INTDIV; signo = SIGFPE; break; case T_NMI: nmi_handle_intr(type, frame); return; case T_OFLOW: /* integer overflow fault */ ucode = FPE_INTOVF; signo = SIGFPE; break; case T_BOUND: /* bounds check fault */ ucode = FPE_FLTSUB; signo = SIGFPE; break; case T_DNA: /* transparent fault (due to context switch "late") */ KASSERT(PCB_USER_FPU(td->td_pcb), ("kernel FPU ctx has leaked")); fpudna(); return; case T_FPOPFLT: /* FPU operand fetch fault */ ucode = ILL_COPROC; signo = SIGILL; break; case T_XMMFLT: /* SIMD floating-point exception */ ucode = fputrap_sse(); if (ucode == -1) return; signo = SIGFPE; break; #ifdef KDTRACE_HOOKS case T_DTRACE_RET: (void)trap_user_dtrace(frame, &dtrace_return_probe_ptr); return; #endif } } else { /* kernel trap */ KASSERT(cold || td->td_ucred != NULL, ("kernel trap doesn't have ucred")); switch (type) { case T_PAGEFLT: /* page fault */ (void)trap_pfault(frame, false, NULL, NULL); return; case T_DNA: if (PCB_USER_FPU(td->td_pcb)) panic("Unregistered use of FPU in kernel"); fpudna(); return; case T_ARITHTRAP: /* arithmetic trap */ case T_XMMFLT: /* SIMD floating-point exception */ case T_FPOPFLT: /* FPU operand fetch fault */ /* * For now, supporting kernel handler * registration for FPU traps is overkill. */ trap_fatal(frame, 0); return; case T_STKFLT: /* stack fault */ case T_PROTFLT: /* general protection fault */ case T_SEGNPFLT: /* segment not present fault */ if (td->td_intr_nesting_level != 0) break; /* * Invalid segment selectors and out of bounds * %rip's and %rsp's can be set up in user mode. * This causes a fault in kernel mode when the * kernel tries to return to user mode. We want * to get this fault so that we can fix the * problem here and not have to check all the * selectors and pointers when the user changes * them. * * In case of PTI, the IRETQ faulted while the * kernel used the pti stack, and exception * frame records %rsp value pointing to that * stack. If we return normally to * doreti_iret_fault, the trapframe is * reconstructed on pti stack, and calltrap() * called on it as well. Due to the very * limited pti stack size, kernel does not * survive for too long. Switch to the normal * thread stack for the trap handling. * * Magic '5' is the number of qwords occupied by * the hardware trap frame. */ if (frame->tf_rip == (long)doreti_iret) { frame->tf_rip = (long)doreti_iret_fault; if ((PCPU_GET(curpmap)->pm_ucr3 != PMAP_NO_CR3) && (frame->tf_rsp == (uintptr_t)PCPU_GET( pti_rsp0) - 5 * sizeof(register_t))) { frame->tf_rsp = PCPU_GET(rsp0) - 5 * sizeof(register_t); } return; } if (frame->tf_rip == (long)ld_ds) { frame->tf_rip = (long)ds_load_fault; return; } if (frame->tf_rip == (long)ld_es) { frame->tf_rip = (long)es_load_fault; return; } if (frame->tf_rip == (long)ld_fs) { frame->tf_rip = (long)fs_load_fault; return; } if (frame->tf_rip == (long)ld_gs) { frame->tf_rip = (long)gs_load_fault; return; } if (frame->tf_rip == (long)ld_gsbase) { frame->tf_rip = (long)gsbase_load_fault; return; } if (frame->tf_rip == (long)ld_fsbase) { frame->tf_rip = (long)fsbase_load_fault; return; } if (curpcb->pcb_onfault != NULL) { frame->tf_rip = (long)curpcb->pcb_onfault; return; } break; case T_TSSFLT: /* * PSL_NT can be set in user mode and isn't cleared * automatically when the kernel is entered. This * causes a TSS fault when the kernel attempts to * `iret' because the TSS link is uninitialized. We * want to get this fault so that we can fix the * problem here and not every time the kernel is * entered. */ if (frame->tf_rflags & PSL_NT) { frame->tf_rflags &= ~PSL_NT; return; } break; case T_TRCTRAP: /* debug exception */ /* Clear any pending debug events. */ dr6 = rdr6(); load_dr6(0); /* * Ignore debug register exceptions due to * accesses in the user's address space, which * can happen under several conditions such as * if a user sets a watchpoint on a buffer and * then passes that buffer to a system call. * We still want to get TRCTRAPS for addresses * in kernel space because that is useful when * debugging the kernel. */ if (user_dbreg_trap(dr6)) return; /* * Malicious user code can configure a debug * register watchpoint to trap on data access * to the top of stack and then execute 'pop * %ss; int 3'. Due to exception deferral for * 'pop %ss', the CPU will not interrupt 'int * 3' to raise the DB# exception for the debug * register but will postpone the DB# until * execution of the first instruction of the * BP# handler (in kernel mode). Normally the * previous check would ignore DB# exceptions * for watchpoints on user addresses raised in * kernel mode. However, some CPU errata * include cases where DB# exceptions do not * properly set bits in %dr6, e.g. Haswell * HSD23 and Skylake-X SKZ24. * * A deferred DB# can also be raised on the * first instructions of system call entry * points or single-step traps via similar use * of 'pop %ss' or 'mov xxx, %ss'. */ if (pti) { if (frame->tf_rip == (uintptr_t)IDTVEC(fast_syscall_pti) || #ifdef COMPAT_FREEBSD32 frame->tf_rip == (uintptr_t)IDTVEC(int0x80_syscall_pti) || #endif frame->tf_rip == (uintptr_t)IDTVEC(bpt_pti)) return; } else { if (frame->tf_rip == (uintptr_t)IDTVEC(fast_syscall) || #ifdef COMPAT_FREEBSD32 frame->tf_rip == (uintptr_t)IDTVEC(int0x80_syscall) || #endif frame->tf_rip == (uintptr_t)IDTVEC(bpt)) return; } if (frame->tf_rip == (uintptr_t)IDTVEC(dbg) || /* Needed for AMD. */ frame->tf_rip == (uintptr_t)IDTVEC(fast_syscall32)) return; /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) */ case T_BPTFLT: /* * If KDB is enabled, let it handle the debugger trap. * Otherwise, debugger traps "can't happen". */ #ifdef KDB if (kdb_trap(type, dr6, frame)) return; #endif break; case T_NMI: nmi_handle_intr(type, frame); return; } trap_fatal(frame, 0); return; } /* Translate fault for emulators (e.g. Linux) */ if (*p->p_sysent->sv_transtrap != NULL) signo = (*p->p_sysent->sv_transtrap)(signo, type); ksiginfo_init_trap(&ksi); ksi.ksi_signo = signo; ksi.ksi_code = ucode; ksi.ksi_trapno = type; ksi.ksi_addr = (void *)addr; if (uprintf_signal) { uprintf("pid %d comm %s: signal %d err %lx code %d type %d " "addr 0x%lx rsp 0x%lx rip 0x%lx " "<%02x %02x %02x %02x %02x %02x %02x %02x>\n", p->p_pid, p->p_comm, signo, frame->tf_err, ucode, type, addr, frame->tf_rsp, frame->tf_rip, fubyte((void *)(frame->tf_rip + 0)), fubyte((void *)(frame->tf_rip + 1)), fubyte((void *)(frame->tf_rip + 2)), fubyte((void *)(frame->tf_rip + 3)), fubyte((void *)(frame->tf_rip + 4)), fubyte((void *)(frame->tf_rip + 5)), fubyte((void *)(frame->tf_rip + 6)), fubyte((void *)(frame->tf_rip + 7))); } KASSERT((read_rflags() & PSL_I) != 0, ("interrupts disabled")); trapsignal(td, &ksi); userret: userret(td, frame); KASSERT(PCB_USER_FPU(td->td_pcb), ("Return from trap with kernel FPU ctx leaked")); } /* * Ensure that we ignore any DTrace-induced faults. This function cannot * be instrumented, so it cannot generate such faults itself. */ void trap_check(struct trapframe *frame) { #ifdef KDTRACE_HOOKS if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, frame->tf_trapno) != 0) return; #endif trap(frame); } static bool trap_is_smap(struct trapframe *frame) { /* * A page fault on a userspace address is classified as * SMAP-induced if: * - SMAP is supported; * - kernel mode accessed present data page; * - rflags.AC was cleared. * Kernel must never access user space with rflags.AC cleared * if SMAP is enabled. */ return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 && (frame->tf_err & (PGEX_P | PGEX_U | PGEX_I | PGEX_RSV)) == PGEX_P && (frame->tf_rflags & PSL_AC) == 0); } static bool trap_is_pti(struct trapframe *frame) { return (PCPU_GET(curpmap)->pm_ucr3 != PMAP_NO_CR3 && pg_nx != 0 && (frame->tf_err & (PGEX_P | PGEX_W | PGEX_U | PGEX_I)) == (PGEX_P | PGEX_U | PGEX_I) && (curpcb->pcb_saved_ucr3 & ~CR3_PCID_MASK) == (PCPU_GET(curpmap)->pm_cr3 & ~CR3_PCID_MASK)); } /* * Handle all details of a page fault. * Returns: * -1 if this fault was fatal, typically from kernel mode * (cannot happen, but we need to return something). * 0 if this fault was handled by updating either the user or kernel * page table, execution can continue. * 1 if this fault was from usermode and it was not handled, a synchronous * signal should be delivered to the thread. *signo returns the signal * number, *ucode gives si_code. */ static int trap_pfault(struct trapframe *frame, bool usermode, int *signo, int *ucode) { struct thread *td; struct proc *p; vm_map_t map; vm_offset_t eva; int rv; vm_prot_t ftype; MPASS(!usermode || (signo != NULL && ucode != NULL)); td = curthread; p = td->td_proc; eva = frame->tf_addr; if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) { /* * Due to both processor errata and lazy TLB invalidation when * access restrictions are removed from virtual pages, memory * accesses that are allowed by the physical mapping layer may * nonetheless cause one spurious page fault per virtual page. * When the thread is executing a "no faulting" section that * is bracketed by vm_fault_{disable,enable}_pagefaults(), * every page fault is treated as a spurious page fault, * unless it accesses the same virtual address as the most * recent page fault within the same "no faulting" section. */ if (td->td_md.md_spurflt_addr != eva || (td->td_pflags & TDP_RESETSPUR) != 0) { /* * Do nothing to the TLB. A stale TLB entry is * flushed automatically by a page fault. */ td->td_md.md_spurflt_addr = eva; td->td_pflags &= ~TDP_RESETSPUR; return (0); } } else { /* * If we get a page fault while in a critical section, then * it is most likely a fatal kernel page fault. The kernel * is already going to panic trying to get a sleep lock to * do the VM lookup, so just consider it a fatal trap so the * kernel can print out a useful trap message and even get * to the debugger. * * If we get a page fault while holding a non-sleepable * lock, then it is most likely a fatal kernel page fault. * If WITNESS is enabled, then it's going to whine about * bogus LORs with various VM locks, so just skip to the * fatal trap handling directly. */ if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) { trap_fatal(frame, eva); return (-1); } } if (eva >= VM_MIN_KERNEL_ADDRESS) { /* * Don't allow user-mode faults in kernel address space. */ if (usermode) { *signo = SIGSEGV; *ucode = SEGV_MAPERR; return (1); } map = kernel_map; } else { map = &p->p_vmspace->vm_map; /* * When accessing a usermode address, kernel must be * ready to accept the page fault, and provide a * handling routine. Since accessing the address * without the handler is a bug, do not try to handle * it normally, and panic immediately. * * If SMAP is enabled, filter SMAP faults also, * because illegal access might occur to the mapped * user address, causing infinite loop. */ if (!usermode && (td->td_intr_nesting_level != 0 || trap_is_smap(frame) || curpcb->pcb_onfault == NULL)) { trap_fatal(frame, eva); return (-1); } } /* * If the trap was caused by errant bits in the PTE then panic. */ if (frame->tf_err & PGEX_RSV) { trap_fatal(frame, eva); return (-1); } /* * User-mode protection key violation (PKU). May happen * either from usermode or from kernel if copyin accessed * key-protected mapping. */ if ((frame->tf_err & PGEX_PK) != 0) { if (eva > VM_MAXUSER_ADDRESS) { trap_fatal(frame, eva); return (-1); } if (usermode) { *signo = SIGSEGV; *ucode = SEGV_PKUERR; return (1); } goto after_vmfault; } /* * If nx protection of the usermode portion of kernel page * tables caused trap, panic. */ if (usermode && trap_is_pti(frame)) panic("PTI: pid %d comm %s tf_err %#lx", p->p_pid, p->p_comm, frame->tf_err); /* * PGEX_I is defined only if the execute disable bit capability is * supported and enabled. */ if (frame->tf_err & PGEX_W) ftype = VM_PROT_WRITE; else if ((frame->tf_err & PGEX_I) && pg_nx != 0) ftype = VM_PROT_EXECUTE; else ftype = VM_PROT_READ; /* Fault in the page. */ rv = vm_fault_trap(map, eva, ftype, VM_FAULT_NORMAL, signo, ucode); if (rv == KERN_SUCCESS) { #ifdef HWPMC_HOOKS if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { PMC_SOFT_CALL_TF( , , page_fault, all, frame); if (ftype == VM_PROT_READ) PMC_SOFT_CALL_TF( , , page_fault, read, frame); else PMC_SOFT_CALL_TF( , , page_fault, write, frame); } #endif return (0); } if (usermode) return (1); after_vmfault: if (td->td_intr_nesting_level == 0 && curpcb->pcb_onfault != NULL) { frame->tf_rip = (long)curpcb->pcb_onfault; return (0); } trap_fatal(frame, eva); return (-1); } static void trap_fatal(frame, eva) struct trapframe *frame; vm_offset_t eva; { int code, ss; u_int type; struct soft_segment_descriptor softseg; struct user_segment_descriptor *gdt; #ifdef KDB bool handled; #endif code = frame->tf_err; type = frame->tf_trapno; gdt = *PCPU_PTR(gdt); sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)], &softseg); printf("\n\nFatal trap %d: %s while in %s mode\n", type, type < nitems(trap_msg) ? trap_msg[type] : UNKNOWN, TRAPF_USERMODE(frame) ? "user" : "kernel"); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif if (type == T_PAGEFLT) { printf("fault virtual address = 0x%lx\n", eva); printf("fault code = %s %s %s%s%s, %s\n", code & PGEX_U ? "user" : "supervisor", code & PGEX_W ? "write" : "read", code & PGEX_I ? "instruction" : "data", code & PGEX_PK ? " prot key" : "", code & PGEX_SGX ? " SGX" : "", code & PGEX_RSV ? "reserved bits in PTE" : code & PGEX_P ? "protection violation" : "page not present"); } printf("instruction pointer = 0x%lx:0x%lx\n", frame->tf_cs & 0xffff, frame->tf_rip); ss = frame->tf_ss & 0xffff; printf("stack pointer = 0x%x:0x%lx\n", ss, frame->tf_rsp); printf("frame pointer = 0x%x:0x%lx\n", ss, frame->tf_rbp); printf("code segment = base 0x%lx, limit 0x%lx, type 0x%x\n", softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); printf(" = DPL %d, pres %d, long %d, def32 %d, gran %d\n", softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_long, softseg.ssd_def32, softseg.ssd_gran); printf("processor eflags = "); if (frame->tf_rflags & PSL_T) printf("trace trap, "); if (frame->tf_rflags & PSL_I) printf("interrupt enabled, "); if (frame->tf_rflags & PSL_NT) printf("nested task, "); if (frame->tf_rflags & PSL_RF) printf("resume, "); printf("IOPL = %ld\n", (frame->tf_rflags & PSL_IOPL) >> 12); printf("current process = %d (%s)\n", curproc->p_pid, curthread->td_name); #ifdef KDB if (debugger_on_trap) { kdb_why = KDB_WHY_TRAP; handled = kdb_trap(type, 0, frame); kdb_why = KDB_WHY_UNSET; if (handled) return; } #endif printf("trap number = %d\n", type); panic("%s", type < nitems(trap_msg) ? trap_msg[type] : "unknown/reserved trap"); } #ifdef KDTRACE_HOOKS /* * Invoke a userspace DTrace hook. The hook pointer is cleared when no * userspace probes are enabled, so we must synchronize with DTrace to ensure * that a trapping thread is able to call the hook before it is cleared. */ static bool trap_user_dtrace(struct trapframe *frame, int (**hookp)(struct trapframe *)) { int (*hook)(struct trapframe *); hook = atomic_load_ptr(hookp); enable_intr(); if (hook != NULL) return ((hook)(frame) == 0); return (false); } #endif /* * Double fault handler. Called when a fault occurs while writing * a frame for a trap/exception onto the stack. This usually occurs * when the stack overflows (such is the case with infinite recursion, * for example). */ void dblfault_handler(struct trapframe *frame) { #ifdef KDTRACE_HOOKS if (dtrace_doubletrap_func != NULL) (*dtrace_doubletrap_func)(); #endif printf("\nFatal double fault\n" "rip %#lx rsp %#lx rbp %#lx\n" "rax %#lx rdx %#lx rbx %#lx\n" "rcx %#lx rsi %#lx rdi %#lx\n" "r8 %#lx r9 %#lx r10 %#lx\n" "r11 %#lx r12 %#lx r13 %#lx\n" "r14 %#lx r15 %#lx rflags %#lx\n" "cs %#lx ss %#lx ds %#hx es %#hx fs %#hx gs %#hx\n" "fsbase %#lx gsbase %#lx kgsbase %#lx\n", frame->tf_rip, frame->tf_rsp, frame->tf_rbp, frame->tf_rax, frame->tf_rdx, frame->tf_rbx, frame->tf_rcx, frame->tf_rdi, frame->tf_rsi, frame->tf_r8, frame->tf_r9, frame->tf_r10, frame->tf_r11, frame->tf_r12, frame->tf_r13, frame->tf_r14, frame->tf_r15, frame->tf_rflags, frame->tf_cs, frame->tf_ss, frame->tf_ds, frame->tf_es, frame->tf_fs, frame->tf_gs, rdmsr(MSR_FSBASE), rdmsr(MSR_GSBASE), rdmsr(MSR_KGSBASE)); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif panic("double fault"); } static int __noinline cpu_fetch_syscall_args_fallback(struct thread *td, struct syscall_args *sa) { struct proc *p; struct trapframe *frame; register_t *argp; caddr_t params; int reg, regcnt, error; p = td->td_proc; frame = td->td_frame; reg = 0; regcnt = NARGREGS; if (sa->code == SYS_syscall || sa->code == SYS___syscall) { sa->code = frame->tf_rdi; reg++; regcnt--; } if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; - sa->narg = sa->callp->sy_narg; - KASSERT(sa->narg <= nitems(sa->args), ("Too many syscall arguments!")); + KASSERT(sa->callp->sy_narg <= nitems(sa->args), + ("Too many syscall arguments!")); argp = &frame->tf_rdi; argp += reg; memcpy(sa->args, argp, sizeof(sa->args[0]) * NARGREGS); - if (sa->narg > regcnt) { + if (sa->callp->sy_narg > regcnt) { params = (caddr_t)frame->tf_rsp + sizeof(register_t); error = copyin(params, &sa->args[regcnt], - (sa->narg - regcnt) * sizeof(sa->args[0])); + (sa->callp->sy_narg - regcnt) * sizeof(sa->args[0])); if (__predict_false(error != 0)) return (error); } td->td_retval[0] = 0; td->td_retval[1] = frame->tf_rdx; return (0); } int cpu_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; struct syscall_args *sa; p = td->td_proc; frame = td->td_frame; sa = &td->td_sa; sa->code = frame->tf_rax; if (__predict_false(sa->code == SYS_syscall || sa->code == SYS___syscall || sa->code >= p->p_sysent->sv_size)) return (cpu_fetch_syscall_args_fallback(td, sa)); sa->callp = &p->p_sysent->sv_table[sa->code]; - sa->narg = sa->callp->sy_narg; - KASSERT(sa->narg <= nitems(sa->args), ("Too many syscall arguments!")); + KASSERT(sa->callp->sy_narg <= nitems(sa->args), + ("Too many syscall arguments!")); - if (__predict_false(sa->narg > NARGREGS)) + if (__predict_false(sa->callp->sy_narg > NARGREGS)) return (cpu_fetch_syscall_args_fallback(td, sa)); memcpy(sa->args, &frame->tf_rdi, sizeof(sa->args[0]) * NARGREGS); td->td_retval[0] = 0; td->td_retval[1] = frame->tf_rdx; return (0); } #include "../../kern/subr_syscall.c" static void (*syscall_ret_l1d_flush)(void); int syscall_ret_l1d_flush_mode; static void flush_l1d_hw(void) { wrmsr(MSR_IA32_FLUSH_CMD, IA32_FLUSH_CMD_L1D); } static void __noinline amd64_syscall_ret_flush_l1d_check(int error) { void (*p)(void); if (error != EEXIST && error != EAGAIN && error != EXDEV && error != ENOENT && error != ENOTCONN && error != EINPROGRESS) { p = atomic_load_ptr(&syscall_ret_l1d_flush); if (p != NULL) p(); } } static void __inline amd64_syscall_ret_flush_l1d_check_inline(int error) { if (__predict_false(error != 0)) amd64_syscall_ret_flush_l1d_check(error); } void amd64_syscall_ret_flush_l1d(int error) { amd64_syscall_ret_flush_l1d_check_inline(error); } void amd64_syscall_ret_flush_l1d_recalc(void) { bool l1d_hw; l1d_hw = (cpu_stdext_feature3 & CPUID_STDEXT3_L1D_FLUSH) != 0; again: switch (syscall_ret_l1d_flush_mode) { case 0: syscall_ret_l1d_flush = NULL; break; case 1: syscall_ret_l1d_flush = l1d_hw ? flush_l1d_hw : flush_l1d_sw_abi; break; case 2: syscall_ret_l1d_flush = l1d_hw ? flush_l1d_hw : NULL; break; case 3: syscall_ret_l1d_flush = flush_l1d_sw_abi; break; default: syscall_ret_l1d_flush_mode = 1; goto again; } } static int machdep_syscall_ret_flush_l1d(SYSCTL_HANDLER_ARGS) { int error, val; val = syscall_ret_l1d_flush_mode; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); syscall_ret_l1d_flush_mode = val; amd64_syscall_ret_flush_l1d_recalc(); return (0); } SYSCTL_PROC(_machdep, OID_AUTO, syscall_ret_flush_l1d, CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, NULL, 0, machdep_syscall_ret_flush_l1d, "I", "Flush L1D on syscall return with error (0 - off, 1 - on, " "2 - use hw only, 3 - use sw only"); /* * System call handler for native binaries. The trap frame is already * set up by the assembler trampoline and a pointer to it is saved in * td_frame. */ void amd64_syscall(struct thread *td, int traced) { ksiginfo_t ksi; #ifdef DIAGNOSTIC if (!TRAPF_USERMODE(td->td_frame)) { panic("syscall"); /* NOT REACHED */ } #endif syscallenter(td); /* * Traced syscall. */ if (__predict_false(traced)) { td->td_frame->tf_rflags &= ~PSL_T; ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGTRAP; ksi.ksi_code = TRAP_TRACE; ksi.ksi_addr = (void *)td->td_frame->tf_rip; trapsignal(td, &ksi); } KASSERT(PCB_USER_FPU(td->td_pcb), ("System call %s returning with kernel FPU ctx leaked", syscallname(td->td_proc, td->td_sa.code))); KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td), ("System call %s returning with mangled pcb_save", syscallname(td->td_proc, td->td_sa.code))); KASSERT(pmap_not_in_di(), ("System call %s returning with leaked invl_gen %lu", syscallname(td->td_proc, td->td_sa.code), td->td_md.md_invl_gen.gen)); syscallret(td); /* * If the user-supplied value of %rip is not a canonical * address, then some CPUs will trigger a ring 0 #GP during * the sysret instruction. However, the fault handler would * execute in ring 0 with the user's %gs and %rsp which would * not be safe. Instead, use the full return path which * catches the problem safely. */ if (__predict_false(td->td_frame->tf_rip >= (la57 ? VM_MAXUSER_ADDRESS_LA57 : VM_MAXUSER_ADDRESS_LA48))) set_pcb_flags(td->td_pcb, PCB_FULL_IRET); amd64_syscall_ret_flush_l1d_check_inline(td->td_errno); } Index: head/sys/amd64/cloudabi32/cloudabi32_sysvec.c =================================================================== --- head/sys/amd64/cloudabi32/cloudabi32_sysvec.c (revision 366204) +++ head/sys/amd64/cloudabi32/cloudabi32_sysvec.c (revision 366205) @@ -1,234 +1,233 @@ /*- * Copyright (c) 2015-2016 Nuxi, https://nuxi.nl/ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern const char *cloudabi32_syscallnames[]; extern struct sysent cloudabi32_sysent[]; extern unsigned long ia32_maxssiz; static int cloudabi32_fixup_tcb(uintptr_t *stack_base, struct image_params *imgp) { int error; uint32_t args[2]; /* Place auxiliary vector and TCB on the stack. */ error = cloudabi32_fixup(stack_base, imgp); if (error != 0) return (error); /* * On i386, the TCB is referred to by %gs:0. Reuse the empty * space normally used by the return address (args[0]) to store * a single element array, containing a pointer to the TCB. %gs * base will point to this. * * Also let the first argument of the entry point (args[1]) * refer to the auxiliary vector, which is stored right after * the TCB. */ args[0] = *stack_base; args[1] = *stack_base + roundup(sizeof(cloudabi32_tcb_t), sizeof(register_t)); *stack_base -= roundup2(sizeof(args), sizeof(register_t)); return (copyout(args, (void *)*stack_base, sizeof(args))); } static void cloudabi32_proc_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { ia32_setregs(td, imgp, stack); (void)cpu_set_user_tls(td, TO_PTR(stack)); } static int cloudabi32_fetch_syscall_args(struct thread *td) { struct trapframe *frame; struct syscall_args *sa; int error; frame = td->td_frame; sa = &td->td_sa; /* Obtain system call number. */ sa->code = frame->tf_rax; if (sa->code >= CLOUDABI32_SYS_MAXSYSCALL) return (ENOSYS); sa->callp = &cloudabi32_sysent[sa->code]; - sa->narg = sa->callp->sy_narg; /* * Fetch system call arguments. * * The vDSO has already made sure that the arguments are * eight-byte aligned. Pointers and size_t parameters are * zero-extended. This makes it possible to copy in the * arguments directly. As long as the call doesn't use 32-bit * data structures, we can just invoke the same system call * implementation used by 64-bit processes. */ error = copyin((void *)frame->tf_rcx, sa->args, - sa->narg * sizeof(sa->args[0])); + sa->callp->sy_narg * sizeof(sa->args[0])); if (error != 0) return (error); /* Default system call return values. */ td->td_retval[0] = 0; td->td_retval[1] = 0; return (0); } static void cloudabi32_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame = td->td_frame; switch (error) { case 0: /* * System call succeeded. * * Simply copy out the 64-bit return values into the * same buffer provided for system call arguments. The * vDSO will copy them to the right spot, truncating * pointers and size_t values to 32 bits. */ frame->tf_rax = copyout(td->td_retval, (void *)frame->tf_rcx, sizeof(td->td_retval)) == 0 ? 0 : CLOUDABI_EFAULT; break; case ERESTART: /* Restart system call. */ frame->tf_rip -= frame->tf_err; frame->tf_r10 = frame->tf_rcx; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); break; case EJUSTRETURN: break; default: /* System call returned an error. */ frame->tf_rax = cloudabi_convert_errno(error); break; } } static void cloudabi32_schedtail(struct thread *td) { struct trapframe *frame = td->td_frame; register_t retval[2]; /* Return values for processes returning from fork. */ if ((td->td_pflags & TDP_FORKING) != 0) { retval[0] = CLOUDABI_PROCESS_CHILD; retval[1] = td->td_tid; copyout(retval, (void *)frame->tf_rcx, sizeof(retval)); } } int cloudabi32_thread_setregs(struct thread *td, const cloudabi32_threadattr_t *attr, uint32_t tcb) { stack_t stack; uint32_t args[3]; void *frameptr; int error; /* Perform standard register initialization. */ stack.ss_sp = TO_PTR(attr->stack); stack.ss_size = attr->stack_len - sizeof(args); cpu_set_upcall(td, TO_PTR(attr->entry_point), NULL, &stack); /* * Copy the arguments for the thread entry point onto the stack * (args[1] and args[2]). Similar to process startup, use the * otherwise unused return address (args[0]) for TLS. */ args[0] = tcb; args[1] = td->td_tid; args[2] = attr->argument; frameptr = (void *)td->td_frame->tf_rsp; error = copyout(args, frameptr, sizeof(args)); if (error != 0) return (error); return (cpu_set_user_tls(td, frameptr)); } static struct sysentvec cloudabi32_elf_sysvec = { .sv_size = CLOUDABI32_SYS_MAXSYSCALL, .sv_table = cloudabi32_sysent, .sv_fixup = cloudabi32_fixup_tcb, .sv_name = "CloudABI ELF32", .sv_coredump = elf32_coredump, .sv_minuser = FREEBSD32_MINUSER, .sv_maxuser = FREEBSD32_MAXUSER, .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, .sv_copyout_strings = cloudabi32_copyout_strings, .sv_setregs = cloudabi32_proc_setregs, .sv_fixlimit = ia32_fixlimit, .sv_maxssiz = &ia32_maxssiz, .sv_flags = SV_ABI_CLOUDABI | SV_CAPSICUM | SV_IA32 | SV_ILP32, .sv_set_syscall_retval = cloudabi32_set_syscall_retval, .sv_fetch_syscall_args = cloudabi32_fetch_syscall_args, .sv_syscallnames = cloudabi32_syscallnames, .sv_schedtail = cloudabi32_schedtail, }; INIT_SYSENTVEC(elf_sysvec, &cloudabi32_elf_sysvec); Elf32_Brandinfo cloudabi32_brand = { .brand = ELFOSABI_CLOUDABI, .machine = EM_386, .sysvec = &cloudabi32_elf_sysvec, .flags = BI_BRAND_ONLY_STATIC, }; Index: head/sys/amd64/cloudabi64/cloudabi64_sysvec.c =================================================================== --- head/sys/amd64/cloudabi64/cloudabi64_sysvec.c (revision 366204) +++ head/sys/amd64/cloudabi64/cloudabi64_sysvec.c (revision 366205) @@ -1,221 +1,220 @@ /*- * Copyright (c) 2015 Nuxi, https://nuxi.nl/ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include extern const char *cloudabi64_syscallnames[]; extern struct sysent cloudabi64_sysent[]; static int cloudabi64_fixup_tcb(uintptr_t *stack_base, struct image_params *imgp) { int error; register_t tcbptr; /* Place auxiliary vector and TCB on the stack. */ error = cloudabi64_fixup(stack_base, imgp); if (error != 0) return (error); /* * On x86-64, the TCB is referred to by %fs:0. Take some space * from the top of the stack to store a single element array, * containing a pointer to the TCB. %fs base will point to this. */ tcbptr = (register_t)*stack_base; *stack_base -= sizeof(tcbptr); return (copyout(&tcbptr, (void *)*stack_base, sizeof(tcbptr))); } static void cloudabi64_proc_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct trapframe *regs; exec_setregs(td, imgp, stack); /* * The stack now contains a pointer to the TCB, the TCB itself, * and the auxiliary vector. Let %rdx point to the auxiliary * vector, and set %fs base to the address of the TCB. */ regs = td->td_frame; regs->tf_rdi = stack + sizeof(register_t) + roundup(sizeof(cloudabi64_tcb_t), sizeof(register_t)); (void)cpu_set_user_tls(td, TO_PTR(stack)); } static int cloudabi64_fetch_syscall_args(struct thread *td) { struct trapframe *frame; struct syscall_args *sa; frame = td->td_frame; sa = &td->td_sa; /* Obtain system call number. */ sa->code = frame->tf_rax; if (sa->code >= CLOUDABI64_SYS_MAXSYSCALL) return (ENOSYS); sa->callp = &cloudabi64_sysent[sa->code]; - sa->narg = sa->callp->sy_narg; /* Fetch system call arguments. */ sa->args[0] = frame->tf_rdi; sa->args[1] = frame->tf_rsi; sa->args[2] = frame->tf_rdx; sa->args[3] = frame->tf_rcx; /* Actually %r10. */ sa->args[4] = frame->tf_r8; sa->args[5] = frame->tf_r9; /* Default system call return values. */ td->td_retval[0] = 0; td->td_retval[1] = frame->tf_rdx; return (0); } static void cloudabi64_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame = td->td_frame; switch (error) { case 0: /* System call succeeded. */ frame->tf_rax = td->td_retval[0]; frame->tf_rdx = td->td_retval[1]; frame->tf_rflags &= ~PSL_C; break; case ERESTART: /* Restart system call. */ frame->tf_rip -= frame->tf_err; frame->tf_r10 = frame->tf_rcx; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); break; case EJUSTRETURN: break; default: /* System call returned an error. */ frame->tf_rax = cloudabi_convert_errno(error); frame->tf_rflags |= PSL_C; break; } } static void cloudabi64_schedtail(struct thread *td) { struct trapframe *frame = td->td_frame; /* Initial register values for processes returning from fork. */ frame->tf_rax = CLOUDABI_PROCESS_CHILD; frame->tf_rdx = td->td_tid; } int cloudabi64_thread_setregs(struct thread *td, const cloudabi64_threadattr_t *attr, uint64_t tcb) { struct trapframe *frame; stack_t stack; uint64_t tcbptr; int error; /* * On x86-64, the TCB is referred to by %fs:0. Take some space * from the top of the stack to store a single element array, * containing a pointer to the TCB. %fs base will point to this. */ tcbptr = rounddown(attr->stack + attr->stack_len - sizeof(tcbptr), _Alignof(tcbptr)); error = copyout(&tcb, (void *)tcbptr, sizeof(tcb)); if (error != 0) return (error); /* Perform standard register initialization. */ stack.ss_sp = TO_PTR(attr->stack); stack.ss_size = tcbptr - attr->stack; cpu_set_upcall(td, TO_PTR(attr->entry_point), NULL, &stack); /* * Pass in the thread ID of the new thread and the argument * pointer provided by the parent thread in as arguments to the * entry point. */ frame = td->td_frame; frame->tf_rdi = td->td_tid; frame->tf_rsi = attr->argument; return (cpu_set_user_tls(td, TO_PTR(tcbptr))); } static struct sysentvec cloudabi64_elf_sysvec = { .sv_size = CLOUDABI64_SYS_MAXSYSCALL, .sv_table = cloudabi64_sysent, .sv_fixup = cloudabi64_fixup_tcb, .sv_name = "CloudABI ELF64", .sv_coredump = elf64_coredump, .sv_minuser = VM_MIN_ADDRESS, /* Keep top page reserved to work around AMD Ryzen stability issues. */ .sv_maxuser = VM_MAXUSER_ADDRESS - PAGE_SIZE, .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, .sv_copyout_strings = cloudabi64_copyout_strings, .sv_setregs = cloudabi64_proc_setregs, .sv_flags = SV_ABI_CLOUDABI | SV_CAPSICUM | SV_LP64, .sv_set_syscall_retval = cloudabi64_set_syscall_retval, .sv_fetch_syscall_args = cloudabi64_fetch_syscall_args, .sv_syscallnames = cloudabi64_syscallnames, .sv_schedtail = cloudabi64_schedtail, }; INIT_SYSENTVEC(elf_sysvec, &cloudabi64_elf_sysvec); Elf64_Brandinfo cloudabi64_brand = { .brand = ELFOSABI_CLOUDABI, .machine = EM_X86_64, .sysvec = &cloudabi64_elf_sysvec, .flags = BI_CAN_EXEC_DYN | BI_BRAND_ONLY_STATIC, }; Index: head/sys/amd64/ia32/ia32_syscall.c =================================================================== --- head/sys/amd64/ia32/ia32_syscall.c (revision 366204) +++ head/sys/amd64/ia32/ia32_syscall.c (revision 366205) @@ -1,281 +1,280 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 1994, David Greenman * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * 386 Trap and System call handling */ #include "opt_clock.h" #include "opt_cpu.h" #include "opt_isa.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define IDTVEC(name) __CONCAT(X,name) extern inthand_t IDTVEC(int0x80_syscall), IDTVEC(int0x80_syscall_pti), IDTVEC(rsvd), IDTVEC(rsvd_pti); void ia32_syscall(struct trapframe *frame); /* Called from asm code */ void ia32_set_syscall_retval(struct thread *td, int error) { cpu_set_syscall_retval(td, error); } int ia32_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; struct syscall_args *sa; caddr_t params; u_int32_t args[8], tmp; int error, i; #ifdef COMPAT_43 u_int32_t eip; int cs; #endif p = td->td_proc; frame = td->td_frame; sa = &td->td_sa; #ifdef COMPAT_43 if (__predict_false(frame->tf_cs == 7 && frame->tf_rip == 2)) { /* * In lcall $7,$0 after int $0x80. Convert the user * frame to what it would be for a direct int 0x80 instead * of lcall $7,$0, by popping the lcall return address. */ error = fueword32((void *)frame->tf_rsp, &eip); if (error == -1) return (EFAULT); cs = fuword16((void *)(frame->tf_rsp + sizeof(u_int32_t))); if (cs == -1) return (EFAULT); /* * Unwind in-kernel frame after all stack frame pieces * were successfully read. */ frame->tf_rip = eip; frame->tf_cs = cs; frame->tf_rsp += 2 * sizeof(u_int32_t); frame->tf_err = 7; /* size of lcall $7,$0 */ } #endif params = (caddr_t)frame->tf_rsp + sizeof(u_int32_t); sa->code = frame->tf_rax; /* * Need to check if this is a 32 bit or 64 bit syscall. */ if (sa->code == SYS_syscall) { /* * Code is first argument, followed by actual args. */ error = fueword32(params, &tmp); if (error == -1) return (EFAULT); sa->code = tmp; params += sizeof(int); } else if (sa->code == SYS___syscall) { /* * Like syscall, but code is a quad, so as to maintain * quad alignment for the rest of the arguments. * We use a 32-bit fetch in case params is not * aligned. */ error = fueword32(params, &tmp); if (error == -1) return (EFAULT); sa->code = tmp; params += sizeof(quad_t); } if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; - sa->narg = sa->callp->sy_narg; - if (params != NULL && sa->narg != 0) + if (params != NULL && sa->callp->sy_narg != 0) error = copyin(params, (caddr_t)args, - (u_int)(sa->narg * sizeof(int))); + (u_int)(sa->callp->sy_narg * sizeof(int))); else error = 0; - for (i = 0; i < sa->narg; i++) + for (i = 0; i < sa->callp->sy_narg; i++) sa->args[i] = args[i]; if (error == 0) { td->td_retval[0] = 0; td->td_retval[1] = frame->tf_rdx; } return (error); } #include "../../kern/subr_syscall.c" void ia32_syscall(struct trapframe *frame) { struct thread *td; register_t orig_tf_rflags; ksiginfo_t ksi; orig_tf_rflags = frame->tf_rflags; td = curthread; td->td_frame = frame; syscallenter(td); /* * Traced syscall. */ if (orig_tf_rflags & PSL_T) { frame->tf_rflags &= ~PSL_T; ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGTRAP; ksi.ksi_code = TRAP_TRACE; ksi.ksi_addr = (void *)frame->tf_rip; trapsignal(td, &ksi); } syscallret(td); amd64_syscall_ret_flush_l1d(td->td_errno); } static void ia32_syscall_enable(void *dummy) { setidt(IDT_SYSCALL, pti ? &IDTVEC(int0x80_syscall_pti) : &IDTVEC(int0x80_syscall), SDT_SYSIGT, SEL_UPL, 0); } static void ia32_syscall_disable(void *dummy) { setidt(IDT_SYSCALL, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); } SYSINIT(ia32_syscall, SI_SUB_EXEC, SI_ORDER_ANY, ia32_syscall_enable, NULL); SYSUNINIT(ia32_syscall, SI_SUB_EXEC, SI_ORDER_ANY, ia32_syscall_disable, NULL); #ifdef COMPAT_43 int setup_lcall_gate(void) { struct i386_ldt_args uap; struct user_segment_descriptor desc; uint32_t lcall_addr; int error; bzero(&uap, sizeof(uap)); uap.start = 0; uap.num = 1; lcall_addr = curproc->p_sysent->sv_psstrings - sz_lcall_tramp; bzero(&desc, sizeof(desc)); desc.sd_type = SDT_MEMERA; desc.sd_dpl = SEL_UPL; desc.sd_p = 1; desc.sd_def32 = 1; desc.sd_gran = 1; desc.sd_lolimit = 0xffff; desc.sd_hilimit = 0xf; desc.sd_lobase = lcall_addr; desc.sd_hibase = lcall_addr >> 24; error = amd64_set_ldt(curthread, &uap, &desc); if (error != 0) return (error); return (0); } #endif Index: head/sys/amd64/include/proc.h =================================================================== --- head/sys/amd64/include/proc.h (revision 366204) +++ head/sys/amd64/include/proc.h (revision 366205) @@ -1,125 +1,124 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)proc.h 7.1 (Berkeley) 5/15/91 * $FreeBSD$ */ #ifndef _MACHINE_PROC_H_ #define _MACHINE_PROC_H_ #include #include #include /* * List of locks * c - proc lock * k - only accessed by curthread * pp - pmap.c:invl_gen_mtx */ struct proc_ldt { caddr_t ldt_base; int ldt_refcnt; }; #define PMAP_INVL_GEN_NEXT_INVALID 0x1ULL struct pmap_invl_gen { u_long gen; /* (k) */ union { LIST_ENTRY(pmap_invl_gen) link; /* (pp) */ struct { struct pmap_invl_gen *next; u_char saved_pri; }; }; } __aligned(16); /* * Machine-dependent part of the proc structure for AMD64. */ struct mdthread { int md_spinlock_count; /* (k) */ register_t md_saved_flags; /* (k) */ register_t md_spurflt_addr; /* (k) Spurious page fault address. */ struct pmap_invl_gen md_invl_gen; register_t md_efirt_tmp; /* (k) */ int md_efirt_dis_pf; /* (k) */ struct pcb md_pcb; vm_offset_t md_stack_base; }; struct mdproc { struct proc_ldt *md_ldt; /* (t) per-process ldt */ struct system_segment_descriptor md_ldt_sd; u_int md_flags; /* (c) md process flags P_MD */ }; #define P_MD_KPTI 0x00000001 /* Enable KPTI on exec */ #define P_MD_LA48 0x00000002 /* Request LA48 after exec */ #define P_MD_LA57 0x00000004 /* Request LA57 after exec */ #define KINFO_PROC_SIZE 1088 #define KINFO_PROC32_SIZE 768 struct syscall_args { u_int code; struct sysent *callp; register_t args[8]; - int narg; }; #ifdef _KERNEL /* Get the current kernel thread stack usage. */ #define GET_STACK_USAGE(total, used) do { \ struct thread *td = curthread; \ (total) = td->td_kstack_pages * PAGE_SIZE; \ (used) = (char *)td->td_kstack + \ td->td_kstack_pages * PAGE_SIZE - \ (char *)&td; \ } while (0) struct proc_ldt *user_ldt_alloc(struct proc *, int); void user_ldt_free(struct thread *); struct sysarch_args; int sysarch_ldt(struct thread *td, struct sysarch_args *uap, int uap_space); int amd64_set_ldt_data(struct thread *td, int start, int num, struct user_segment_descriptor *descs); extern struct mtx dt_lock; extern int max_ldt_segment; #define NARGREGS 6 #endif /* _KERNEL */ #endif /* !_MACHINE_PROC_H_ */ Index: head/sys/amd64/linux/linux_sysvec.c =================================================================== --- head/sys/amd64/linux/linux_sysvec.c (revision 366204) +++ head/sys/amd64/linux/linux_sysvec.c (revision 366205) @@ -1,938 +1,937 @@ /*- * Copyright (c) 2013 Dmitry Chagin * Copyright (c) 2004 Tim J. Robbins * Copyright (c) 2003 Peter Wemm * Copyright (c) 2002 Doug Rabson * Copyright (c) 1998-1999 Andrew Gallatin * Copyright (c) 1994-1996 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #define __ELF_WORD_SIZE 64 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_VERSION(linux64, 1); const char *linux_kplatform; static int linux_szsigcode; static vm_object_t linux_shared_page_obj; static char *linux_shared_page_mapping; extern char _binary_linux_locore_o_start; extern char _binary_linux_locore_o_end; extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); static int linux_copyout_strings(struct image_params *imgp, uintptr_t *stack_base); static int linux_fixup_elf(uintptr_t *stack_base, struct image_params *iparams); static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel); static void linux_vdso_install(void *param); static void linux_vdso_deinstall(void *param); static void linux_set_syscall_retval(struct thread *td, int error); static int linux_fetch_syscall_args(struct thread *td); static void linux_exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack); static int linux_vsyscall(struct thread *td); #define LINUX_T_UNKNOWN 255 static int _bsd_to_linux_trapcode[] = { LINUX_T_UNKNOWN, /* 0 */ 6, /* 1 T_PRIVINFLT */ LINUX_T_UNKNOWN, /* 2 */ 3, /* 3 T_BPTFLT */ LINUX_T_UNKNOWN, /* 4 */ LINUX_T_UNKNOWN, /* 5 */ 16, /* 6 T_ARITHTRAP */ 254, /* 7 T_ASTFLT */ LINUX_T_UNKNOWN, /* 8 */ 13, /* 9 T_PROTFLT */ 1, /* 10 T_TRCTRAP */ LINUX_T_UNKNOWN, /* 11 */ 14, /* 12 T_PAGEFLT */ LINUX_T_UNKNOWN, /* 13 */ 17, /* 14 T_ALIGNFLT */ LINUX_T_UNKNOWN, /* 15 */ LINUX_T_UNKNOWN, /* 16 */ LINUX_T_UNKNOWN, /* 17 */ 0, /* 18 T_DIVIDE */ 2, /* 19 T_NMI */ 4, /* 20 T_OFLOW */ 5, /* 21 T_BOUND */ 7, /* 22 T_DNA */ 8, /* 23 T_DOUBLEFLT */ 9, /* 24 T_FPOPFLT */ 10, /* 25 T_TSSFLT */ 11, /* 26 T_SEGNPFLT */ 12, /* 27 T_STKFLT */ 18, /* 28 T_MCHK */ 19, /* 29 T_XMMFLT */ 15 /* 30 T_RESERVED */ }; #define bsd_to_linux_trapcode(code) \ ((code)td_proc; frame = td->td_frame; sa = &td->td_sa; sa->args[0] = frame->tf_rdi; sa->args[1] = frame->tf_rsi; sa->args[2] = frame->tf_rdx; sa->args[3] = frame->tf_rcx; sa->args[4] = frame->tf_r8; sa->args[5] = frame->tf_r9; sa->code = frame->tf_rax; if (sa->code >= p->p_sysent->sv_size) /* nosys */ sa->callp = &p->p_sysent->sv_table[p->p_sysent->sv_size - 1]; else sa->callp = &p->p_sysent->sv_table[sa->code]; - sa->narg = sa->callp->sy_narg; td->td_retval[0] = 0; return (0); } static void linux_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame = td->td_frame; /* * On Linux only %rcx and %r11 values are not preserved across * the syscall. So, do not clobber %rdx and %r10. */ td->td_retval[1] = frame->tf_rdx; if (error != EJUSTRETURN) frame->tf_r10 = frame->tf_rcx; cpu_set_syscall_retval(td, error); if (__predict_false(error != 0)) { if (error != ERESTART && error != EJUSTRETURN) frame->tf_rax = linux_to_bsd_errno(error); } /* Restore all registers. */ set_pcb_flags(td->td_pcb, PCB_FULL_IRET); } static int linux_copyout_auxargs(struct image_params *imgp, uintptr_t base) { Elf_Auxargs *args; Elf_Auxinfo *argarray, *pos; struct proc *p; int error, issetugid; p = imgp->proc; args = (Elf64_Auxargs *)imgp->auxargs; argarray = pos = malloc(LINUX_AT_COUNT * sizeof(*pos), M_TEMP, M_WAITOK | M_ZERO); issetugid = p->p_flag & P_SUGID ? 1 : 0; AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, imgp->proc->p_sysent->sv_shared_page_base); AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature); AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz); AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); AUXARGS_ENTRY(pos, AT_PHENT, args->phent); AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); AUXARGS_ENTRY(pos, AT_BASE, args->base); AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); AUXARGS_ENTRY(pos, LINUX_AT_SECURE, issetugid); AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform)); AUXARGS_ENTRY_PTR(pos, LINUX_AT_RANDOM, imgp->canary); if (imgp->execpathp != 0) AUXARGS_ENTRY_PTR(pos, LINUX_AT_EXECFN, imgp->execpathp); if (args->execfd != -1) AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); AUXARGS_ENTRY(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); imgp->auxargs = NULL; KASSERT(pos - argarray <= LINUX_AT_COUNT, ("Too many auxargs")); error = copyout(argarray, (void *)base, sizeof(*argarray) * LINUX_AT_COUNT); free(argarray, M_TEMP); return (error); } static int linux_fixup_elf(uintptr_t *stack_base, struct image_params *imgp) { Elf_Addr *base; base = (Elf64_Addr *)*stack_base; base--; if (suword(base, (uint64_t)imgp->args->argc) == -1) return (EFAULT); *stack_base = (uintptr_t)base; return (0); } /* * Copy strings out to the new process address space, constructing new arg * and env vector tables. Return a pointer to the base so that it can be used * as the initial stack pointer. */ static int linux_copyout_strings(struct image_params *imgp, uintptr_t *stack_base) { int argc, envc, error; char **vectp; char *stringp; uintptr_t destp, ustringp; struct ps_strings *arginfo; char canary[LINUX_AT_RANDOM_LEN]; size_t execpath_len; struct proc *p; /* Calculate string base and vector table pointers. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; p = imgp->proc; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; destp = (uintptr_t)arginfo; if (execpath_len != 0) { destp -= execpath_len; destp = rounddown2(destp, sizeof(void *)); imgp->execpathp = (void *)destp; error = copyout(imgp->execpath, imgp->execpathp, execpath_len); if (error != 0) return (error); } /* Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); destp -= roundup(sizeof(canary), sizeof(void *)); imgp->canary = (void *)destp; error = copyout(canary, imgp->canary, sizeof(canary)); if (error != 0) return (error); /* Allocate room for the argument and environment strings. */ destp -= ARG_MAX - imgp->args->stringspace; destp = rounddown2(destp, sizeof(void *)); ustringp = destp; if (imgp->auxargs) { /* * Allocate room on the stack for the ELF auxargs * array. It has LINUX_AT_COUNT entries. */ destp -= LINUX_AT_COUNT * sizeof(Elf64_Auxinfo); destp = rounddown2(destp, sizeof(void *)); } vectp = (char **)destp; /* * Allocate room for the argv[] and env vectors including the * terminating NULL pointers. */ vectp -= imgp->args->argc + 1 + imgp->args->envc + 1; /* * Starting with 2.24, glibc depends on a 16-byte stack alignment. * One "long argc" will be prepended later. */ vectp = (char **)((((uintptr_t)vectp + 8) & ~0xF) - 8); /* vectp also becomes our initial stack base. */ *stack_base = (uintptr_t)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* Copy out strings - arguments and environment. */ error = copyout(stringp, (void *)ustringp, ARG_MAX - imgp->args->stringspace); if (error != 0) return (error); /* Fill in "ps_strings" struct for ps, w, etc. */ if (suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp) != 0 || suword(&arginfo->ps_nargvstr, argc) != 0) return (EFAULT); /* Fill in argument portion of vector table. */ for (; argc > 0; --argc) { if (suword(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* A null vector table pointer separates the argp's from the envp's. */ if (suword(vectp++, 0) != 0) return (EFAULT); if (suword(&arginfo->ps_envstr, (long)(intptr_t)vectp) != 0 || suword(&arginfo->ps_nenvstr, envc) != 0) return (EFAULT); /* Fill in environment portion of vector table. */ for (; envc > 0; --envc) { if (suword(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* The end of the vector table is a null pointer. */ if (suword(vectp, 0) != 0) return (EFAULT); if (imgp->auxargs) { vectp++; error = imgp->sysent->sv_copyout_auxargs(imgp, (uintptr_t)vectp); if (error != 0) return (error); } return (0); } /* * Reset registers to default values on exec. */ static void linux_exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct trapframe *regs; struct pcb *pcb; register_t saved_rflags; regs = td->td_frame; pcb = td->td_pcb; if (td->td_proc->p_md.md_ldt != NULL) user_ldt_free(td); pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; clear_pcb_flags(pcb, PCB_32BIT); pcb->pcb_initial_fpucw = __LINUX_NPXCW__; set_pcb_flags(pcb, PCB_FULL_IRET); saved_rflags = regs->tf_rflags & PSL_T; bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = imgp->entry_addr; regs->tf_rsp = stack; regs->tf_rflags = PSL_USER | saved_rflags; regs->tf_ss = _udatasel; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == curpcb) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } clear_pcb_flags(pcb, PCB_DBREGS); } /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); } /* * Copied from amd64/amd64/machdep.c * * XXX fpu state need? don't think so */ int linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) { struct proc *p; struct l_ucontext uc; struct l_sigcontext *context; struct trapframe *regs; unsigned long rflags; int error; ksiginfo_t ksi; regs = td->td_frame; error = copyin((void *)regs->tf_rbx, &uc, sizeof(uc)); if (error != 0) return (error); p = td->td_proc; context = &uc.uc_mcontext; rflags = context->sc_rflags; /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_rflags for faults. Debuggers * should sometimes set it there too. tf_rflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ #define RFLAG_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) if (!RFLAG_SECURE(rflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) { printf("linux_rt_sigreturn: rflags = 0x%lx\n", rflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(context->sc_cs)) { printf("linux_rt_sigreturn: cs = 0x%x\n", context->sc_cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_rip; trapsignal(td, &ksi); return (EINVAL); } PROC_LOCK(p); linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask); SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); regs->tf_rdi = context->sc_rdi; regs->tf_rsi = context->sc_rsi; regs->tf_rdx = context->sc_rdx; regs->tf_rbp = context->sc_rbp; regs->tf_rbx = context->sc_rbx; regs->tf_rcx = context->sc_rcx; regs->tf_rax = context->sc_rax; regs->tf_rip = context->sc_rip; regs->tf_rsp = context->sc_rsp; regs->tf_r8 = context->sc_r8; regs->tf_r9 = context->sc_r9; regs->tf_r10 = context->sc_r10; regs->tf_r11 = context->sc_r11; regs->tf_r12 = context->sc_r12; regs->tf_r13 = context->sc_r13; regs->tf_r14 = context->sc_r14; regs->tf_r15 = context->sc_r15; regs->tf_cs = context->sc_cs; regs->tf_err = context->sc_err; regs->tf_rflags = rflags; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); return (EJUSTRETURN); } /* * copied from amd64/amd64/machdep.c * * Send an interrupt to process. */ static void linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct l_rt_sigframe sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; caddr_t sp; struct trapframe *regs; int sig, code; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; code = ksi->ksi_code; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); LINUX_CTR4(rt_sendsig, "%p, %d, %p, %u", catcher, sig, mask, code); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = (caddr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe); } else sp = (caddr_t)regs->tf_rsp - sizeof(struct l_rt_sigframe) - 128; /* Align to 16 bytes. */ sfp = (struct l_rt_sigframe *)((unsigned long)sp & ~0xFul); mtx_unlock(&psp->ps_mtx); /* Translate the signal. */ sig = bsd_to_linux_signal(sig); /* Save user context. */ bzero(&sf, sizeof(sf)); bsd_to_linux_sigset(mask, &sf.sf_sc.uc_sigmask); bsd_to_linux_sigset(mask, &sf.sf_sc.uc_mcontext.sc_mask); sf.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); sf.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; sf.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; PROC_UNLOCK(p); sf.sf_sc.uc_mcontext.sc_rdi = regs->tf_rdi; sf.sf_sc.uc_mcontext.sc_rsi = regs->tf_rsi; sf.sf_sc.uc_mcontext.sc_rdx = regs->tf_rdx; sf.sf_sc.uc_mcontext.sc_rbp = regs->tf_rbp; sf.sf_sc.uc_mcontext.sc_rbx = regs->tf_rbx; sf.sf_sc.uc_mcontext.sc_rcx = regs->tf_rcx; sf.sf_sc.uc_mcontext.sc_rax = regs->tf_rax; sf.sf_sc.uc_mcontext.sc_rip = regs->tf_rip; sf.sf_sc.uc_mcontext.sc_rsp = regs->tf_rsp; sf.sf_sc.uc_mcontext.sc_r8 = regs->tf_r8; sf.sf_sc.uc_mcontext.sc_r9 = regs->tf_r9; sf.sf_sc.uc_mcontext.sc_r10 = regs->tf_r10; sf.sf_sc.uc_mcontext.sc_r11 = regs->tf_r11; sf.sf_sc.uc_mcontext.sc_r12 = regs->tf_r12; sf.sf_sc.uc_mcontext.sc_r13 = regs->tf_r13; sf.sf_sc.uc_mcontext.sc_r14 = regs->tf_r14; sf.sf_sc.uc_mcontext.sc_r15 = regs->tf_r15; sf.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; sf.sf_sc.uc_mcontext.sc_rflags = regs->tf_rflags; sf.sf_sc.uc_mcontext.sc_err = regs->tf_err; sf.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); sf.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr; /* Build the argument list for the signal handler. */ regs->tf_rdi = sig; /* arg 1 in %rdi */ regs->tf_rax = 0; regs->tf_rsi = (register_t)&sfp->sf_si; /* arg 2 in %rsi */ regs->tf_rdx = (register_t)&sfp->sf_sc; /* arg 3 in %rdx */ sf.sf_handler = catcher; /* Fill in POSIX parts. */ ksiginfo_to_lsiginfo(ksi, &sf.sf_si, sig); /* Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_rsp = (long)sfp; regs->tf_rip = linux_rt_sigcode; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #define LINUX_VSYSCALL_START (-10UL << 20) #define LINUX_VSYSCALL_SZ 1024 const unsigned long linux_vsyscall_vector[] = { LINUX_SYS_gettimeofday, LINUX_SYS_linux_time, LINUX_SYS_linux_getcpu, }; static int linux_vsyscall(struct thread *td) { struct trapframe *frame; uint64_t retqaddr; int code, traced; int error; frame = td->td_frame; /* Check %rip for vsyscall area. */ if (__predict_true(frame->tf_rip < LINUX_VSYSCALL_START)) return (EINVAL); if ((frame->tf_rip & (LINUX_VSYSCALL_SZ - 1)) != 0) return (EINVAL); code = (frame->tf_rip - LINUX_VSYSCALL_START) / LINUX_VSYSCALL_SZ; if (code >= nitems(linux_vsyscall_vector)) return (EINVAL); /* * vsyscall called as callq *(%rax), so we must * use return address from %rsp and also fixup %rsp. */ error = copyin((void *)frame->tf_rsp, &retqaddr, sizeof(retqaddr)); if (error) return (error); frame->tf_rip = retqaddr; frame->tf_rax = linux_vsyscall_vector[code]; frame->tf_rsp += 8; traced = (frame->tf_flags & PSL_T); amd64_syscall(td, traced); return (0); } struct sysentvec elf_linux_sysvec = { .sv_size = LINUX_SYS_MAXSYSCALL, .sv_table = linux_sysent, .sv_transtrap = linux_translate_traps, .sv_fixup = linux_fixup_elf, .sv_sendsig = linux_rt_sendsig, .sv_sigcode = &_binary_linux_locore_o_start, .sv_szsigcode = &linux_szsigcode, .sv_name = "Linux ELF64", .sv_coredump = elf64_coredump, .sv_imgact_try = linux_exec_imgact_try, .sv_minsigstksz = LINUX_MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS_LA48, .sv_usrstack = USRSTACK_LA48, .sv_psstrings = PS_STRINGS_LA48, .sv_stackprot = VM_PROT_ALL, .sv_copyout_auxargs = linux_copyout_auxargs, .sv_copyout_strings = linux_copyout_strings, .sv_setregs = linux_exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_LINUX | SV_LP64 | SV_SHP, .sv_set_syscall_retval = linux_set_syscall_retval, .sv_fetch_syscall_args = linux_fetch_syscall_args, .sv_syscallnames = NULL, .sv_shared_page_base = SHAREDPAGE_LA48, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, .sv_thread_detach = linux_thread_detach, .sv_trap = linux_vsyscall, }; static void linux_vdso_install(void *param) { amd64_lower_shared_page(&elf_linux_sysvec); linux_szsigcode = (&_binary_linux_locore_o_end - &_binary_linux_locore_o_start); if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) panic("Linux invalid vdso size\n"); __elfN(linux_vdso_fixup)(&elf_linux_sysvec); linux_shared_page_obj = __elfN(linux_shared_page_init) (&linux_shared_page_mapping); __elfN(linux_vdso_reloc)(&elf_linux_sysvec); bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, linux_szsigcode); elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; linux_kplatform = linux_shared_page_mapping + (linux_platform - (caddr_t)elf_linux_sysvec.sv_shared_page_base); } SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, linux_vdso_install, NULL); static void linux_vdso_deinstall(void *param) { __elfN(linux_shared_page_fini)(linux_shared_page_obj); } SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, linux_vdso_deinstall, NULL); static char GNULINUX_ABI_VENDOR[] = "GNU"; static int GNULINUX_ABI_DESC = 0; static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel) { const Elf32_Word *desc; uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); desc = (const Elf32_Word *)p; if (desc[0] != GNULINUX_ABI_DESC) return (false); /* * For Linux we encode osrel using the Linux convention of * (version << 16) | (major << 8) | (minor) * See macro in linux_mib.h */ *osrel = LINUX_KERNVER(desc[1], desc[2], desc[3]); return (true); } static Elf_Brandnote linux64_brandnote = { .hdr.n_namesz = sizeof(GNULINUX_ABI_VENDOR), .hdr.n_descsz = 16, .hdr.n_type = 1, .vendor = GNULINUX_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = linux_trans_osrel }; static Elf64_Brandinfo linux_glibc2brand = { .brand = ELFOSABI_LINUX, .machine = EM_X86_64, .compat_3_brand = "Linux", .emul_path = linux_emul_path, .interp_path = "/lib64/ld-linux-x86-64.so.2", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux64_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; static Elf64_Brandinfo linux_glibc2brandshort = { .brand = ELFOSABI_LINUX, .machine = EM_X86_64, .compat_3_brand = "Linux", .emul_path = linux_emul_path, .interp_path = "/lib64/ld-linux.so.2", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux64_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; static Elf64_Brandinfo linux_muslbrand = { .brand = ELFOSABI_LINUX, .machine = EM_X86_64, .compat_3_brand = "Linux", .emul_path = linux_emul_path, .interp_path = "/lib/ld-musl-x86_64.so.1", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux64_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; Elf64_Brandinfo *linux_brandlist[] = { &linux_glibc2brand, &linux_glibc2brandshort, &linux_muslbrand, NULL }; static int linux64_elf_modevent(module_t mod, int type, void *data) { Elf64_Brandinfo **brandinfo; int error; struct linux_ioctl_handler **lihp; error = 0; switch(type) { case MOD_LOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf64_insert_brand_entry(*brandinfo) < 0) error = EINVAL; if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_register_handler(*lihp); stclohz = (stathz ? stathz : hz); if (bootverbose) printf("Linux x86-64 ELF exec handler installed\n"); } else printf("cannot insert Linux x86-64 ELF brand handler\n"); break; case MOD_UNLOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf64_brand_inuse(*brandinfo)) error = EBUSY; if (error == 0) { for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf64_remove_brand_entry(*brandinfo) < 0) error = EINVAL; } if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_unregister_handler(*lihp); if (bootverbose) printf("Linux ELF exec handler removed\n"); } else printf("Could not deinstall ELF interpreter entry\n"); break; default: return (EOPNOTSUPP); } return (error); } static moduledata_t linux64_elf_mod = { "linux64elf", linux64_elf_modevent, 0 }; DECLARE_MODULE_TIED(linux64elf, linux64_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); MODULE_DEPEND(linux64elf, linux_common, 1, 1, 1); FEATURE(linux64, "Linux 64bit support"); Index: head/sys/amd64/linux32/linux32_sysvec.c =================================================================== --- head/sys/amd64/linux32/linux32_sysvec.c (revision 366204) +++ head/sys/amd64/linux32/linux32_sysvec.c (revision 366205) @@ -1,1102 +1,1101 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2004 Tim J. Robbins * Copyright (c) 2003 Peter Wemm * Copyright (c) 2002 Doug Rabson * Copyright (c) 1998-1999 Andrew Gallatin * Copyright (c) 1994-1996 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "opt_compat.h" #include __FBSDID("$FreeBSD$"); #ifndef COMPAT_FREEBSD32 #error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!" #endif #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_VERSION(linux, 1); const char *linux_kplatform; static int linux_szsigcode; static vm_object_t linux_shared_page_obj; static char *linux_shared_page_mapping; extern char _binary_linux32_locore_o_start; extern char _binary_linux32_locore_o_end; extern struct sysent linux32_sysent[LINUX32_SYS_MAXSYSCALL]; SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); static int linux_fixup_elf(uintptr_t *stack_base, struct image_params *iparams); static int linux_copyout_strings(struct image_params *imgp, uintptr_t *stack_base); static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); static void linux_exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack); static void linux32_fixlimit(struct rlimit *rl, int which); static bool linux32_trans_osrel(const Elf_Note *note, int32_t *osrel); static void linux_vdso_install(void *param); static void linux_vdso_deinstall(void *param); static void linux32_set_syscall_retval(struct thread *td, int error); #define LINUX_T_UNKNOWN 255 static int _bsd_to_linux_trapcode[] = { LINUX_T_UNKNOWN, /* 0 */ 6, /* 1 T_PRIVINFLT */ LINUX_T_UNKNOWN, /* 2 */ 3, /* 3 T_BPTFLT */ LINUX_T_UNKNOWN, /* 4 */ LINUX_T_UNKNOWN, /* 5 */ 16, /* 6 T_ARITHTRAP */ 254, /* 7 T_ASTFLT */ LINUX_T_UNKNOWN, /* 8 */ 13, /* 9 T_PROTFLT */ 1, /* 10 T_TRCTRAP */ LINUX_T_UNKNOWN, /* 11 */ 14, /* 12 T_PAGEFLT */ LINUX_T_UNKNOWN, /* 13 */ 17, /* 14 T_ALIGNFLT */ LINUX_T_UNKNOWN, /* 15 */ LINUX_T_UNKNOWN, /* 16 */ LINUX_T_UNKNOWN, /* 17 */ 0, /* 18 T_DIVIDE */ 2, /* 19 T_NMI */ 4, /* 20 T_OFLOW */ 5, /* 21 T_BOUND */ 7, /* 22 T_DNA */ 8, /* 23 T_DOUBLEFLT */ 9, /* 24 T_FPOPFLT */ 10, /* 25 T_TSSFLT */ 11, /* 26 T_SEGNPFLT */ 12, /* 27 T_STKFLT */ 18, /* 28 T_MCHK */ 19, /* 29 T_XMMFLT */ 15 /* 30 T_RESERVED */ }; #define bsd_to_linux_trapcode(code) \ ((code)auxargs; argarray = pos = malloc(LINUX_AT_COUNT * sizeof(*pos), M_TEMP, M_WAITOK | M_ZERO); issetugid = imgp->proc->p_flag & P_SUGID ? 1 : 0; AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, imgp->proc->p_sysent->sv_shared_page_base); AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, linux32_vsyscall); AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature); /* * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0, * as it has appeared in the 2.4.0-rc7 first time. * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK), * glibc falls back to the hard-coded CLK_TCK value when aux entry * is not present. * Also see linux_times() implementation. */ if (linux_kernver(curthread) >= LINUX_KERNVER_2004000) AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz); AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); AUXARGS_ENTRY(pos, AT_PHENT, args->phent); AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); AUXARGS_ENTRY(pos, AT_BASE, args->base); AUXARGS_ENTRY(pos, LINUX_AT_SECURE, issetugid); AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform)); AUXARGS_ENTRY(pos, LINUX_AT_RANDOM, PTROUT(imgp->canary)); if (imgp->execpathp != 0) AUXARGS_ENTRY(pos, LINUX_AT_EXECFN, PTROUT(imgp->execpathp)); if (args->execfd != -1) AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); AUXARGS_ENTRY(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); imgp->auxargs = NULL; KASSERT(pos - argarray <= LINUX_AT_COUNT, ("Too many auxargs")); error = copyout(argarray, (void *)base, sizeof(*argarray) * LINUX_AT_COUNT); free(argarray, M_TEMP); return (error); } static int linux_fixup_elf(uintptr_t *stack_base, struct image_params *imgp) { Elf32_Addr *base; base = (Elf32_Addr *)*stack_base; base--; if (suword32(base, (uint32_t)imgp->args->argc) == -1) return (EFAULT); *stack_base = (uintptr_t)base; return (0); } static void linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td = curthread; struct proc *p = td->td_proc; struct sigacts *psp; struct trapframe *regs; struct l_rt_sigframe *fp, frame; int oonstack; int sig; int code; sig = ksi->ksi_signo; code = ksi->ksi_code; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct l_rt_sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); } else fp = (struct l_rt_sigframe *)regs->tf_rsp - 1; mtx_unlock(&psp->ps_mtx); /* Build the argument list for the signal handler. */ sig = bsd_to_linux_signal(sig); bzero(&frame, sizeof(frame)); frame.sf_handler = PTROUT(catcher); frame.sf_sig = sig; frame.sf_siginfo = PTROUT(&fp->sf_si); frame.sf_ucontext = PTROUT(&fp->sf_sc); /* Fill in POSIX parts. */ ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); /* * Build the signal context to be used by sigreturn and libgcc unwind. */ frame.sf_sc.uc_flags = 0; /* XXX ??? */ frame.sf_sc.uc_link = 0; /* XXX ??? */ frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; PROC_UNLOCK(p); bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__mask; frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi; frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi; frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp; frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx; frame.sf_sc.uc_mcontext.sc_esp = regs->tf_rsp; frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx; frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx; frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax; frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip; frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs; frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags; frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp; frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); if (copyout(&frame, fp, sizeof(frame)) != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ PROC_LOCK(p); sigexit(td, SIGILL); } /* Build context to run handler in. */ regs->tf_rsp = PTROUT(fp); regs->tf_rip = linux32_rt_sigcode; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucode32sel; regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * in u. to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td = curthread; struct proc *p = td->td_proc; struct sigacts *psp; struct trapframe *regs; struct l_sigframe *fp, frame; l_sigset_t lmask; int oonstack; int sig, code; sig = ksi->ksi_signo; code = ksi->ksi_code; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ linux_rt_sendsig(catcher, ksi, mask); return; } regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct l_sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_sigframe)); } else fp = (struct l_sigframe *)regs->tf_rsp - 1; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* Build the argument list for the signal handler. */ sig = bsd_to_linux_signal(sig); bzero(&frame, sizeof(frame)); frame.sf_handler = PTROUT(catcher); frame.sf_sig = sig; bsd_to_linux_sigset(mask, &lmask); /* Build the signal context to be used by sigreturn. */ frame.sf_sc.sc_mask = lmask.__mask; frame.sf_sc.sc_gs = regs->tf_gs; frame.sf_sc.sc_fs = regs->tf_fs; frame.sf_sc.sc_es = regs->tf_es; frame.sf_sc.sc_ds = regs->tf_ds; frame.sf_sc.sc_edi = regs->tf_rdi; frame.sf_sc.sc_esi = regs->tf_rsi; frame.sf_sc.sc_ebp = regs->tf_rbp; frame.sf_sc.sc_ebx = regs->tf_rbx; frame.sf_sc.sc_esp = regs->tf_rsp; frame.sf_sc.sc_edx = regs->tf_rdx; frame.sf_sc.sc_ecx = regs->tf_rcx; frame.sf_sc.sc_eax = regs->tf_rax; frame.sf_sc.sc_eip = regs->tf_rip; frame.sf_sc.sc_cs = regs->tf_cs; frame.sf_sc.sc_eflags = regs->tf_rflags; frame.sf_sc.sc_esp_at_signal = regs->tf_rsp; frame.sf_sc.sc_ss = regs->tf_ss; frame.sf_sc.sc_err = regs->tf_err; frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); frame.sf_extramask[0] = lmask.__mask; if (copyout(&frame, fp, sizeof(frame)) != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ PROC_LOCK(p); sigexit(td, SIGILL); } /* Build context to run handler in. */ regs->tf_rsp = PTROUT(fp); regs->tf_rip = linux32_sigcode; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucode32sel; regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * psl to gain improper privileges or to cause * a machine fault. */ int linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) { struct l_sigframe frame; struct trapframe *regs; sigset_t bmask; l_sigset_t lmask; int eflags; ksiginfo_t ksi; regs = td->td_frame; /* * The trampoline code hands us the sigframe. * It is unsafe to keep track of it ourselves, in the event that a * program jumps out of a signal handler. */ if (copyin(args->sfp, &frame, sizeof(frame)) != 0) return (EFAULT); /* Check for security violations. */ #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) eflags = frame.sf_sc.sc_eflags; if (!EFLAGS_SECURE(eflags, regs->tf_rflags)) return(EINVAL); /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(frame.sf_sc.sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_rip; trapsignal(td, &ksi); return(EINVAL); } lmask.__mask = frame.sf_sc.sc_mask; lmask.__mask = frame.sf_extramask[0]; linux_to_bsd_sigset(&lmask, &bmask); kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); /* Restore signal context. */ regs->tf_rdi = frame.sf_sc.sc_edi; regs->tf_rsi = frame.sf_sc.sc_esi; regs->tf_rbp = frame.sf_sc.sc_ebp; regs->tf_rbx = frame.sf_sc.sc_ebx; regs->tf_rdx = frame.sf_sc.sc_edx; regs->tf_rcx = frame.sf_sc.sc_ecx; regs->tf_rax = frame.sf_sc.sc_eax; regs->tf_rip = frame.sf_sc.sc_eip; regs->tf_cs = frame.sf_sc.sc_cs; regs->tf_ds = frame.sf_sc.sc_ds; regs->tf_es = frame.sf_sc.sc_es; regs->tf_fs = frame.sf_sc.sc_fs; regs->tf_gs = frame.sf_sc.sc_gs; regs->tf_rflags = eflags; regs->tf_rsp = frame.sf_sc.sc_esp_at_signal; regs->tf_ss = frame.sf_sc.sc_ss; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); return (EJUSTRETURN); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by rt_sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * psl to gain improper privileges or to cause * a machine fault. */ int linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) { struct l_ucontext uc; struct l_sigcontext *context; sigset_t bmask; l_stack_t *lss; stack_t ss; struct trapframe *regs; int eflags; ksiginfo_t ksi; regs = td->td_frame; /* * The trampoline code hands us the ucontext. * It is unsafe to keep track of it ourselves, in the event that a * program jumps out of a signal handler. */ if (copyin(args->ucp, &uc, sizeof(uc)) != 0) return (EFAULT); context = &uc.uc_mcontext; /* Check for security violations. */ #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) eflags = context->sc_eflags; if (!EFLAGS_SECURE(eflags, regs->tf_rflags)) return(EINVAL); /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(context->sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_rip; trapsignal(td, &ksi); return(EINVAL); } linux_to_bsd_sigset(&uc.uc_sigmask, &bmask); kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); /* * Restore signal context */ regs->tf_gs = context->sc_gs; regs->tf_fs = context->sc_fs; regs->tf_es = context->sc_es; regs->tf_ds = context->sc_ds; regs->tf_rdi = context->sc_edi; regs->tf_rsi = context->sc_esi; regs->tf_rbp = context->sc_ebp; regs->tf_rbx = context->sc_ebx; regs->tf_rdx = context->sc_edx; regs->tf_rcx = context->sc_ecx; regs->tf_rax = context->sc_eax; regs->tf_rip = context->sc_eip; regs->tf_cs = context->sc_cs; regs->tf_rflags = eflags; regs->tf_rsp = context->sc_esp_at_signal; regs->tf_ss = context->sc_ss; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); /* * call sigaltstack & ignore results.. */ lss = &uc.uc_stack; ss.ss_sp = PTRIN(lss->ss_sp); ss.ss_size = lss->ss_size; ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); (void)kern_sigaltstack(td, &ss, NULL); return (EJUSTRETURN); } static int linux32_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; struct syscall_args *sa; p = td->td_proc; frame = td->td_frame; sa = &td->td_sa; sa->args[0] = frame->tf_rbx; sa->args[1] = frame->tf_rcx; sa->args[2] = frame->tf_rdx; sa->args[3] = frame->tf_rsi; sa->args[4] = frame->tf_rdi; sa->args[5] = frame->tf_rbp; /* Unconfirmed */ sa->code = frame->tf_rax; if (sa->code >= p->p_sysent->sv_size) /* nosys */ sa->callp = &p->p_sysent->sv_table[p->p_sysent->sv_size - 1]; else sa->callp = &p->p_sysent->sv_table[sa->code]; - sa->narg = sa->callp->sy_narg; td->td_retval[0] = 0; td->td_retval[1] = frame->tf_rdx; return (0); } static void linux32_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame = td->td_frame; cpu_set_syscall_retval(td, error); if (__predict_false(error != 0)) { if (error != ERESTART && error != EJUSTRETURN) frame->tf_rax = linux_to_bsd_errno(error); } } /* * Clear registers on exec * XXX copied from ia32_signal.c. */ static void linux_exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; register_t saved_rflags; regs = td->td_frame; pcb = td->td_pcb; if (td->td_proc->p_md.md_ldt != NULL) user_ldt_free(td); critical_enter(); wrmsr(MSR_FSBASE, 0); wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; critical_exit(); pcb->pcb_initial_fpucw = __LINUX_NPXCW__; saved_rflags = regs->tf_rflags & PSL_T; bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = imgp->entry_addr; regs->tf_rsp = stack; regs->tf_rflags = PSL_USER | saved_rflags; regs->tf_gs = _ugssel; regs->tf_fs = _ufssel; regs->tf_es = _udatasel; regs->tf_ds = _udatasel; regs->tf_ss = _udatasel; regs->tf_flags = TF_HASSEGS; regs->tf_cs = _ucode32sel; regs->tf_rbx = (register_t)imgp->ps_strings; fpstate_drop(td); /* Do full restore on return so that we can change to a different %cs */ set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET); } /* * XXX copied from ia32_sysvec.c. */ static int linux_copyout_strings(struct image_params *imgp, uintptr_t *stack_base) { int argc, envc, error; u_int32_t *vectp; char *stringp; uintptr_t destp, ustringp; struct linux32_ps_strings *arginfo; char canary[LINUX_AT_RANDOM_LEN]; size_t execpath_len; /* Calculate string base and vector table pointers. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; destp = (uintptr_t)arginfo; if (execpath_len != 0) { destp -= execpath_len; destp = rounddown2(destp, sizeof(uint32_t)); imgp->execpathp = (void *)destp; error = copyout(imgp->execpath, imgp->execpathp, execpath_len); if (error != 0) return (error); } /* Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); destp -= roundup(sizeof(canary), sizeof(uint32_t)); imgp->canary = (void *)destp; error = copyout(canary, imgp->canary, sizeof(canary)); if (error != 0) return (error); /* Allocate room for the argument and environment strings. */ destp -= ARG_MAX - imgp->args->stringspace; destp = rounddown2(destp, sizeof(uint32_t)); ustringp = destp; if (imgp->auxargs) { /* * Allocate room on the stack for the ELF auxargs * array. It has LINUX_AT_COUNT entries. */ destp -= LINUX_AT_COUNT * sizeof(Elf32_Auxinfo); destp = rounddown2(destp, sizeof(uint32_t)); } vectp = (uint32_t *)destp; /* * Allocate room for the argv[] and env vectors including the * terminating NULL pointers. */ vectp -= imgp->args->argc + 1 + imgp->args->envc + 1; /* vectp also becomes our initial stack base. */ *stack_base = (uintptr_t)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* Copy out strings - arguments and environment. */ error = copyout(stringp, (void *)ustringp, ARG_MAX - imgp->args->stringspace); if (error != 0) return (error); /* Fill in "ps_strings" struct for ps, w, etc. */ if (suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp) != 0 || suword32(&arginfo->ps_nargvstr, argc) != 0) return (EFAULT); /* Fill in argument portion of vector table. */ for (; argc > 0; --argc) { if (suword32(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* A null vector table pointer separates the argp's from the envp's. */ if (suword32(vectp++, 0) != 0) return (EFAULT); if (suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp) != 0 || suword32(&arginfo->ps_nenvstr, envc) != 0) return (EFAULT); /* Fill in environment portion of vector table. */ for (; envc > 0; --envc) { if (suword32(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* The end of the vector table is a null pointer. */ if (suword32(vectp, 0) != 0) return (EFAULT); if (imgp->auxargs) { vectp++; error = imgp->sysent->sv_copyout_auxargs(imgp, (uintptr_t)vectp); if (error != 0) return (error); } return (0); } static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "32-bit Linux emulation"); static u_long linux32_maxdsiz = LINUX32_MAXDSIZ; SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW, &linux32_maxdsiz, 0, ""); static u_long linux32_maxssiz = LINUX32_MAXSSIZ; SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW, &linux32_maxssiz, 0, ""); static u_long linux32_maxvmem = LINUX32_MAXVMEM; SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW, &linux32_maxvmem, 0, ""); static void linux32_fixlimit(struct rlimit *rl, int which) { switch (which) { case RLIMIT_DATA: if (linux32_maxdsiz != 0) { if (rl->rlim_cur > linux32_maxdsiz) rl->rlim_cur = linux32_maxdsiz; if (rl->rlim_max > linux32_maxdsiz) rl->rlim_max = linux32_maxdsiz; } break; case RLIMIT_STACK: if (linux32_maxssiz != 0) { if (rl->rlim_cur > linux32_maxssiz) rl->rlim_cur = linux32_maxssiz; if (rl->rlim_max > linux32_maxssiz) rl->rlim_max = linux32_maxssiz; } break; case RLIMIT_VMEM: if (linux32_maxvmem != 0) { if (rl->rlim_cur > linux32_maxvmem) rl->rlim_cur = linux32_maxvmem; if (rl->rlim_max > linux32_maxvmem) rl->rlim_max = linux32_maxvmem; } break; } } struct sysentvec elf_linux_sysvec = { .sv_size = LINUX32_SYS_MAXSYSCALL, .sv_table = linux32_sysent, .sv_transtrap = linux_translate_traps, .sv_fixup = linux_fixup_elf, .sv_sendsig = linux_sendsig, .sv_sigcode = &_binary_linux32_locore_o_start, .sv_szsigcode = &linux_szsigcode, .sv_name = "Linux ELF32", .sv_coredump = elf32_coredump, .sv_imgact_try = linux_exec_imgact_try, .sv_minsigstksz = LINUX_MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = LINUX32_MAXUSER, .sv_usrstack = LINUX32_USRSTACK, .sv_psstrings = LINUX32_PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_auxargs = linux_copyout_auxargs, .sv_copyout_strings = linux_copyout_strings, .sv_setregs = linux_exec_setregs, .sv_fixlimit = linux32_fixlimit, .sv_maxssiz = &linux32_maxssiz, .sv_flags = SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP, .sv_set_syscall_retval = linux32_set_syscall_retval, .sv_fetch_syscall_args = linux32_fetch_syscall_args, .sv_syscallnames = NULL, .sv_shared_page_base = LINUX32_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, .sv_thread_detach = linux_thread_detach, .sv_trap = NULL, }; static void linux_vdso_install(void *param) { linux_szsigcode = (&_binary_linux32_locore_o_end - &_binary_linux32_locore_o_start); if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) panic("Linux invalid vdso size\n"); __elfN(linux_vdso_fixup)(&elf_linux_sysvec); linux_shared_page_obj = __elfN(linux_shared_page_init) (&linux_shared_page_mapping); __elfN(linux_vdso_reloc)(&elf_linux_sysvec); bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, linux_szsigcode); elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; linux_kplatform = linux_shared_page_mapping + (linux_platform - (caddr_t)elf_linux_sysvec.sv_shared_page_base); } SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, linux_vdso_install, NULL); static void linux_vdso_deinstall(void *param) { __elfN(linux_shared_page_fini)(linux_shared_page_obj); } SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, linux_vdso_deinstall, NULL); static char GNU_ABI_VENDOR[] = "GNU"; static int GNULINUX_ABI_DESC = 0; static bool linux32_trans_osrel(const Elf_Note *note, int32_t *osrel) { const Elf32_Word *desc; uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); desc = (const Elf32_Word *)p; if (desc[0] != GNULINUX_ABI_DESC) return (false); /* * For Linux we encode osrel using the Linux convention of * (version << 16) | (major << 8) | (minor) * See macro in linux_mib.h */ *osrel = LINUX_KERNVER(desc[1], desc[2], desc[3]); return (true); } static Elf_Brandnote linux32_brandnote = { .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), .hdr.n_descsz = 16, /* XXX at least 16 */ .hdr.n_type = 1, .vendor = GNU_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = linux32_trans_osrel }; static Elf32_Brandinfo linux_brand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", .emul_path = linux_emul_path, .interp_path = "/lib/ld-linux.so.1", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux32_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; static Elf32_Brandinfo linux_glibc2brand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", .emul_path = linux_emul_path, .interp_path = "/lib/ld-linux.so.2", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux32_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; static Elf32_Brandinfo linux_muslbrand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", .emul_path = linux_emul_path, .interp_path = "/lib/ld-musl-i386.so.1", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux32_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; Elf32_Brandinfo *linux_brandlist[] = { &linux_brand, &linux_glibc2brand, &linux_muslbrand, NULL }; static int linux_elf_modevent(module_t mod, int type, void *data) { Elf32_Brandinfo **brandinfo; int error; struct linux_ioctl_handler **lihp; error = 0; switch(type) { case MOD_LOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_insert_brand_entry(*brandinfo) < 0) error = EINVAL; if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux32_ioctl_register_handler(*lihp); stclohz = (stathz ? stathz : hz); if (bootverbose) printf("Linux ELF exec handler installed\n"); } else printf("cannot insert Linux ELF brand handler\n"); break; case MOD_UNLOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_brand_inuse(*brandinfo)) error = EBUSY; if (error == 0) { for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_remove_brand_entry(*brandinfo) < 0) error = EINVAL; } if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux32_ioctl_unregister_handler(*lihp); if (bootverbose) printf("Linux ELF exec handler removed\n"); } else printf("Could not deinstall ELF interpreter entry\n"); break; default: return (EOPNOTSUPP); } return (error); } static moduledata_t linux_elf_mod = { "linuxelf", linux_elf_modevent, 0 }; DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); MODULE_DEPEND(linuxelf, linux_common, 1, 1, 1); FEATURE(linux, "Linux 32bit support"); Index: head/sys/arm/arm/syscall.c =================================================================== --- head/sys/arm/arm/syscall.c (revision 366204) +++ head/sys/arm/arm/syscall.c (revision 366205) @@ -1,170 +1,170 @@ /* $NetBSD: fault.c,v 1.45 2003/11/20 14:44:36 scw Exp $ */ /*- * Copyright 2004 Olivier Houchard * Copyright 2003 Wasabi Systems, Inc. * All rights reserved. * * Written by Steve C. Woodford for Wasabi Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project by * Wasabi Systems, Inc. * 4. The name of Wasabi Systems, Inc. may not be used to endorse * or promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 1994-1997 Mark Brinicombe. * Copyright (c) 1994 Brini. * All rights reserved. * * This code is derived from software written for Brini by Mark Brinicombe * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Brini. * 4. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * RiscBSD kernel project * * fault.c * * Fault handlers * * Created : 28/11/94 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include void swi_handler(struct trapframe *); int cpu_fetch_syscall_args(struct thread *td) { struct proc *p; register_t *ap; struct syscall_args *sa; int error; sa = &td->td_sa; sa->code = td->td_frame->tf_r7; ap = &td->td_frame->tf_r0; if (sa->code == SYS_syscall) { sa->code = *ap++; sa->nap--; } else if (sa->code == SYS___syscall) { sa->code = ap[_QUAD_LOWWORD]; sa->nap -= 2; ap += 2; } p = td->td_proc; if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; - sa->narg = sa->callp->sy_narg; error = 0; memcpy(sa->args, ap, sa->nap * sizeof(register_t)); - if (sa->narg > sa->nap) { + if (sa->callp->sy_narg > sa->nap) { error = copyin((void *)td->td_frame->tf_usr_sp, sa->args + - sa->nap, (sa->narg - sa->nap) * sizeof(register_t)); + sa->nap, (sa->callp->sy_narg - sa->nap) * + sizeof(register_t)); } if (error == 0) { td->td_retval[0] = 0; td->td_retval[1] = 0; } return (error); } #include "../../kern/subr_syscall.c" static void syscall(struct thread *td, struct trapframe *frame) { td->td_sa.nap = 4; syscallenter(td); syscallret(td); } void swi_handler(struct trapframe *frame) { struct thread *td = curthread; td->td_frame = frame; td->td_pticks = 0; /* * Enable interrupts if they were enabled before the exception. * Since all syscalls *should* come from user mode it will always * be safe to enable them, but check anyway. */ if (td->td_md.md_spinlock_count == 0) { if (__predict_true(frame->tf_spsr & PSR_I) == 0) enable_interrupts(PSR_I); if (__predict_true(frame->tf_spsr & PSR_F) == 0) enable_interrupts(PSR_F); } syscall(td, frame); } Index: head/sys/arm/cloudabi32/cloudabi32_sysvec.c =================================================================== --- head/sys/arm/cloudabi32/cloudabi32_sysvec.c (revision 366204) +++ head/sys/arm/cloudabi32/cloudabi32_sysvec.c (revision 366205) @@ -1,196 +1,195 @@ /*- * Copyright (c) 2015-2016 Nuxi, https://nuxi.nl/ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include extern const char *cloudabi32_syscallnames[]; extern struct sysent cloudabi32_sysent[]; static void cloudabi32_proc_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct trapframe *regs; exec_setregs(td, imgp, stack); /* * The stack now contains a pointer to the TCB and the auxiliary * vector. Let r0 point to the auxiliary vector, and set * tpidrurw to the TCB. */ regs = td->td_frame; regs->tf_r0 = stack + roundup(sizeof(cloudabi32_tcb_t), sizeof(register_t)); (void)cpu_set_user_tls(td, TO_PTR(stack)); } static int cloudabi32_fetch_syscall_args(struct thread *td) { struct trapframe *frame; struct syscall_args *sa; int error; frame = td->td_frame; sa = &td->td_sa; /* Obtain system call number. */ sa->code = frame->tf_r12; if (sa->code >= CLOUDABI32_SYS_MAXSYSCALL) return (ENOSYS); sa->callp = &cloudabi32_sysent[sa->code]; - sa->narg = sa->callp->sy_narg; /* Fetch system call arguments from registers and the stack. */ sa->args[0] = frame->tf_r0; sa->args[1] = frame->tf_r1; sa->args[2] = frame->tf_r2; sa->args[3] = frame->tf_r3; - if (sa->narg > 4) { + if (sa->callp->sy_narg > 4) { error = copyin((void *)td->td_frame->tf_usr_sp, &sa->args[4], - (sa->narg - 4) * sizeof(register_t)); + (sa->callp->sy_narg - 4) * sizeof(register_t)); if (error != 0) return (error); } /* Default system call return values. */ td->td_retval[0] = 0; td->td_retval[1] = frame->tf_r1; return (0); } static void cloudabi32_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame = td->td_frame; switch (error) { case 0: /* System call succeeded. */ frame->tf_r0 = td->td_retval[0]; frame->tf_r1 = td->td_retval[1]; frame->tf_spsr &= ~PSR_C; break; case ERESTART: /* Restart system call. */ frame->tf_pc -= 4; break; case EJUSTRETURN: break; default: /* System call returned an error. */ frame->tf_r0 = cloudabi_convert_errno(error); frame->tf_spsr |= PSR_C; break; } } static void cloudabi32_schedtail(struct thread *td) { struct trapframe *frame = td->td_frame; /* * Initial register values for processes returning from fork. * Make sure that we only set these values when forking, not * when creating a new thread. */ if ((td->td_pflags & TDP_FORKING) != 0) { frame->tf_r0 = CLOUDABI_PROCESS_CHILD; frame->tf_r1 = td->td_tid; } } int cloudabi32_thread_setregs(struct thread *td, const cloudabi32_threadattr_t *attr, uint32_t tcb) { struct trapframe *frame; stack_t stack; /* Perform standard register initialization. */ stack.ss_sp = TO_PTR(attr->stack); stack.ss_size = attr->stack_len; cpu_set_upcall(td, TO_PTR(attr->entry_point), NULL, &stack); /* * Pass in the thread ID of the new thread and the argument * pointer provided by the parent thread in as arguments to the * entry point. */ frame = td->td_frame; frame->tf_r0 = td->td_tid; frame->tf_r1 = attr->argument; /* Set up TLS. */ return (cpu_set_user_tls(td, TO_PTR(tcb))); } static struct sysentvec cloudabi32_elf_sysvec = { .sv_size = CLOUDABI32_SYS_MAXSYSCALL, .sv_table = cloudabi32_sysent, .sv_fixup = cloudabi32_fixup, .sv_name = "CloudABI ELF32", .sv_coredump = elf32_coredump, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, .sv_copyout_strings = cloudabi32_copyout_strings, .sv_setregs = cloudabi32_proc_setregs, .sv_flags = SV_ABI_CLOUDABI | SV_CAPSICUM | SV_ILP32, .sv_set_syscall_retval = cloudabi32_set_syscall_retval, .sv_fetch_syscall_args = cloudabi32_fetch_syscall_args, .sv_syscallnames = cloudabi32_syscallnames, .sv_schedtail = cloudabi32_schedtail, }; INIT_SYSENTVEC(elf_sysvec, &cloudabi32_elf_sysvec); Elf32_Brandinfo cloudabi32_brand = { .brand = ELFOSABI_CLOUDABI, .machine = EM_ARM, .sysvec = &cloudabi32_elf_sysvec, .flags = BI_BRAND_ONLY_STATIC, }; Index: head/sys/arm/include/proc.h =================================================================== --- head/sys/arm/include/proc.h (revision 366204) +++ head/sys/arm/include/proc.h (revision 366205) @@ -1,89 +1,88 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)proc.h 7.1 (Berkeley) 5/15/91 * from: FreeBSD: src/sys/i386/include/proc.h,v 1.11 2001/06/29 * $FreeBSD$ */ #ifndef _MACHINE_PROC_H_ #define _MACHINE_PROC_H_ #include struct md_utrap { utrap_entry_t *ut_precise[UT_MAX]; /* must be first */ int ut_refcnt; }; struct mdthread { int md_spinlock_count; /* (k) */ register_t md_saved_cspr; /* (k) */ register_t md_spurflt_addr; /* (k) Spurious page fault address. */ int md_ptrace_instr; int md_ptrace_addr; int md_ptrace_instr_alt; int md_ptrace_addr_alt; #if __ARM_ARCH < 6 register_t md_tp; void *md_ras_start; void *md_ras_end; #endif }; struct mdproc { struct md_utrap *md_utrap; void *md_sigtramp; }; #define KINFO_PROC_SIZE 816 #define MAXARGS 8 /* * This holds the syscall state for a single system call. * As some syscall arguments may be 64-bit aligned we need to ensure the * args value is 64-bit aligned. The ABI will then ensure any 64-bit * arguments are already correctly aligned, even if they were passed in * via registers, we just need to make sure we copy them to an aligned * buffer. */ struct syscall_args { u_int code; struct sysent *callp; register_t args[MAXARGS]; - int narg; u_int nap; } __aligned(8); #endif /* !_MACHINE_PROC_H_ */ Index: head/sys/arm64/arm64/elf32_machdep.c =================================================================== --- head/sys/arm64/arm64/elf32_machdep.c (revision 366204) +++ head/sys/arm64/arm64/elf32_machdep.c (revision 366205) @@ -1,259 +1,259 @@ /*- * Copyright (c) 2014, 2015 The FreeBSD Foundation. * Copyright (c) 2014, 2017 Andrew Turner. * Copyright (c) 2018 Olivier Houchard * All rights reserved. * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define FREEBSD32_MINUSER 0x00001000 #define FREEBSD32_MAXUSER ((1ul << 32) - PAGE_SIZE) #define FREEBSD32_SHAREDPAGE (FREEBSD32_MAXUSER - PAGE_SIZE) #define FREEBSD32_USRSTACK FREEBSD32_SHAREDPAGE extern const char *freebsd32_syscallnames[]; extern char aarch32_sigcode[]; extern int sz_aarch32_sigcode; static int freebsd32_fetch_syscall_args(struct thread *td); static void freebsd32_setregs(struct thread *td, struct image_params *imgp, u_long stack); static void freebsd32_set_syscall_retval(struct thread *, int); static boolean_t elf32_arm_abi_supported(struct image_params *, int32_t *, uint32_t *); extern void freebsd32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); static struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = freebsd32_sysent, .sv_transtrap = NULL, .sv_fixup = elf32_freebsd_fixup, .sv_sendsig = freebsd32_sendsig, .sv_sigcode = aarch32_sigcode, .sv_szsigcode = &sz_aarch32_sigcode, .sv_name = "FreeBSD ELF32", .sv_coredump = elf32_coredump, .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = FREEBSD32_MINUSER, .sv_maxuser = FREEBSD32_MAXUSER, .sv_usrstack = FREEBSD32_USRSTACK, .sv_psstrings = FREEBSD32_PS_STRINGS, .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, .sv_copyout_auxargs = elf32_freebsd_copyout_auxargs, .sv_copyout_strings = freebsd32_copyout_strings, .sv_setregs = freebsd32_setregs, .sv_fixlimit = NULL, // XXX .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_ILP32 | SV_SHP | SV_TIMEKEEP, .sv_set_syscall_retval = freebsd32_set_syscall_retval, .sv_fetch_syscall_args = freebsd32_fetch_syscall_args, .sv_syscallnames = freebsd32_syscallnames, .sv_shared_page_base = FREEBSD32_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); static Elf32_Brandinfo freebsd32_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_ARM, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = "/libexec/ld-elf32.so.1", .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, .header_supported= elf32_arm_abi_supported, }; SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)elf32_insert_brand_entry, &freebsd32_brand_info); static boolean_t elf32_arm_abi_supported(struct image_params *imgp, int32_t *osrel __unused, uint32_t *fctl0 __unused) { const Elf32_Ehdr *hdr; /* Check if we support AArch32 */ if (ID_AA64PFR0_EL0_VAL(READ_SPECIALREG(id_aa64pfr0_el1)) != ID_AA64PFR0_EL0_64_32) return (FALSE); #define EF_ARM_EABI_VERSION(x) (((x) & EF_ARM_EABIMASK) >> 24) #define EF_ARM_EABI_FREEBSD_MIN 4 hdr = (const Elf32_Ehdr *)imgp->image_header; if (EF_ARM_EABI_VERSION(hdr->e_flags) < EF_ARM_EABI_FREEBSD_MIN) { if (bootverbose) uprintf("Attempting to execute non EABI binary " "(rev %d) image %s", EF_ARM_EABI_VERSION(hdr->e_flags), imgp->args->fname); return (FALSE); } return (TRUE); } static int freebsd32_fetch_syscall_args(struct thread *td) { struct proc *p; register_t *ap; struct syscall_args *sa; - int error, i, nap; + int error, i, nap, narg; unsigned int args[4]; nap = 4; p = td->td_proc; ap = td->td_frame->tf_x; sa = &td->td_sa; /* r7 is the syscall id */ sa->code = td->td_frame->tf_x[7]; if (sa->code == SYS_syscall) { sa->code = *ap++; nap--; } else if (sa->code == SYS___syscall) { sa->code = ap[1]; nap -= 2; ap += 2; } if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; - sa->narg = sa->callp->sy_narg; + narg = sa->callp->sy_narg; for (i = 0; i < nap; i++) sa->args[i] = ap[i]; - if (sa->narg > nap) { - if ((sa->narg - nap) > nitems(args)) + if (narg > nap) { + if (narg - nap > nitems(args)) panic("Too many system call arguiments"); error = copyin((void *)td->td_frame->tf_x[13], args, - (sa->narg - nap) * sizeof(int)); - for (i = 0; i < (sa->narg - nap); i++) + (narg - nap) * sizeof(int)); + for (i = 0; i < (narg - nap); i++) sa->args[i + nap] = args[i]; } td->td_retval[0] = 0; td->td_retval[1] = 0; return (0); } static void freebsd32_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame; frame = td->td_frame; switch (error) { case 0: frame->tf_x[0] = td->td_retval[0]; frame->tf_x[1] = td->td_retval[1]; frame->tf_spsr &= ~PSR_C; break; case ERESTART: /* * Reconstruct the pc to point at the swi. */ if ((frame->tf_spsr & PSR_T) != 0) frame->tf_elr -= 2; //THUMB_INSN_SIZE; else frame->tf_elr -= 4; //INSN_SIZE; break; case EJUSTRETURN: /* nothing to do */ break; default: frame->tf_x[0] = error; frame->tf_spsr |= PSR_C; break; } } static void freebsd32_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct trapframe *tf = td->td_frame; memset(tf, 0, sizeof(struct trapframe)); /* * We need to set x0 for init as it doesn't call * cpu_set_syscall_retval to copy the value. We also * need to set td_retval for the cases where we do. */ tf->tf_x[0] = stack; /* SP_usr is mapped to x13 */ tf->tf_x[13] = stack; /* LR_usr is mapped to x14 */ tf->tf_x[14] = imgp->entry_addr; tf->tf_elr = imgp->entry_addr; tf->tf_spsr = PSR_M_32; } void elf32_dump_thread(struct thread *td, void *dst, size_t *off) { /* XXX: VFP */ } Index: head/sys/arm64/arm64/trap.c =================================================================== --- head/sys/arm64/arm64/trap.c (revision 366204) +++ head/sys/arm64/arm64/trap.c (revision 366205) @@ -1,612 +1,611 @@ /*- * Copyright (c) 2014 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #ifdef KDB #include #endif #include #include #include #include #include #include #include #include #include #include #include #ifdef KDTRACE_HOOKS #include #endif #ifdef VFP #include #endif #ifdef KDB #include #endif #ifdef DDB #include #endif extern register_t fsu_intr_fault; /* Called from exception.S */ void do_el1h_sync(struct thread *, struct trapframe *); void do_el0_sync(struct thread *, struct trapframe *); void do_el0_error(struct trapframe *); void do_serror(struct trapframe *); void unhandled_exception(struct trapframe *); static void print_registers(struct trapframe *frame); int (*dtrace_invop_jump_addr)(struct trapframe *); typedef void (abort_handler)(struct thread *, struct trapframe *, uint64_t, uint64_t, int); static abort_handler align_abort; static abort_handler data_abort; static abort_handler external_abort; static abort_handler *abort_handlers[] = { [ISS_DATA_DFSC_TF_L0] = data_abort, [ISS_DATA_DFSC_TF_L1] = data_abort, [ISS_DATA_DFSC_TF_L2] = data_abort, [ISS_DATA_DFSC_TF_L3] = data_abort, [ISS_DATA_DFSC_AFF_L1] = data_abort, [ISS_DATA_DFSC_AFF_L2] = data_abort, [ISS_DATA_DFSC_AFF_L3] = data_abort, [ISS_DATA_DFSC_PF_L1] = data_abort, [ISS_DATA_DFSC_PF_L2] = data_abort, [ISS_DATA_DFSC_PF_L3] = data_abort, [ISS_DATA_DFSC_ALIGN] = align_abort, [ISS_DATA_DFSC_EXT] = external_abort, }; static __inline void call_trapsignal(struct thread *td, int sig, int code, void *addr, int trapno) { ksiginfo_t ksi; ksiginfo_init_trap(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = code; ksi.ksi_addr = addr; ksi.ksi_trapno = trapno; trapsignal(td, &ksi); } int cpu_fetch_syscall_args(struct thread *td) { struct proc *p; register_t *ap; struct syscall_args *sa; int nap; - nap = 8; + nap = MAXARGS; p = td->td_proc; ap = td->td_frame->tf_x; sa = &td->td_sa; sa->code = td->td_frame->tf_x[8]; if (sa->code == SYS_syscall || sa->code == SYS___syscall) { sa->code = *ap++; nap--; } if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; - sa->narg = sa->callp->sy_narg; memcpy(sa->args, ap, nap * sizeof(register_t)); - if (sa->narg > nap) - panic("ARM64TODO: Could we have more than 8 args?"); + if (sa->callp->sy_narg > nap) + panic("ARM64TODO: Could we have more than %d args?", MAXARGS); td->td_retval[0] = 0; td->td_retval[1] = 0; return (0); } #include "../../kern/subr_syscall.c" /* * Test for fault generated by given access instruction in * bus_peek_ or bus_poke_ bus function. */ extern uint32_t generic_bs_peek_1f, generic_bs_peek_2f; extern uint32_t generic_bs_peek_4f, generic_bs_peek_8f; extern uint32_t generic_bs_poke_1f, generic_bs_poke_2f; extern uint32_t generic_bs_poke_4f, generic_bs_poke_8f; static bool test_bs_fault(void *addr) { return (addr == &generic_bs_peek_1f || addr == &generic_bs_peek_2f || addr == &generic_bs_peek_4f || addr == &generic_bs_peek_8f || addr == &generic_bs_poke_1f || addr == &generic_bs_poke_2f || addr == &generic_bs_poke_4f || addr == &generic_bs_poke_8f); } static void svc_handler(struct thread *td, struct trapframe *frame) { if ((frame->tf_esr & ESR_ELx_ISS_MASK) == 0) { syscallenter(td); syscallret(td); } else { call_trapsignal(td, SIGILL, ILL_ILLOPN, (void *)frame->tf_elr, ESR_ELx_EXCEPTION(frame->tf_esr)); userret(td, frame); } } static void align_abort(struct thread *td, struct trapframe *frame, uint64_t esr, uint64_t far, int lower) { if (!lower) { print_registers(frame); printf(" far: %16lx\n", far); printf(" esr: %.8lx\n", esr); panic("Misaligned access from kernel space!"); } call_trapsignal(td, SIGBUS, BUS_ADRALN, (void *)frame->tf_elr, ESR_ELx_EXCEPTION(frame->tf_esr)); userret(td, frame); } static void external_abort(struct thread *td, struct trapframe *frame, uint64_t esr, uint64_t far, int lower) { /* * Try to handle synchronous external aborts caused by * bus_space_peek() and/or bus_space_poke() functions. */ if (!lower && test_bs_fault((void *)frame->tf_elr)) { frame->tf_elr = (uint64_t)generic_bs_fault; return; } print_registers(frame); printf(" far: %16lx\n", far); panic("Unhandled EL%d external data abort", lower ? 0: 1); } static void data_abort(struct thread *td, struct trapframe *frame, uint64_t esr, uint64_t far, int lower) { struct vm_map *map; struct proc *p; struct pcb *pcb; vm_prot_t ftype; int error, sig, ucode; #ifdef KDB bool handled; #endif /* * According to the ARMv8-A rev. A.g, B2.10.5 "Load-Exclusive * and Store-Exclusive instruction usage restrictions", state * of the exclusive monitors after data abort exception is unknown. */ clrex(); #ifdef KDB if (kdb_active) { kdb_reenter(); return; } #endif pcb = td->td_pcb; p = td->td_proc; if (lower) map = &p->p_vmspace->vm_map; else { intr_enable(); /* The top bit tells us which range to use */ if (far >= VM_MAXUSER_ADDRESS) { map = kernel_map; } else { map = &p->p_vmspace->vm_map; if (map == NULL) map = kernel_map; } } /* * Try to handle translation, access flag, and permission faults. * Translation faults may occur as a result of the required * break-before-make sequence used when promoting or demoting * superpages. Such faults must not occur while holding the pmap lock, * or pmap_fault() will recurse on that lock. */ if ((lower || map == kernel_map || pcb->pcb_onfault != 0) && pmap_fault(map->pmap, esr, far) == KERN_SUCCESS) return; KASSERT(td->td_md.md_spinlock_count == 0, ("data abort with spinlock held")); if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) { print_registers(frame); printf(" far: %16lx\n", far); printf(" esr: %.8lx\n", esr); panic("data abort in critical section or under mutex"); } switch (ESR_ELx_EXCEPTION(esr)) { case EXCP_INSN_ABORT: case EXCP_INSN_ABORT_L: ftype = VM_PROT_EXECUTE; break; default: ftype = (esr & ISS_DATA_WnR) == 0 ? VM_PROT_READ : VM_PROT_READ | VM_PROT_WRITE; break; } /* Fault in the page. */ error = vm_fault_trap(map, far, ftype, VM_FAULT_NORMAL, &sig, &ucode); if (error != KERN_SUCCESS) { if (lower) { call_trapsignal(td, sig, ucode, (void *)far, ESR_ELx_EXCEPTION(esr)); } else { if (td->td_intr_nesting_level == 0 && pcb->pcb_onfault != 0) { frame->tf_x[0] = error; frame->tf_elr = pcb->pcb_onfault; return; } printf("Fatal data abort:\n"); print_registers(frame); printf(" far: %16lx\n", far); printf(" esr: %.8lx\n", esr); #ifdef KDB if (debugger_on_trap) { kdb_why = KDB_WHY_TRAP; handled = kdb_trap(ESR_ELx_EXCEPTION(esr), 0, frame); kdb_why = KDB_WHY_UNSET; if (handled) return; } #endif panic("vm_fault failed: %lx", frame->tf_elr); } } if (lower) userret(td, frame); } static void print_registers(struct trapframe *frame) { u_int reg; for (reg = 0; reg < nitems(frame->tf_x); reg++) { printf(" %sx%d: %16lx\n", (reg < 10) ? " " : "", reg, frame->tf_x[reg]); } printf(" sp: %16lx\n", frame->tf_sp); printf(" lr: %16lx\n", frame->tf_lr); printf(" elr: %16lx\n", frame->tf_elr); printf("spsr: %8x\n", frame->tf_spsr); } void do_el1h_sync(struct thread *td, struct trapframe *frame) { struct trapframe *oframe; uint32_t exception; uint64_t esr, far; int dfsc; /* Read the esr register to get the exception details */ esr = frame->tf_esr; exception = ESR_ELx_EXCEPTION(esr); #ifdef KDTRACE_HOOKS if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, exception)) return; #endif CTR4(KTR_TRAP, "do_el1_sync: curthread: %p, esr %lx, elr: %lx, frame: %p", td, esr, frame->tf_elr, frame); oframe = td->td_frame; switch (exception) { case EXCP_BRK: case EXCP_WATCHPT_EL1: case EXCP_SOFTSTP_EL1: break; default: td->td_frame = frame; break; } switch (exception) { case EXCP_FP_SIMD: case EXCP_TRAP_FP: #ifdef VFP if ((td->td_pcb->pcb_fpflags & PCB_FP_KERN) != 0) { vfp_restore_state(); } else #endif { print_registers(frame); printf(" esr: %.8lx\n", esr); panic("VFP exception in the kernel"); } break; case EXCP_INSN_ABORT: case EXCP_DATA_ABORT: far = READ_SPECIALREG(far_el1); dfsc = esr & ISS_DATA_DFSC_MASK; if (dfsc < nitems(abort_handlers) && abort_handlers[dfsc] != NULL) { abort_handlers[dfsc](td, frame, esr, far, 0); } else { print_registers(frame); printf(" far: %16lx\n", far); printf(" esr: %.8lx\n", esr); panic("Unhandled EL1 %s abort: %x", exception == EXCP_INSN_ABORT ? "instruction" : "data", dfsc); } break; case EXCP_BRK: #ifdef KDTRACE_HOOKS if ((esr & ESR_ELx_ISS_MASK) == 0x40d && \ dtrace_invop_jump_addr != 0) { dtrace_invop_jump_addr(frame); break; } #endif #ifdef KDB kdb_trap(exception, 0, (td->td_frame != NULL) ? td->td_frame : frame); #else panic("No debugger in kernel.\n"); #endif frame->tf_elr += 4; break; case EXCP_WATCHPT_EL1: case EXCP_SOFTSTP_EL1: #ifdef KDB kdb_trap(exception, 0, (td->td_frame != NULL) ? td->td_frame : frame); #else panic("No debugger in kernel.\n"); #endif break; case EXCP_UNKNOWN: if (undef_insn(1, frame)) break; /* FALLTHROUGH */ default: print_registers(frame); printf(" far: %16lx\n", READ_SPECIALREG(far_el1)); panic("Unknown kernel exception %x esr_el1 %lx\n", exception, esr); } td->td_frame = oframe; } void do_el0_sync(struct thread *td, struct trapframe *frame) { pcpu_bp_harden bp_harden; uint32_t exception; uint64_t esr, far; int dfsc; /* Check we have a sane environment when entering from userland */ KASSERT((uintptr_t)get_pcpu() >= VM_MIN_KERNEL_ADDRESS, ("Invalid pcpu address from userland: %p (tpidr %lx)", get_pcpu(), READ_SPECIALREG(tpidr_el1))); esr = frame->tf_esr; exception = ESR_ELx_EXCEPTION(esr); switch (exception) { case EXCP_INSN_ABORT_L: far = READ_SPECIALREG(far_el1); /* * Userspace may be trying to train the branch predictor to * attack the kernel. If we are on a CPU affected by this * call the handler to clear the branch predictor state. */ if (far > VM_MAXUSER_ADDRESS) { bp_harden = PCPU_GET(bp_harden); if (bp_harden != NULL) bp_harden(); } break; case EXCP_UNKNOWN: case EXCP_DATA_ABORT_L: case EXCP_DATA_ABORT: far = READ_SPECIALREG(far_el1); break; } intr_enable(); CTR4(KTR_TRAP, "do_el0_sync: curthread: %p, esr %lx, elr: %lx, frame: %p", td, esr, frame->tf_elr, frame); switch (exception) { case EXCP_FP_SIMD: case EXCP_TRAP_FP: #ifdef VFP vfp_restore_state(); #else panic("VFP exception in userland"); #endif break; case EXCP_SVC32: case EXCP_SVC64: svc_handler(td, frame); break; case EXCP_INSN_ABORT_L: case EXCP_DATA_ABORT_L: case EXCP_DATA_ABORT: dfsc = esr & ISS_DATA_DFSC_MASK; if (dfsc < nitems(abort_handlers) && abort_handlers[dfsc] != NULL) abort_handlers[dfsc](td, frame, esr, far, 1); else { print_registers(frame); printf(" far: %16lx\n", far); printf(" esr: %.8lx\n", esr); panic("Unhandled EL0 %s abort: %x", exception == EXCP_INSN_ABORT_L ? "instruction" : "data", dfsc); } break; case EXCP_UNKNOWN: if (!undef_insn(0, frame)) call_trapsignal(td, SIGILL, ILL_ILLTRP, (void *)far, exception); userret(td, frame); break; case EXCP_SP_ALIGN: call_trapsignal(td, SIGBUS, BUS_ADRALN, (void *)frame->tf_sp, exception); userret(td, frame); break; case EXCP_PC_ALIGN: call_trapsignal(td, SIGBUS, BUS_ADRALN, (void *)frame->tf_elr, exception); userret(td, frame); break; case EXCP_BRKPT_EL0: case EXCP_BRK: call_trapsignal(td, SIGTRAP, TRAP_BRKPT, (void *)frame->tf_elr, exception); userret(td, frame); break; case EXCP_MSR: call_trapsignal(td, SIGILL, ILL_PRVOPC, (void *)frame->tf_elr, exception); userret(td, frame); break; case EXCP_SOFTSTP_EL0: td->td_frame->tf_spsr &= ~PSR_SS; td->td_pcb->pcb_flags &= ~PCB_SINGLE_STEP; WRITE_SPECIALREG(mdscr_el1, READ_SPECIALREG(mdscr_el1) & ~DBG_MDSCR_SS); call_trapsignal(td, SIGTRAP, TRAP_TRACE, (void *)frame->tf_elr, exception); userret(td, frame); break; default: call_trapsignal(td, SIGBUS, BUS_OBJERR, (void *)frame->tf_elr, exception); userret(td, frame); break; } KASSERT((td->td_pcb->pcb_fpflags & ~PCB_FP_USERMASK) == 0, ("Kernel VFP flags set while entering userspace")); KASSERT( td->td_pcb->pcb_fpusaved == &td->td_pcb->pcb_fpustate, ("Kernel VFP state in use when entering userspace")); } /* * TODO: We will need to handle these later when we support ARMv8.2 RAS. */ void do_serror(struct trapframe *frame) { uint64_t esr, far; far = READ_SPECIALREG(far_el1); esr = frame->tf_esr; print_registers(frame); printf(" far: %16lx\n", far); printf(" esr: %.8lx\n", esr); panic("Unhandled System Error"); } void unhandled_exception(struct trapframe *frame) { uint64_t esr, far; far = READ_SPECIALREG(far_el1); esr = frame->tf_esr; print_registers(frame); printf(" far: %16lx\n", far); printf(" esr: %.8lx\n", esr); panic("Unhandled exception"); } Index: head/sys/arm64/cloudabi32/cloudabi32_sysvec.c =================================================================== --- head/sys/arm64/cloudabi32/cloudabi32_sysvec.c (revision 366204) +++ head/sys/arm64/cloudabi32/cloudabi32_sysvec.c (revision 366205) @@ -1,204 +1,203 @@ /*- * Copyright (c) 2015-2017 Nuxi, https://nuxi.nl/ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include extern const char *cloudabi32_syscallnames[]; extern struct sysent cloudabi32_sysent[]; static void cloudabi32_proc_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct trapframe *regs; regs = td->td_frame; memset(regs, 0, sizeof(*regs)); regs->tf_x[0] = stack + roundup(sizeof(cloudabi32_tcb_t), sizeof(register_t)); regs->tf_x[13] = STACKALIGN(stack); regs->tf_elr = imgp->entry_addr; regs->tf_spsr |= PSR_AARCH32; (void)cpu_set_user_tls(td, TO_PTR(stack)); } static int cloudabi32_fetch_syscall_args(struct thread *td) { struct trapframe *frame; struct syscall_args *sa; int error; frame = td->td_frame; sa = &td->td_sa; /* Obtain system call number. */ sa->code = frame->tf_x[0]; if (sa->code >= CLOUDABI32_SYS_MAXSYSCALL) return (ENOSYS); sa->callp = &cloudabi32_sysent[sa->code]; - sa->narg = sa->callp->sy_narg; /* * Fetch system call arguments. * * The vDSO has already made sure that the arguments are * eight-byte aligned. Pointers and size_t parameters are * zero-extended. This makes it possible to copy in the * arguments directly. As long as the call doesn't use 32-bit * data structures, we can just invoke the same system call * implementation used by 64-bit processes. */ error = copyin((void *)frame->tf_x[2], sa->args, - sa->narg * sizeof(sa->args[0])); + sa->callp->sy_narg * sizeof(sa->args[0])); if (error != 0) return (error); /* Default system call return values. */ td->td_retval[0] = 0; td->td_retval[1] = 0; return (0); } static void cloudabi32_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame = td->td_frame; switch (error) { case 0: /* * System call succeeded. * * Simply copy out the 64-bit return values into the * same buffer provided for system call arguments. The * vDSO will copy them to the right spot, truncating * pointers and size_t values to 32 bits. */ if (copyout(td->td_retval, (void *)frame->tf_x[2], sizeof(td->td_retval)) == 0) { frame->tf_x[0] = 0; frame->tf_spsr &= ~PSR_C; } else { frame->tf_x[0] = CLOUDABI_EFAULT; frame->tf_spsr |= PSR_C; } break; case ERESTART: /* Restart system call. */ frame->tf_elr -= 4; break; case EJUSTRETURN: break; default: /* System call returned an error. */ frame->tf_x[0] = cloudabi_convert_errno(error); frame->tf_spsr |= PSR_C; break; } } static void cloudabi32_schedtail(struct thread *td) { struct trapframe *frame = td->td_frame; register_t retval[2]; /* Return values for processes returning from fork. */ if ((td->td_pflags & TDP_FORKING) != 0) { retval[0] = CLOUDABI_PROCESS_CHILD; retval[1] = td->td_tid; copyout(retval, (void *)frame->tf_x[2], sizeof(retval)); } frame->tf_spsr |= PSR_AARCH32; } int cloudabi32_thread_setregs(struct thread *td, const cloudabi32_threadattr_t *attr, uint32_t tcb) { struct trapframe *frame; /* * Pass in the thread ID of the new thread and the argument * pointer provided by the parent thread in as arguments to the * entry point. */ frame = td->td_frame; memset(frame, 0, sizeof(*frame)); frame->tf_x[0] = td->td_tid; frame->tf_x[1] = attr->argument; frame->tf_x[13] = STACKALIGN(attr->stack + attr->stack_len); frame->tf_elr = attr->entry_point; /* Set up TLS. */ return (cpu_set_user_tls(td, TO_PTR(tcb))); } static struct sysentvec cloudabi32_elf_sysvec = { .sv_size = CLOUDABI32_SYS_MAXSYSCALL, .sv_table = cloudabi32_sysent, .sv_fixup = cloudabi32_fixup, .sv_name = "CloudABI ELF32", .sv_coredump = elf32_coredump, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = (uintmax_t)1 << 32, .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, .sv_copyout_strings = cloudabi32_copyout_strings, .sv_setregs = cloudabi32_proc_setregs, .sv_flags = SV_ABI_CLOUDABI | SV_CAPSICUM | SV_ILP32, .sv_set_syscall_retval = cloudabi32_set_syscall_retval, .sv_fetch_syscall_args = cloudabi32_fetch_syscall_args, .sv_syscallnames = cloudabi32_syscallnames, .sv_schedtail = cloudabi32_schedtail, }; INIT_SYSENTVEC(elf_sysvec, &cloudabi32_elf_sysvec); Elf32_Brandinfo cloudabi32_brand = { .brand = ELFOSABI_CLOUDABI, .machine = EM_ARM, .sysvec = &cloudabi32_elf_sysvec, .flags = BI_BRAND_ONLY_STATIC, }; Index: head/sys/arm64/cloudabi64/cloudabi64_sysvec.c =================================================================== --- head/sys/arm64/cloudabi64/cloudabi64_sysvec.c (revision 366204) +++ head/sys/arm64/cloudabi64/cloudabi64_sysvec.c (revision 366205) @@ -1,188 +1,187 @@ /*- * Copyright (c) 2015 Nuxi, https://nuxi.nl/ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include extern const char *cloudabi64_syscallnames[]; extern struct sysent cloudabi64_sysent[]; static void cloudabi64_proc_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct trapframe *regs; exec_setregs(td, imgp, stack); /* * The stack now contains a pointer to the TCB and the auxiliary * vector. Let x0 point to the auxiliary vector, and set * tpidr_el0 to the TCB. */ regs = td->td_frame; regs->tf_x[0] = stack + roundup(sizeof(cloudabi64_tcb_t), sizeof(register_t)); (void)cpu_set_user_tls(td, TO_PTR(stack)); } static int cloudabi64_fetch_syscall_args(struct thread *td) { struct trapframe *frame; struct syscall_args *sa; int i; frame = td->td_frame; sa = &td->td_sa; /* Obtain system call number. */ sa->code = frame->tf_x[8]; if (sa->code >= CLOUDABI64_SYS_MAXSYSCALL) return (ENOSYS); sa->callp = &cloudabi64_sysent[sa->code]; - sa->narg = sa->callp->sy_narg; /* Fetch system call arguments. */ for (i = 0; i < MAXARGS; i++) sa->args[i] = frame->tf_x[i]; /* Default system call return values. */ td->td_retval[0] = 0; td->td_retval[1] = frame->tf_x[1]; return (0); } static void cloudabi64_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame = td->td_frame; switch (error) { case 0: /* System call succeeded. */ frame->tf_x[0] = td->td_retval[0]; frame->tf_x[1] = td->td_retval[1]; frame->tf_spsr &= ~PSR_C; break; case ERESTART: /* Restart system call. */ frame->tf_elr -= 4; break; case EJUSTRETURN: break; default: /* System call returned an error. */ frame->tf_x[0] = cloudabi_convert_errno(error); frame->tf_spsr |= PSR_C; break; } } static void cloudabi64_schedtail(struct thread *td) { struct trapframe *frame = td->td_frame; /* * Initial register values for processes returning from fork. * Make sure that we only set these values when forking, not * when creating a new thread. */ if ((td->td_pflags & TDP_FORKING) != 0) { frame->tf_x[0] = CLOUDABI_PROCESS_CHILD; frame->tf_x[1] = td->td_tid; } } int cloudabi64_thread_setregs(struct thread *td, const cloudabi64_threadattr_t *attr, uint64_t tcb) { struct trapframe *frame; stack_t stack; /* Perform standard register initialization. */ stack.ss_sp = TO_PTR(attr->stack); stack.ss_size = attr->stack_len; cpu_set_upcall(td, TO_PTR(attr->entry_point), NULL, &stack); /* * Pass in the thread ID of the new thread and the argument * pointer provided by the parent thread in as arguments to the * entry point. */ frame = td->td_frame; frame->tf_x[0] = td->td_tid; frame->tf_x[1] = attr->argument; /* Set up TLS. */ return (cpu_set_user_tls(td, TO_PTR(tcb))); } static struct sysentvec cloudabi64_elf_sysvec = { .sv_size = CLOUDABI64_SYS_MAXSYSCALL, .sv_table = cloudabi64_sysent, .sv_fixup = cloudabi64_fixup, .sv_name = "CloudABI ELF64", .sv_coredump = elf64_coredump, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, .sv_copyout_strings = cloudabi64_copyout_strings, .sv_setregs = cloudabi64_proc_setregs, .sv_flags = SV_ABI_CLOUDABI | SV_CAPSICUM | SV_LP64, .sv_set_syscall_retval = cloudabi64_set_syscall_retval, .sv_fetch_syscall_args = cloudabi64_fetch_syscall_args, .sv_syscallnames = cloudabi64_syscallnames, .sv_schedtail = cloudabi64_schedtail, }; INIT_SYSENTVEC(elf_sysvec, &cloudabi64_elf_sysvec); Elf64_Brandinfo cloudabi64_brand = { .brand = ELFOSABI_CLOUDABI, .machine = EM_AARCH64, .sysvec = &cloudabi64_elf_sysvec, .flags = BI_CAN_EXEC_DYN | BI_BRAND_ONLY_STATIC, }; Index: head/sys/arm64/include/proc.h =================================================================== --- head/sys/arm64/include/proc.h (revision 366204) +++ head/sys/arm64/include/proc.h (revision 366205) @@ -1,71 +1,70 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)proc.h 7.1 (Berkeley) 5/15/91 * from: FreeBSD: src/sys/i386/include/proc.h,v 1.11 2001/06/29 * $FreeBSD$ */ #ifndef _MACHINE_PROC_H_ #define _MACHINE_PROC_H_ struct mdthread { int md_spinlock_count; /* (k) */ register_t md_saved_daif; /* (k) */ }; struct mdproc { long md_dummy; }; #define KINFO_PROC_SIZE 1088 #define KINFO_PROC32_SIZE 816 #define MAXARGS 8 struct syscall_args { u_int code; struct sysent *callp; register_t args[MAXARGS]; - int narg; }; #ifdef _KERNEL #include #define GET_STACK_USAGE(total, used) do { \ struct thread *td = curthread; \ (total) = td->td_kstack_pages * PAGE_SIZE - sizeof(struct pcb); \ (used) = (char *)td->td_kstack + \ td->td_kstack_pages * PAGE_SIZE - \ (char *)&td; \ } while (0) #endif #endif /* !_MACHINE_PROC_H_ */ Index: head/sys/arm64/linux/linux_sysvec.c =================================================================== --- head/sys/arm64/linux/linux_sysvec.c (revision 366204) +++ head/sys/arm64/linux/linux_sysvec.c (revision 366205) @@ -1,571 +1,570 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1994-1996 Søren Schmidt * Copyright (c) 2018 Turing Robotic Industries Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_VERSION(linux64elf, 1); const char *linux_kplatform; static int linux_szsigcode; static vm_object_t linux_shared_page_obj; static char *linux_shared_page_mapping; extern char _binary_linux_locore_o_start; extern char _binary_linux_locore_o_end; extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); static int linux_copyout_strings(struct image_params *imgp, uintptr_t *stack_base); static int linux_elf_fixup(uintptr_t *stack_base, struct image_params *iparams); static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel); static void linux_vdso_install(const void *param); static void linux_vdso_deinstall(const void *param); static void linux_set_syscall_retval(struct thread *td, int error); static int linux_fetch_syscall_args(struct thread *td); static void linux_exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack); static int linux_vsyscall(struct thread *td); /* DTrace init */ LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); /* DTrace probes */ LIN_SDT_PROBE_DEFINE2(sysvec, linux_translate_traps, todo, "int", "int"); LIN_SDT_PROBE_DEFINE0(sysvec, linux_exec_setregs, todo); LIN_SDT_PROBE_DEFINE0(sysvec, linux_copyout_auxargs, todo); LIN_SDT_PROBE_DEFINE0(sysvec, linux_elf_fixup, todo); LIN_SDT_PROBE_DEFINE0(sysvec, linux_rt_sigreturn, todo); LIN_SDT_PROBE_DEFINE0(sysvec, linux_rt_sendsig, todo); LIN_SDT_PROBE_DEFINE0(sysvec, linux_vsyscall, todo); LIN_SDT_PROBE_DEFINE0(sysvec, linux_vdso_install, todo); LIN_SDT_PROBE_DEFINE0(sysvec, linux_vdso_deinstall, todo); /* LINUXTODO: do we have traps to translate? */ static int linux_translate_traps(int signal, int trap_code) { LIN_SDT_PROBE2(sysvec, linux_translate_traps, todo, signal, trap_code); return (signal); } LINUX_VDSO_SYM_CHAR(linux_platform); static int linux_fetch_syscall_args(struct thread *td) { struct proc *p; struct syscall_args *sa; register_t *ap; p = td->td_proc; ap = td->td_frame->tf_x; sa = &td->td_sa; sa->code = td->td_frame->tf_x[8]; /* LINUXTODO: generic syscall? */ if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; - sa->narg = sa->callp->sy_narg; - if (sa->narg > 8) - panic("ARM64TODO: Could we have more than 8 args?"); - memcpy(sa->args, ap, 8 * sizeof(register_t)); + if (sa->callp->sy_narg > MAXARGS) + panic("ARM64TODO: Could we have more than %d args?", MAXARGS); + memcpy(sa->args, ap, MAXARGS * sizeof(register_t)); td->td_retval[0] = 0; return (0); } static void linux_set_syscall_retval(struct thread *td, int error) { td->td_retval[1] = td->td_frame->tf_x[1]; cpu_set_syscall_retval(td, error); if (__predict_false(error != 0)) { if (error != ERESTART && error != EJUSTRETURN) { td->td_frame->tf_x[0] = linux_to_bsd_errno(error); } } } static int linux_copyout_auxargs(struct image_params *imgp, uintptr_t base) { Elf_Auxargs *args; Elf_Auxinfo *argarray, *pos; struct proc *p; int error, issetugid; LIN_SDT_PROBE0(sysvec, linux_copyout_auxargs, todo); p = imgp->proc; args = (Elf64_Auxargs *)imgp->auxargs; argarray = pos = malloc(LINUX_AT_COUNT * sizeof(*pos), M_TEMP, M_WAITOK | M_ZERO); issetugid = p->p_flag & P_SUGID ? 1 : 0; AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, imgp->proc->p_sysent->sv_shared_page_base); #if 0 /* LINUXTODO: implement arm64 LINUX_AT_HWCAP */ AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature); #endif AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz); AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); AUXARGS_ENTRY(pos, AT_PHENT, args->phent); AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); AUXARGS_ENTRY(pos, AT_BASE, args->base); AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); AUXARGS_ENTRY(pos, LINUX_AT_SECURE, issetugid); #if 0 /* LINUXTODO: implement arm64 LINUX_AT_PLATFORM */ AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform)); #endif AUXARGS_ENTRY_PTR(pos, LINUX_AT_RANDOM, imgp->canary); if (imgp->execpathp != 0) AUXARGS_ENTRY_PTR(pos, LINUX_AT_EXECFN, imgp->execpathp); if (args->execfd != -1) AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); AUXARGS_ENTRY(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); imgp->auxargs = NULL; KASSERT(pos - argarray <= LINUX_AT_COUNT, ("Too many auxargs")); error = copyout(argarray, (void *)base, sizeof(*argarray) * LINUX_AT_COUNT); free(argarray, M_TEMP); return (error); } static int linux_elf_fixup(uintptr_t *stack_base, struct image_params *imgp) { LIN_SDT_PROBE0(sysvec, linux_elf_fixup, todo); return (0); } /* * Copy strings out to the new process address space, constructing new arg * and env vector tables. Return a pointer to the base so that it can be used * as the initial stack pointer. * LINUXTODO: deduplicate against other linuxulator archs */ static int linux_copyout_strings(struct image_params *imgp, uintptr_t *stack_base) { char **vectp; char *stringp; uintptr_t destp, ustringp; struct ps_strings *arginfo; char canary[LINUX_AT_RANDOM_LEN]; size_t execpath_len; struct proc *p; int argc, envc, error; /* Calculate string base and vector table pointers. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; p = imgp->proc; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; destp = (uintptr_t)arginfo; if (execpath_len != 0) { destp -= execpath_len; destp = rounddown2(destp, sizeof(void *)); imgp->execpathp = (void *)destp; error = copyout(imgp->execpath, imgp->execpathp, execpath_len); if (error != 0) return (error); } /* Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); destp -= roundup(sizeof(canary), sizeof(void *)); imgp->canary = (void *)destp; error = copyout(canary, imgp->canary, sizeof(canary)); if (error != 0) return (error); /* Allocate room for the argument and environment strings. */ destp -= ARG_MAX - imgp->args->stringspace; destp = rounddown2(destp, sizeof(void *)); ustringp = destp; if (imgp->auxargs) { /* * Allocate room on the stack for the ELF auxargs * array. It has up to LINUX_AT_COUNT entries. */ destp -= LINUX_AT_COUNT * sizeof(Elf64_Auxinfo); destp = rounddown2(destp, sizeof(void *)); } vectp = (char **)destp; /* * Allocate room for argc and the argv[] and env vectors including the * terminating NULL pointers. */ vectp -= 1 + imgp->args->argc + 1 + imgp->args->envc + 1; vectp = (char **)STACKALIGN(vectp); /* vectp also becomes our initial stack base. */ *stack_base = (uintptr_t)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* Copy out strings - arguments and environment. */ error = copyout(stringp, (void *)ustringp, ARG_MAX - imgp->args->stringspace); if (error != 0) return (error); /* Fill in "ps_strings" struct for ps, w, etc. */ if (suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp) != 0 || suword(&arginfo->ps_nargvstr, argc) != 0) return (EFAULT); if (suword(vectp++, argc) != 0) return (EFAULT); /* Fill in argument portion of vector table. */ for (; argc > 0; --argc) { if (suword(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* A null vector table pointer separates the argp's from the envp's. */ if (suword(vectp++, 0) != 0) return (EFAULT); if (suword(&arginfo->ps_envstr, (long)(intptr_t)vectp) != 0 || suword(&arginfo->ps_nenvstr, envc) != 0) return (EFAULT); /* Fill in environment portion of vector table. */ for (; envc > 0; --envc) { if (suword(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* The end of the vector table is a null pointer. */ if (suword(vectp, 0) != 0) return (EFAULT); if (imgp->auxargs) { vectp++; error = imgp->sysent->sv_copyout_auxargs(imgp, (uintptr_t)vectp); if (error != 0) return (error); } return (0); } /* * Reset registers to default values on exec. */ static void linux_exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct trapframe *regs = td->td_frame; /* LINUXTODO: validate */ LIN_SDT_PROBE0(sysvec, linux_exec_setregs, todo); memset(regs, 0, sizeof(*regs)); /* glibc start.S registers function pointer in x0 with atexit. */ regs->tf_sp = stack; #if 0 /* LINUXTODO: See if this is used. */ regs->tf_lr = imgp->entry_addr; #else regs->tf_lr = 0xffffffffffffffff; #endif regs->tf_elr = imgp->entry_addr; } int linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) { /* LINUXTODO: implement */ LIN_SDT_PROBE0(sysvec, linux_rt_sigreturn, todo); return (EDOOFUS); } static void linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { /* LINUXTODO: implement */ LIN_SDT_PROBE0(sysvec, linux_rt_sendsig, todo); } static int linux_vsyscall(struct thread *td) { /* LINUXTODO: implement */ LIN_SDT_PROBE0(sysvec, linux_vsyscall, todo); return (EDOOFUS); } struct sysentvec elf_linux_sysvec = { .sv_size = LINUX_SYS_MAXSYSCALL, .sv_table = linux_sysent, .sv_transtrap = linux_translate_traps, .sv_fixup = linux_elf_fixup, .sv_sendsig = linux_rt_sendsig, .sv_sigcode = &_binary_linux_locore_o_start, .sv_szsigcode = &linux_szsigcode, .sv_name = "Linux ELF64", .sv_coredump = elf64_coredump, .sv_imgact_try = linux_exec_imgact_try, .sv_minsigstksz = LINUX_MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, /* XXX */ .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, .sv_copyout_auxargs = linux_copyout_auxargs, .sv_copyout_strings = linux_copyout_strings, .sv_setregs = linux_exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_LINUX | SV_LP64 | SV_SHP, .sv_set_syscall_retval = linux_set_syscall_retval, .sv_fetch_syscall_args = linux_fetch_syscall_args, .sv_syscallnames = NULL, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, .sv_thread_detach = linux_thread_detach, .sv_trap = linux_vsyscall, }; static void linux_vdso_install(const void *param) { linux_szsigcode = (&_binary_linux_locore_o_end - &_binary_linux_locore_o_start); if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) panic("invalid Linux VDSO size\n"); __elfN(linux_vdso_fixup)(&elf_linux_sysvec); linux_shared_page_obj = __elfN(linux_shared_page_init) (&linux_shared_page_mapping); __elfN(linux_vdso_reloc)(&elf_linux_sysvec); memcpy(linux_shared_page_mapping, elf_linux_sysvec.sv_sigcode, linux_szsigcode); elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; printf("LINUXTODO: %s: fix linux_kplatform\n", __func__); #if 0 linux_kplatform = linux_shared_page_mapping + (linux_platform - (caddr_t)elf_linux_sysvec.sv_shared_page_base); #else linux_kplatform = "arm64"; #endif } SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, linux_vdso_install, NULL); static void linux_vdso_deinstall(const void *param) { LIN_SDT_PROBE0(sysvec, linux_vdso_deinstall, todo); __elfN(linux_shared_page_fini)(linux_shared_page_obj); } SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, linux_vdso_deinstall, NULL); static char GNU_ABI_VENDOR[] = "GNU"; static int GNU_ABI_LINUX = 0; /* LINUXTODO: deduplicate */ static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel) { const Elf32_Word *desc; uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); desc = (const Elf32_Word *)p; if (desc[0] != GNU_ABI_LINUX) return (false); *osrel = LINUX_KERNVER(desc[1], desc[2], desc[3]); return (true); } static Elf_Brandnote linux64_brandnote = { .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), .hdr.n_descsz = 16, .hdr.n_type = 1, .vendor = GNU_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = linux_trans_osrel }; static Elf64_Brandinfo linux_glibc2brand = { .brand = ELFOSABI_LINUX, .machine = EM_AARCH64, .compat_3_brand = "Linux", .emul_path = linux_emul_path, .interp_path = "/lib64/ld-linux-x86-64.so.2", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux64_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; Elf64_Brandinfo *linux_brandlist[] = { &linux_glibc2brand, NULL }; static int linux64_elf_modevent(module_t mod, int type, void *data) { Elf64_Brandinfo **brandinfo; struct linux_ioctl_handler**lihp; int error; error = 0; switch(type) { case MOD_LOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf64_insert_brand_entry(*brandinfo) < 0) error = EINVAL; if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_register_handler(*lihp); stclohz = (stathz ? stathz : hz); if (bootverbose) printf("Linux arm64 ELF exec handler installed\n"); } break; case MOD_UNLOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf64_brand_inuse(*brandinfo)) error = EBUSY; if (error == 0) { for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf64_remove_brand_entry(*brandinfo) < 0) error = EINVAL; } if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_unregister_handler(*lihp); if (bootverbose) printf("Linux ELF exec handler removed\n"); } else printf("Could not deinstall ELF interpreter entry\n"); break; default: return (EOPNOTSUPP); } return (error); } static moduledata_t linux64_elf_mod = { "linux64elf", linux64_elf_modevent, 0 }; DECLARE_MODULE_TIED(linux64elf, linux64_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); MODULE_DEPEND(linux64elf, linux_common, 1, 1, 1); FEATURE(linux64, "AArch64 Linux 64bit support"); Index: head/sys/i386/cloudabi32/cloudabi32_sysvec.c =================================================================== --- head/sys/i386/cloudabi32/cloudabi32_sysvec.c (revision 366204) +++ head/sys/i386/cloudabi32/cloudabi32_sysvec.c (revision 366205) @@ -1,207 +1,206 @@ /*- * Copyright (c) 2015-2016 Nuxi, https://nuxi.nl/ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include extern const char *cloudabi32_syscallnames[]; extern struct sysent cloudabi32_sysent[]; static int cloudabi32_fixup_tcb(uintptr_t *stack_base, struct image_params *imgp) { int error; uint32_t args[2]; /* Place auxiliary vector and TCB on the stack. */ error = cloudabi32_fixup(stack_base, imgp); if (error != 0) return (error); /* * On i386, the TCB is referred to by %gs:0. Reuse the empty * space normally used by the return address (args[0]) to store * a single element array, containing a pointer to the TCB. %gs * base will point to this. * * Also let the first argument of the entry point (args[1]) * refer to the auxiliary vector, which is stored right after * the TCB. */ args[0] = *stack_base; args[1] = *stack_base + roundup(sizeof(cloudabi32_tcb_t), sizeof(register_t)); *stack_base -= roundup(sizeof(args), sizeof(register_t)); return (copyout(args, (void *)*stack_base, sizeof(args))); } static void cloudabi32_proc_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { exec_setregs(td, imgp, stack); (void)cpu_set_user_tls(td, TO_PTR(stack)); } static int cloudabi32_fetch_syscall_args(struct thread *td) { struct trapframe *frame; struct syscall_args *sa; int error; frame = td->td_frame; sa = &td->td_sa; /* Obtain system call number. */ sa->code = frame->tf_eax; if (sa->code >= CLOUDABI32_SYS_MAXSYSCALL) return (ENOSYS); sa->callp = &cloudabi32_sysent[sa->code]; - sa->narg = sa->callp->sy_narg; /* Fetch system call arguments from the stack. */ error = copyin((void *)(frame->tf_esp + 4), sa->args, - sa->narg * sizeof(sa->args[0])); + sa->callp->sy_narg * sizeof(sa->args[0])); if (error != 0) return (error); /* Default system call return values. */ td->td_retval[0] = 0; td->td_retval[1] = frame->tf_edx; return (0); } static void cloudabi32_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame = td->td_frame; switch (error) { case 0: /* System call succeeded. */ frame->tf_eax = td->td_retval[0]; frame->tf_edx = td->td_retval[1]; frame->tf_eflags &= ~PSL_C; break; case ERESTART: /* Restart system call. */ frame->tf_eip -= frame->tf_err; break; case EJUSTRETURN: break; default: /* System call returned an error. */ frame->tf_eax = cloudabi_convert_errno(error); frame->tf_eflags |= PSL_C; break; } } static void cloudabi32_schedtail(struct thread *td) { struct trapframe *frame = td->td_frame; /* Initial register values for processes returning from fork. */ frame->tf_eax = CLOUDABI_PROCESS_CHILD; frame->tf_edx = td->td_tid; } int cloudabi32_thread_setregs(struct thread *td, const cloudabi32_threadattr_t *attr, uint32_t tcb) { stack_t stack; uint32_t args[3]; void *frameptr; int error; /* Perform standard register initialization. */ stack.ss_sp = TO_PTR(attr->stack); stack.ss_size = attr->stack_len - sizeof(args); cpu_set_upcall(td, TO_PTR(attr->entry_point), NULL, &stack); /* * Copy the arguments for the thread entry point onto the stack * (args[1] and args[2]). Similar to process startup, use the * otherwise unused return address (args[0]) for TLS. */ args[0] = tcb; args[1] = td->td_tid; args[2] = attr->argument; frameptr = (void *)td->td_frame->tf_esp; error = copyout(args, frameptr, sizeof(args)); if (error != 0) return (error); return (cpu_set_user_tls(td, frameptr)); } static struct sysentvec cloudabi32_elf_sysvec = { .sv_size = CLOUDABI32_SYS_MAXSYSCALL, .sv_table = cloudabi32_sysent, .sv_fixup = cloudabi32_fixup_tcb, .sv_name = "CloudABI ELF32", .sv_coredump = elf32_coredump, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, .sv_copyout_strings = cloudabi32_copyout_strings, .sv_setregs = cloudabi32_proc_setregs, .sv_flags = SV_ABI_CLOUDABI | SV_CAPSICUM | SV_IA32 | SV_ILP32, .sv_set_syscall_retval = cloudabi32_set_syscall_retval, .sv_fetch_syscall_args = cloudabi32_fetch_syscall_args, .sv_syscallnames = cloudabi32_syscallnames, .sv_schedtail = cloudabi32_schedtail, }; INIT_SYSENTVEC(elf_sysvec, &cloudabi32_elf_sysvec); Elf32_Brandinfo cloudabi32_brand = { .brand = ELFOSABI_CLOUDABI, .machine = EM_386, .sysvec = &cloudabi32_elf_sysvec, .flags = BI_BRAND_ONLY_STATIC, }; Index: head/sys/i386/i386/trap.c =================================================================== --- head/sys/i386/i386/trap.c (revision 366204) +++ head/sys/i386/i386/trap.c (revision 366205) @@ -1,1150 +1,1149 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 1994, David Greenman * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 */ #include __FBSDID("$FreeBSD$"); /* * 386 Trap and System call handling */ #include "opt_clock.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_hwpmc_hooks.h" #include "opt_isa.h" #include "opt_kdb.h" #include "opt_trap.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include PMC_SOFT_DEFINE( , , page_fault, all); PMC_SOFT_DEFINE( , , page_fault, read); PMC_SOFT_DEFINE( , , page_fault, write); #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #include #ifdef POWERFAIL_NMI #include #include #endif #ifdef KDTRACE_HOOKS #include #endif void trap(struct trapframe *frame); void syscall(struct trapframe *frame); static int trap_pfault(struct trapframe *, bool, vm_offset_t, int *, int *); static void trap_fatal(struct trapframe *, vm_offset_t); #ifdef KDTRACE_HOOKS static bool trap_user_dtrace(struct trapframe *, int (**hook)(struct trapframe *)); #endif void dblfault_handler(void); extern inthand_t IDTVEC(bpt), IDTVEC(dbg), IDTVEC(int0x80_syscall); extern uint64_t pg_nx; struct trap_data { bool ei; const char *msg; }; static const struct trap_data trap_data[] = { [T_PRIVINFLT] = { .ei = true, .msg = "privileged instruction fault" }, [T_BPTFLT] = { .ei = false, .msg = "breakpoint instruction fault" }, [T_ARITHTRAP] = { .ei = true, .msg = "arithmetic trap" }, [T_PROTFLT] = { .ei = true, .msg = "general protection fault" }, [T_TRCTRAP] = { .ei = false, .msg = "debug exception" }, [T_PAGEFLT] = { .ei = true, .msg = "page fault" }, [T_ALIGNFLT] = { .ei = true, .msg = "alignment fault" }, [T_DIVIDE] = { .ei = true, .msg = "integer divide fault" }, [T_NMI] = { .ei = false, .msg = "non-maskable interrupt trap" }, [T_OFLOW] = { .ei = true, .msg = "overflow trap" }, [T_BOUND] = { .ei = true, .msg = "FPU bounds check fault" }, [T_DNA] = { .ei = true, .msg = "FPU device not available" }, [T_DOUBLEFLT] = { .ei = false, .msg = "double fault" }, [T_FPOPFLT] = { .ei = true, .msg = "FPU operand fetch fault" }, [T_TSSFLT] = { .ei = true, .msg = "invalid TSS fault" }, [T_SEGNPFLT] = { .ei = true, .msg = "segment not present fault" }, [T_STKFLT] = { .ei = true, .msg = "stack fault" }, [T_MCHK] = { .ei = true, .msg = "machine check trap" }, [T_XMMFLT] = { .ei = true, .msg = "SIMD floating-point exception" }, [T_DTRACE_RET] ={ .ei = true, .msg = "DTrace pid return trap" }, }; static bool trap_enable_intr(int trapno) { MPASS(trapno > 0); if (trapno < nitems(trap_data) && trap_data[trapno].msg != NULL) return (trap_data[trapno].ei); return (false); } static const char * trap_msg(int trapno) { const char *res; static const char unkn[] = "UNKNOWN"; res = NULL; if (trapno < nitems(trap_data)) res = trap_data[trapno].msg; if (res == NULL) res = unkn; return (res); } #if defined(I586_CPU) && !defined(NO_F00F_HACK) int has_f00f_bug = 0; /* Initialized so that it can be patched. */ #endif static int uprintf_signal; SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RW, &uprintf_signal, 0, "Print debugging information on trap signal to ctty"); /* * Exception, fault, and trap interface to the FreeBSD kernel. * This common code is called from assembly language IDT gate entry * routines that prepare a suitable stack frame, and restore this * frame after the exception has been processed. */ void trap(struct trapframe *frame) { ksiginfo_t ksi; struct thread *td; struct proc *p; int pf, signo, ucode; u_int type; register_t addr, dr6; vm_offset_t eva; #ifdef POWERFAIL_NMI static int lastalert = 0; #endif td = curthread; p = td->td_proc; dr6 = 0; VM_CNT_INC(v_trap); type = frame->tf_trapno; KASSERT((read_eflags() & PSL_I) == 0, ("trap: interrupts enabled, type %d frame %p", type, frame)); #ifdef SMP /* Handler for NMI IPIs used for stopping CPUs. */ if (type == T_NMI && ipi_nmi_handler() == 0) return; #endif /* SMP */ #ifdef KDB if (kdb_active) { kdb_reenter(); return; } #endif if (type == T_RESERVED) { trap_fatal(frame, 0); return; } if (type == T_NMI) { #ifdef HWPMC_HOOKS /* * CPU PMCs interrupt using an NMI so we check for that first. * If the HWPMC module is active, 'pmc_hook' will point to * the function to be called. A non-zero return value from the * hook means that the NMI was consumed by it and that we can * return immediately. */ if (pmc_intr != NULL && (*pmc_intr)(frame) != 0) return; #endif } if (type == T_MCHK) { mca_intr(); return; } #ifdef KDTRACE_HOOKS /* * A trap can occur while DTrace executes a probe. Before * executing the probe, DTrace blocks re-scheduling and sets * a flag in its per-cpu flags to indicate that it doesn't * want to fault. On returning from the probe, the no-fault * flag is cleared and finally re-scheduling is enabled. */ if ((type == T_PROTFLT || type == T_PAGEFLT) && dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type)) return; #endif /* * We must not allow context switches until %cr2 is read. * Also, for some Cyrix CPUs, %cr2 is clobbered by interrupts. * All faults use interrupt gates, so %cr2 can be safely read * now, before optional enable of the interrupts below. */ if (type == T_PAGEFLT) eva = rcr2(); /* * Buggy application or kernel code has disabled interrupts * and then trapped. Enabling interrupts now is wrong, but it * is better than running with interrupts disabled until they * are accidentally enabled later. */ if ((frame->tf_eflags & PSL_I) == 0 && TRAPF_USERMODE(frame) && (curpcb->pcb_flags & PCB_VM86CALL) == 0) uprintf("pid %ld (%s): trap %d with interrupts disabled\n", (long)curproc->p_pid, curthread->td_name, type); /* * Conditionally reenable interrupts. If we hold a spin lock, * then we must not reenable interrupts. This might be a * spurious page fault. */ if (trap_enable_intr(type) && td->td_md.md_spinlock_count == 0 && frame->tf_eip != (int)cpu_switch_load_gs) enable_intr(); if (TRAPF_USERMODE(frame) && (curpcb->pcb_flags & PCB_VM86CALL) == 0) { /* user trap */ td->td_pticks = 0; td->td_frame = frame; addr = frame->tf_eip; if (td->td_cowgen != p->p_cowgen) thread_cow_update(td); switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ signo = SIGILL; ucode = ILL_PRVOPC; break; case T_BPTFLT: /* bpt instruction fault */ #ifdef KDTRACE_HOOKS if (trap_user_dtrace(frame, &dtrace_pid_probe_ptr)) return; #else enable_intr(); #endif signo = SIGTRAP; ucode = TRAP_BRKPT; break; case T_TRCTRAP: /* debug exception */ enable_intr(); user_trctrap_out: signo = SIGTRAP; ucode = TRAP_TRACE; dr6 = rdr6(); if ((dr6 & DBREG_DR6_BS) != 0) { PROC_LOCK(td->td_proc); if ((td->td_dbgflags & TDB_STEP) != 0) { td->td_frame->tf_eflags &= ~PSL_T; td->td_dbgflags &= ~TDB_STEP; } PROC_UNLOCK(td->td_proc); } break; case T_ARITHTRAP: /* arithmetic trap */ ucode = npxtrap_x87(); if (ucode == -1) return; signo = SIGFPE; break; /* * The following two traps can happen in vm86 mode, * and, if so, we want to handle them specially. */ case T_PROTFLT: /* general protection fault */ case T_STKFLT: /* stack fault */ if (frame->tf_eflags & PSL_VM) { signo = vm86_emulate((struct vm86frame *)frame); ucode = 0; /* XXXKIB: better code ? */ if (signo == SIGTRAP) { load_dr6(rdr6() | 0x4000); goto user_trctrap_out; } if (signo == 0) goto user; break; } signo = SIGBUS; ucode = (type == T_PROTFLT) ? BUS_OBJERR : BUS_ADRERR; break; case T_SEGNPFLT: /* segment not present fault */ signo = SIGBUS; ucode = BUS_ADRERR; break; case T_TSSFLT: /* invalid TSS fault */ signo = SIGBUS; ucode = BUS_OBJERR; break; case T_ALIGNFLT: signo = SIGBUS; ucode = BUS_ADRALN; break; case T_DOUBLEFLT: /* double fault */ default: signo = SIGBUS; ucode = BUS_OBJERR; break; case T_PAGEFLT: /* page fault */ addr = eva; pf = trap_pfault(frame, true, eva, &signo, &ucode); #if defined(I586_CPU) && !defined(NO_F00F_HACK) if (pf == -2) { /* * The f00f hack workaround has triggered, so * treat the fault as an illegal instruction * (T_PRIVINFLT) instead of a page fault. */ type = frame->tf_trapno = T_PRIVINFLT; break; } #endif if (pf == -1) return; if (pf == 0) goto user; break; case T_DIVIDE: /* integer divide fault */ ucode = FPE_INTDIV; signo = SIGFPE; break; case T_NMI: #ifdef POWERFAIL_NMI #ifndef TIMER_FREQ # define TIMER_FREQ 1193182 #endif if (time_second - lastalert > 10) { log(LOG_WARNING, "NMI: power fail\n"); sysbeep(880, hz); lastalert = time_second; } return; #else /* !POWERFAIL_NMI */ nmi_handle_intr(type, frame); return; #endif /* POWERFAIL_NMI */ case T_OFLOW: /* integer overflow fault */ ucode = FPE_INTOVF; signo = SIGFPE; break; case T_BOUND: /* bounds check fault */ ucode = FPE_FLTSUB; signo = SIGFPE; break; case T_DNA: KASSERT(PCB_USER_FPU(td->td_pcb), ("kernel FPU ctx has leaked")); /* transparent fault (due to context switch "late") */ if (npxdna()) return; uprintf("pid %d killed due to lack of floating point\n", p->p_pid); signo = SIGKILL; ucode = 0; break; case T_FPOPFLT: /* FPU operand fetch fault */ ucode = ILL_COPROC; signo = SIGILL; break; case T_XMMFLT: /* SIMD floating-point exception */ ucode = npxtrap_sse(); if (ucode == -1) return; signo = SIGFPE; break; #ifdef KDTRACE_HOOKS case T_DTRACE_RET: (void)trap_user_dtrace(frame, &dtrace_return_probe_ptr); return; #endif } } else { /* kernel trap */ KASSERT(cold || td->td_ucred != NULL, ("kernel trap doesn't have ucred")); switch (type) { case T_PAGEFLT: /* page fault */ (void)trap_pfault(frame, false, eva, NULL, NULL); return; case T_DNA: if (PCB_USER_FPU(td->td_pcb)) panic("Unregistered use of FPU in kernel"); if (npxdna()) return; break; case T_ARITHTRAP: /* arithmetic trap */ case T_XMMFLT: /* SIMD floating-point exception */ case T_FPOPFLT: /* FPU operand fetch fault */ /* * XXXKIB for now disable any FPU traps in kernel * handler registration seems to be overkill */ trap_fatal(frame, 0); return; /* * The following two traps can happen in * vm86 mode, and, if so, we want to handle * them specially. */ case T_PROTFLT: /* general protection fault */ case T_STKFLT: /* stack fault */ if (frame->tf_eflags & PSL_VM) { signo = vm86_emulate((struct vm86frame *)frame); if (signo == SIGTRAP) { type = T_TRCTRAP; load_dr6(rdr6() | 0x4000); goto kernel_trctrap; } if (signo != 0) /* * returns to original process */ vm86_trap((struct vm86frame *)frame); return; } /* FALL THROUGH */ case T_SEGNPFLT: /* segment not present fault */ if (curpcb->pcb_flags & PCB_VM86CALL) break; /* * Invalid %fs's and %gs's can be created using * procfs or PT_SETREGS or by invalidating the * underlying LDT entry. This causes a fault * in kernel mode when the kernel attempts to * switch contexts. Lose the bad context * (XXX) so that we can continue, and generate * a signal. */ if (frame->tf_eip == (int)cpu_switch_load_gs) { curpcb->pcb_gs = 0; #if 0 PROC_LOCK(p); kern_psignal(p, SIGBUS); PROC_UNLOCK(p); #endif return; } if (td->td_intr_nesting_level != 0) break; /* * Invalid segment selectors and out of bounds * %eip's and %esp's can be set up in user mode. * This causes a fault in kernel mode when the * kernel tries to return to user mode. We want * to get this fault so that we can fix the * problem here and not have to check all the * selectors and pointers when the user changes * them. * * N.B. Comparing to long mode, 32-bit mode * does not push %esp on the trap frame, * because iretl faulted while in ring 0. As * the consequence, there is no need to fixup * the stack pointer for doreti_iret_fault, * the fixup and the complimentary trap() call * are executed on the main thread stack, not * on the trampoline stack. */ if (frame->tf_eip == (int)doreti_iret + setidt_disp) { frame->tf_eip = (int)doreti_iret_fault + setidt_disp; return; } if (type == T_STKFLT) break; if (frame->tf_eip == (int)doreti_popl_ds + setidt_disp) { frame->tf_eip = (int)doreti_popl_ds_fault + setidt_disp; return; } if (frame->tf_eip == (int)doreti_popl_es + setidt_disp) { frame->tf_eip = (int)doreti_popl_es_fault + setidt_disp; return; } if (frame->tf_eip == (int)doreti_popl_fs + setidt_disp) { frame->tf_eip = (int)doreti_popl_fs_fault + setidt_disp; return; } if (curpcb->pcb_onfault != NULL) { frame->tf_eip = (int)curpcb->pcb_onfault; return; } break; case T_TSSFLT: /* * PSL_NT can be set in user mode and isn't cleared * automatically when the kernel is entered. This * causes a TSS fault when the kernel attempts to * `iret' because the TSS link is uninitialized. We * want to get this fault so that we can fix the * problem here and not every time the kernel is * entered. */ if (frame->tf_eflags & PSL_NT) { frame->tf_eflags &= ~PSL_NT; return; } break; case T_TRCTRAP: /* debug exception */ kernel_trctrap: /* Clear any pending debug events. */ dr6 = rdr6(); load_dr6(0); /* * Ignore debug register exceptions due to * accesses in the user's address space, which * can happen under several conditions such as * if a user sets a watchpoint on a buffer and * then passes that buffer to a system call. * We still want to get TRCTRAPS for addresses * in kernel space because that is useful when * debugging the kernel. */ if (user_dbreg_trap(dr6) && !(curpcb->pcb_flags & PCB_VM86CALL)) return; /* * Malicious user code can configure a debug * register watchpoint to trap on data access * to the top of stack and then execute 'pop * %ss; int 3'. Due to exception deferral for * 'pop %ss', the CPU will not interrupt 'int * 3' to raise the DB# exception for the debug * register but will postpone the DB# until * execution of the first instruction of the * BP# handler (in kernel mode). Normally the * previous check would ignore DB# exceptions * for watchpoints on user addresses raised in * kernel mode. However, some CPU errata * include cases where DB# exceptions do not * properly set bits in %dr6, e.g. Haswell * HSD23 and Skylake-X SKZ24. * * A deferred DB# can also be raised on the * first instructions of system call entry * points or single-step traps via similar use * of 'pop %ss' or 'mov xxx, %ss'. */ if (frame->tf_eip == (uintptr_t)IDTVEC(int0x80_syscall) + setidt_disp || frame->tf_eip == (uintptr_t)IDTVEC(bpt) + setidt_disp || frame->tf_eip == (uintptr_t)IDTVEC(dbg) + setidt_disp) return; /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) */ case T_BPTFLT: /* * If KDB is enabled, let it handle the debugger trap. * Otherwise, debugger traps "can't happen". */ #ifdef KDB if (kdb_trap(type, dr6, frame)) return; #endif break; case T_NMI: #ifdef POWERFAIL_NMI if (time_second - lastalert > 10) { log(LOG_WARNING, "NMI: power fail\n"); sysbeep(880, hz); lastalert = time_second; } return; #else /* !POWERFAIL_NMI */ nmi_handle_intr(type, frame); return; #endif /* POWERFAIL_NMI */ } trap_fatal(frame, eva); return; } /* Translate fault for emulators (e.g. Linux) */ if (*p->p_sysent->sv_transtrap != NULL) signo = (*p->p_sysent->sv_transtrap)(signo, type); ksiginfo_init_trap(&ksi); ksi.ksi_signo = signo; ksi.ksi_code = ucode; ksi.ksi_addr = (void *)addr; ksi.ksi_trapno = type; if (uprintf_signal) { uprintf("pid %d comm %s: signal %d err %x code %d type %d " "addr 0x%x ss 0x%04x esp 0x%08x cs 0x%04x eip 0x%08x " "<%02x %02x %02x %02x %02x %02x %02x %02x>\n", p->p_pid, p->p_comm, signo, frame->tf_err, ucode, type, addr, frame->tf_ss, frame->tf_esp, frame->tf_cs, frame->tf_eip, fubyte((void *)(frame->tf_eip + 0)), fubyte((void *)(frame->tf_eip + 1)), fubyte((void *)(frame->tf_eip + 2)), fubyte((void *)(frame->tf_eip + 3)), fubyte((void *)(frame->tf_eip + 4)), fubyte((void *)(frame->tf_eip + 5)), fubyte((void *)(frame->tf_eip + 6)), fubyte((void *)(frame->tf_eip + 7))); } KASSERT((read_eflags() & PSL_I) != 0, ("interrupts disabled")); trapsignal(td, &ksi); user: userret(td, frame); KASSERT(PCB_USER_FPU(td->td_pcb), ("Return from trap with kernel FPU ctx leaked")); } /* * Handle all details of a page fault. * Returns: * -2 if the fault was caused by triggered workaround for Intel Pentium * 0xf00f bug. * -1 if this fault was fatal, typically from kernel mode * (cannot happen, but we need to return something). * 0 if this fault was handled by updating either the user or kernel * page table, execution can continue. * 1 if this fault was from usermode and it was not handled, a synchronous * signal should be delivered to the thread. *signo returns the signal * number, *ucode gives si_code. */ static int trap_pfault(struct trapframe *frame, bool usermode, vm_offset_t eva, int *signo, int *ucode) { struct thread *td; struct proc *p; vm_map_t map; int rv; vm_prot_t ftype; MPASS(!usermode || (signo != NULL && ucode != NULL)); td = curthread; p = td->td_proc; if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) { /* * Due to both processor errata and lazy TLB invalidation when * access restrictions are removed from virtual pages, memory * accesses that are allowed by the physical mapping layer may * nonetheless cause one spurious page fault per virtual page. * When the thread is executing a "no faulting" section that * is bracketed by vm_fault_{disable,enable}_pagefaults(), * every page fault is treated as a spurious page fault, * unless it accesses the same virtual address as the most * recent page fault within the same "no faulting" section. */ if (td->td_md.md_spurflt_addr != eva || (td->td_pflags & TDP_RESETSPUR) != 0) { /* * Do nothing to the TLB. A stale TLB entry is * flushed automatically by a page fault. */ td->td_md.md_spurflt_addr = eva; td->td_pflags &= ~TDP_RESETSPUR; return (0); } } else { /* * If we get a page fault while in a critical section, then * it is most likely a fatal kernel page fault. The kernel * is already going to panic trying to get a sleep lock to * do the VM lookup, so just consider it a fatal trap so the * kernel can print out a useful trap message and even get * to the debugger. * * If we get a page fault while holding a non-sleepable * lock, then it is most likely a fatal kernel page fault. * If WITNESS is enabled, then it's going to whine about * bogus LORs with various VM locks, so just skip to the * fatal trap handling directly. */ if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) { trap_fatal(frame, eva); return (-1); } } if (eva >= PMAP_TRM_MIN_ADDRESS) { /* * Don't allow user-mode faults in kernel address space. * An exception: if the faulting address is the invalid * instruction entry in the IDT, then the Intel Pentium * F00F bug workaround was triggered, and we need to * treat it is as an illegal instruction, and not a page * fault. */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) { *ucode = ILL_PRVOPC; *signo = SIGILL; return (-2); } #endif if (usermode) { *signo = SIGSEGV; *ucode = SEGV_MAPERR; return (1); } trap_fatal(frame, eva); return (-1); } else { map = usermode ? &p->p_vmspace->vm_map : kernel_map; /* * Kernel cannot access a user-space address directly * because user pages are not mapped. Also, page * faults must not be caused during the interrupts. */ if (!usermode && td->td_intr_nesting_level != 0) { trap_fatal(frame, eva); return (-1); } } /* * If the trap was caused by errant bits in the PTE then panic. */ if (frame->tf_err & PGEX_RSV) { trap_fatal(frame, eva); return (-1); } /* * PGEX_I is defined only if the execute disable bit capability is * supported and enabled. */ if (frame->tf_err & PGEX_W) ftype = VM_PROT_WRITE; else if ((frame->tf_err & PGEX_I) && pg_nx != 0) ftype = VM_PROT_EXECUTE; else ftype = VM_PROT_READ; /* Fault in the page. */ rv = vm_fault_trap(map, eva, ftype, VM_FAULT_NORMAL, signo, ucode); if (rv == KERN_SUCCESS) { #ifdef HWPMC_HOOKS if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { PMC_SOFT_CALL_TF( , , page_fault, all, frame); if (ftype == VM_PROT_READ) PMC_SOFT_CALL_TF( , , page_fault, read, frame); else PMC_SOFT_CALL_TF( , , page_fault, write, frame); } #endif return (0); } if (usermode) return (1); if (td->td_intr_nesting_level == 0 && curpcb->pcb_onfault != NULL) { frame->tf_eip = (int)curpcb->pcb_onfault; return (0); } trap_fatal(frame, eva); return (-1); } static void trap_fatal(frame, eva) struct trapframe *frame; vm_offset_t eva; { int code, ss, esp; u_int type; struct soft_segment_descriptor softseg; #ifdef KDB bool handled; #endif code = frame->tf_err; type = frame->tf_trapno; sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg); printf("\n\nFatal trap %d: %s while in %s mode\n", type, trap_msg(type), frame->tf_eflags & PSL_VM ? "vm86" : ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel"); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif if (type == T_PAGEFLT) { printf("fault virtual address = 0x%x\n", eva); printf("fault code = %s %s%s, %s\n", code & PGEX_U ? "user" : "supervisor", code & PGEX_W ? "write" : "read", pg_nx != 0 ? (code & PGEX_I ? " instruction" : " data") : "", code & PGEX_RSV ? "reserved bits in PTE" : code & PGEX_P ? "protection violation" : "page not present"); } else { printf("error code = %#x\n", code); } printf("instruction pointer = 0x%x:0x%x\n", frame->tf_cs & 0xffff, frame->tf_eip); if (TF_HAS_STACKREGS(frame)) { ss = frame->tf_ss & 0xffff; esp = frame->tf_esp; } else { ss = GSEL(GDATA_SEL, SEL_KPL); esp = (int)&frame->tf_esp; } printf("stack pointer = 0x%x:0x%x\n", ss, esp); printf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp); printf("code segment = base 0x%x, limit 0x%x, type 0x%x\n", softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); printf(" = DPL %d, pres %d, def32 %d, gran %d\n", softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32, softseg.ssd_gran); printf("processor eflags = "); if (frame->tf_eflags & PSL_T) printf("trace trap, "); if (frame->tf_eflags & PSL_I) printf("interrupt enabled, "); if (frame->tf_eflags & PSL_NT) printf("nested task, "); if (frame->tf_eflags & PSL_RF) printf("resume, "); if (frame->tf_eflags & PSL_VM) printf("vm86, "); printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12); printf("current process = %d (%s)\n", curproc->p_pid, curthread->td_name); #ifdef KDB if (debugger_on_trap) { kdb_why = KDB_WHY_TRAP; frame->tf_err = eva; /* smuggle fault address to ddb */ handled = kdb_trap(type, 0, frame); frame->tf_err = code; /* restore error code */ kdb_why = KDB_WHY_UNSET; if (handled) return; } #endif printf("trap number = %d\n", type); if (trap_msg(type) != NULL) panic("%s", trap_msg(type)); else panic("unknown/reserved trap"); } #ifdef KDTRACE_HOOKS /* * Invoke a userspace DTrace hook. The hook pointer is cleared when no * userspace probes are enabled, so we must synchronize with DTrace to ensure * that a trapping thread is able to call the hook before it is cleared. */ static bool trap_user_dtrace(struct trapframe *frame, int (**hookp)(struct trapframe *)) { int (*hook)(struct trapframe *); hook = atomic_load_ptr(hookp); enable_intr(); if (hook != NULL) return ((hook)(frame) == 0); return (false); } #endif /* * Double fault handler. Called when a fault occurs while writing * a frame for a trap/exception onto the stack. This usually occurs * when the stack overflows (such is the case with infinite recursion, * for example). * * XXX Note that the current PTD gets replaced by IdlePTD when the * task switch occurs. This means that the stack that was active at * the time of the double fault is not available at unless * the machine was idle when the double fault occurred. The downside * of this is that "trace " in ddb won't work. */ void dblfault_handler(void) { #ifdef KDTRACE_HOOKS if (dtrace_doubletrap_func != NULL) (*dtrace_doubletrap_func)(); #endif printf("\nFatal double fault:\n"); printf("eip = 0x%x\n", PCPU_GET(common_tssp)->tss_eip); printf("esp = 0x%x\n", PCPU_GET(common_tssp)->tss_esp); printf("ebp = 0x%x\n", PCPU_GET(common_tssp)->tss_ebp); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif panic("double fault"); } int cpu_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; struct syscall_args *sa; caddr_t params; long tmp; int error; #ifdef COMPAT_43 u_int32_t eip; int cs; #endif p = td->td_proc; frame = td->td_frame; sa = &td->td_sa; #ifdef COMPAT_43 if (__predict_false(frame->tf_cs == 7 && frame->tf_eip == 2)) { /* * In lcall $7,$0 after int $0x80. Convert the user * frame to what it would be for a direct int 0x80 instead * of lcall $7,$0, by popping the lcall return address. */ error = fueword32((void *)frame->tf_esp, &eip); if (error == -1) return (EFAULT); cs = fuword16((void *)(frame->tf_esp + sizeof(u_int32_t))); if (cs == -1) return (EFAULT); /* * Unwind in-kernel frame after all stack frame pieces * were successfully read. */ frame->tf_eip = eip; frame->tf_cs = cs; frame->tf_esp += 2 * sizeof(u_int32_t); frame->tf_err = 7; /* size of lcall $7,$0 */ } #endif sa->code = frame->tf_eax; params = (caddr_t)frame->tf_esp + sizeof(uint32_t); /* * Need to check if this is a 32 bit or 64 bit syscall. */ if (sa->code == SYS_syscall) { /* * Code is first argument, followed by actual args. */ error = fueword(params, &tmp); if (error == -1) return (EFAULT); sa->code = tmp; params += sizeof(uint32_t); } else if (sa->code == SYS___syscall) { /* * Like syscall, but code is a quad, so as to maintain * quad alignment for the rest of the arguments. */ error = fueword(params, &tmp); if (error == -1) return (EFAULT); sa->code = tmp; params += sizeof(quad_t); } if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; - sa->narg = sa->callp->sy_narg; - if (params != NULL && sa->narg != 0) + if (params != NULL && sa->callp->sy_narg != 0) error = copyin(params, (caddr_t)sa->args, - (u_int)(sa->narg * sizeof(uint32_t))); + (u_int)(sa->callp->sy_narg * sizeof(uint32_t))); else error = 0; if (error == 0) { td->td_retval[0] = 0; td->td_retval[1] = frame->tf_edx; } return (error); } #include "../../kern/subr_syscall.c" /* * syscall - system call request C handler. A system call is * essentially treated as a trap by reusing the frame layout. */ void syscall(struct trapframe *frame) { struct thread *td; register_t orig_tf_eflags; ksiginfo_t ksi; #ifdef DIAGNOSTIC if (!(TRAPF_USERMODE(frame) && (curpcb->pcb_flags & PCB_VM86CALL) == 0)) { panic("syscall"); /* NOT REACHED */ } #endif orig_tf_eflags = frame->tf_eflags; td = curthread; td->td_frame = frame; syscallenter(td); /* * Traced syscall. */ if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) { frame->tf_eflags &= ~PSL_T; ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGTRAP; ksi.ksi_code = TRAP_TRACE; ksi.ksi_addr = (void *)frame->tf_eip; trapsignal(td, &ksi); } KASSERT(PCB_USER_FPU(td->td_pcb), ("System call %s returning with kernel FPU ctx leaked", syscallname(td->td_proc, td->td_sa.code))); KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td), ("System call %s returning with mangled pcb_save", syscallname(td->td_proc, td->td_sa.code))); syscallret(td); } Index: head/sys/i386/include/proc.h =================================================================== --- head/sys/i386/include/proc.h (revision 366204) +++ head/sys/i386/include/proc.h (revision 366205) @@ -1,91 +1,90 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)proc.h 7.1 (Berkeley) 5/15/91 * $FreeBSD$ */ #ifndef _MACHINE_PROC_H_ #define _MACHINE_PROC_H_ #include struct proc_ldt { caddr_t ldt_base; int ldt_len; int ldt_refcnt; u_long ldt_active; struct segment_descriptor ldt_sd; }; /* * Machine-dependent part of the proc structure for i386. * Table of MD locks: * t - Descriptor tables lock */ struct mdthread { int md_spinlock_count; /* (k) */ register_t md_saved_flags; /* (k) */ register_t md_spurflt_addr; /* (k) Spurious page fault address. */ }; struct mdproc { struct proc_ldt *md_ldt; /* (t) per-process ldt */ }; #define KINFO_PROC_SIZE 768 struct syscall_args { u_int code; struct sysent *callp; register_t args[8]; - int narg; }; #ifdef _KERNEL /* Get the current kernel thread stack usage. */ #define GET_STACK_USAGE(total, used) do { \ struct thread *td = curthread; \ (total) = td->td_kstack_pages * PAGE_SIZE; \ (used) = (char *)td->td_kstack + \ td->td_kstack_pages * PAGE_SIZE - \ (char *)&td; \ } while (0) void set_user_ldt(struct mdproc *); struct proc_ldt *user_ldt_alloc(struct mdproc *, int); void user_ldt_free(struct thread *); void user_ldt_deref(struct proc_ldt *pldt); extern struct mtx dt_lock; #endif /* _KERNEL */ #endif /* !_MACHINE_PROC_H_ */ Index: head/sys/i386/linux/linux_sysvec.c =================================================================== --- head/sys/i386/linux/linux_sysvec.c (revision 366204) +++ head/sys/i386/linux/linux_sysvec.c (revision 366205) @@ -1,1099 +1,1098 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1994-1996 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_VERSION(linux, 1); #define LINUX_PS_STRINGS (LINUX_USRSTACK - sizeof(struct ps_strings)) static int linux_szsigcode; static vm_object_t linux_shared_page_obj; static char *linux_shared_page_mapping; extern char _binary_linux_locore_o_start; extern char _binary_linux_locore_o_end; extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); static int linux_fixup(uintptr_t *stack_base, struct image_params *iparams); static int linux_fixup_elf(uintptr_t *stack_base, struct image_params *iparams); static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); static void linux_exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack); static int linux_copyout_strings(struct image_params *imgp, uintptr_t *stack_base); static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel); static void linux_vdso_install(void *param); static void linux_vdso_deinstall(void *param); static int linux_szplatform; const char *linux_kplatform; static eventhandler_tag linux_exit_tag; static eventhandler_tag linux_exec_tag; static eventhandler_tag linux_thread_dtor_tag; #define LINUX_T_UNKNOWN 255 static int _bsd_to_linux_trapcode[] = { LINUX_T_UNKNOWN, /* 0 */ 6, /* 1 T_PRIVINFLT */ LINUX_T_UNKNOWN, /* 2 */ 3, /* 3 T_BPTFLT */ LINUX_T_UNKNOWN, /* 4 */ LINUX_T_UNKNOWN, /* 5 */ 16, /* 6 T_ARITHTRAP */ 254, /* 7 T_ASTFLT */ LINUX_T_UNKNOWN, /* 8 */ 13, /* 9 T_PROTFLT */ 1, /* 10 T_TRCTRAP */ LINUX_T_UNKNOWN, /* 11 */ 14, /* 12 T_PAGEFLT */ LINUX_T_UNKNOWN, /* 13 */ 17, /* 14 T_ALIGNFLT */ LINUX_T_UNKNOWN, /* 15 */ LINUX_T_UNKNOWN, /* 16 */ LINUX_T_UNKNOWN, /* 17 */ 0, /* 18 T_DIVIDE */ 2, /* 19 T_NMI */ 4, /* 20 T_OFLOW */ 5, /* 21 T_BOUND */ 7, /* 22 T_DNA */ 8, /* 23 T_DOUBLEFLT */ 9, /* 24 T_FPOPFLT */ 10, /* 25 T_TSSFLT */ 11, /* 26 T_SEGNPFLT */ 12, /* 27 T_STKFLT */ 18, /* 28 T_MCHK */ 19, /* 29 T_XMMFLT */ 15 /* 30 T_RESERVED */ }; #define bsd_to_linux_trapcode(code) \ ((code)args->argc + 1); base--; suword(base, (intptr_t)envp); base--; suword(base, (intptr_t)argv); base--; suword(base, imgp->args->argc); *stack_base = (uintptr_t)base; return (0); } static int linux_copyout_auxargs(struct image_params *imgp, uintptr_t base) { struct proc *p; Elf32_Auxargs *args; Elf32_Auxinfo *argarray, *pos; Elf32_Addr *uplatform; struct ps_strings *arginfo; int error, issetugid; p = imgp->proc; issetugid = imgp->proc->p_flag & P_SUGID ? 1 : 0; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform); args = (Elf32_Auxargs *)imgp->auxargs; argarray = pos = malloc(LINUX_AT_COUNT * sizeof(*pos), M_TEMP, M_WAITOK | M_ZERO); AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, imgp->proc->p_sysent->sv_shared_page_base); AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, linux_vsyscall); AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature); /* * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0, * as it has appeared in the 2.4.0-rc7 first time. * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK), * glibc falls back to the hard-coded CLK_TCK value when aux entry * is not present. * Also see linux_times() implementation. */ if (linux_kernver(curthread) >= LINUX_KERNVER_2004000) AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz); AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); AUXARGS_ENTRY(pos, AT_PHENT, args->phent); AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); AUXARGS_ENTRY(pos, AT_BASE, args->base); AUXARGS_ENTRY(pos, LINUX_AT_SECURE, issetugid); AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform)); AUXARGS_ENTRY_PTR(pos, LINUX_AT_RANDOM, imgp->canary); if (imgp->execpathp != 0) AUXARGS_ENTRY_PTR(pos, LINUX_AT_EXECFN, imgp->execpathp); if (args->execfd != -1) AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); AUXARGS_ENTRY(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); imgp->auxargs = NULL; KASSERT(pos - argarray <= LINUX_AT_COUNT, ("Too many auxargs")); error = copyout(argarray, (void *)base, sizeof(*argarray) * LINUX_AT_COUNT); free(argarray, M_TEMP); return (error); } static int linux_fixup_elf(uintptr_t *stack_base, struct image_params *imgp) { register_t *base; base = (register_t *)*stack_base; base--; if (suword(base, (register_t)imgp->args->argc) == -1) return (EFAULT); *stack_base = (uintptr_t)base; return (0); } /* * Copied from kern/kern_exec.c */ static int linux_copyout_strings(struct image_params *imgp, uintptr_t *stack_base) { int argc, envc, error; char **vectp; char *stringp; uintptr_t destp, ustringp; struct ps_strings *arginfo; char canary[LINUX_AT_RANDOM_LEN]; size_t execpath_len; struct proc *p; /* Calculate string base and vector table pointers. */ p = imgp->proc; if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; destp = (uintptr_t)arginfo; /* Install LINUX_PLATFORM. */ destp -= linux_szplatform; destp = rounddown2(destp, sizeof(void *)); error = copyout(linux_kplatform, (void *)destp, linux_szplatform); if (error != 0) return (error); if (execpath_len != 0) { destp -= execpath_len; destp = rounddown2(destp, sizeof(void *)); imgp->execpathp = (void *)destp; error = copyout(imgp->execpath, imgp->execpathp, execpath_len); if (error != 0) return (error); } /* Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); destp -= roundup(sizeof(canary), sizeof(void *)); imgp->canary = (void *)destp; error = copyout(canary, imgp->canary, sizeof(canary)); if (error != 0) return (error); /* Allocate room for the argument and environment strings. */ destp -= ARG_MAX - imgp->args->stringspace; destp = rounddown2(destp, sizeof(void *)); ustringp = destp; if (imgp->auxargs) { /* * Allocate room on the stack for the ELF auxargs * array. It has LINUX_AT_COUNT entries. */ destp -= LINUX_AT_COUNT * sizeof(Elf32_Auxinfo); destp = rounddown2(destp, sizeof(void *)); } vectp = (char **)destp; /* * Allocate room for the argv[] and env vectors including the * terminating NULL pointers. */ vectp -= imgp->args->argc + 1 + imgp->args->envc + 1; /* vectp also becomes our initial stack base. */ *stack_base = (uintptr_t)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* Copy out strings - arguments and environment. */ error = copyout(stringp, (void *)ustringp, ARG_MAX - imgp->args->stringspace); if (error != 0) return (error); /* Fill in "ps_strings" struct for ps, w, etc. */ if (suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp) != 0 || suword(&arginfo->ps_nargvstr, argc) != 0) return (EFAULT); /* Fill in argument portion of vector table. */ for (; argc > 0; --argc) { if (suword(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* A null vector table pointer separates the argp's from the envp's. */ if (suword(vectp++, 0) != 0) return (EFAULT); if (suword(&arginfo->ps_envstr, (long)(intptr_t)vectp) != 0 || suword(&arginfo->ps_nenvstr, envc) != 0) return (EFAULT); /* Fill in environment portion of vector table. */ for (; envc > 0; --envc) { if (suword(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* The end of the vector table is a null pointer. */ if (suword(vectp, 0) != 0) return (EFAULT); if (imgp->auxargs) { vectp++; error = imgp->sysent->sv_copyout_auxargs(imgp, (uintptr_t)vectp); if (error != 0) return (error); } return (0); } static void linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td = curthread; struct proc *p = td->td_proc; struct sigacts *psp; struct trapframe *regs; struct l_rt_sigframe *fp, frame; int sig, code; int oonstack; sig = ksi->ksi_signo; code = ksi->ksi_code; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct l_rt_sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); } else fp = (struct l_rt_sigframe *)regs->tf_esp - 1; mtx_unlock(&psp->ps_mtx); /* Build the argument list for the signal handler. */ sig = bsd_to_linux_signal(sig); bzero(&frame, sizeof(frame)); frame.sf_handler = catcher; frame.sf_sig = sig; frame.sf_siginfo = &fp->sf_si; frame.sf_ucontext = &fp->sf_sc; /* Fill in POSIX parts. */ ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); /* Build the signal context to be used by sigreturn. */ frame.sf_sc.uc_flags = 0; /* XXX ??? */ frame.sf_sc.uc_link = NULL; /* XXX ??? */ frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp; frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; PROC_UNLOCK(p); bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__mask; frame.sf_sc.uc_mcontext.sc_gs = rgs(); frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi; frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi; frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp; frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx; frame.sf_sc.uc_mcontext.sc_esp = regs->tf_esp; frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx; frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx; frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax; frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip; frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags; frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp; frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr; frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); if (copyout(&frame, fp, sizeof(frame)) != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ PROC_LOCK(p); sigexit(td, SIGILL); } /* Build context to run handler in. */ regs->tf_esp = (int)fp; regs->tf_eip = linux_rt_sigcode; regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * in u. to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td = curthread; struct proc *p = td->td_proc; struct sigacts *psp; struct trapframe *regs; struct l_sigframe *fp, frame; l_sigset_t lmask; int sig, code; int oonstack; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; sig = ksi->ksi_signo; code = ksi->ksi_code; mtx_assert(&psp->ps_mtx, MA_OWNED); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ linux_rt_sendsig(catcher, ksi, mask); return; } regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct l_sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_sigframe)); } else fp = (struct l_sigframe *)regs->tf_esp - 1; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* Build the argument list for the signal handler. */ sig = bsd_to_linux_signal(sig); bzero(&frame, sizeof(frame)); frame.sf_handler = catcher; frame.sf_sig = sig; bsd_to_linux_sigset(mask, &lmask); /* Build the signal context to be used by sigreturn. */ frame.sf_sc.sc_mask = lmask.__mask; frame.sf_sc.sc_gs = rgs(); frame.sf_sc.sc_fs = regs->tf_fs; frame.sf_sc.sc_es = regs->tf_es; frame.sf_sc.sc_ds = regs->tf_ds; frame.sf_sc.sc_edi = regs->tf_edi; frame.sf_sc.sc_esi = regs->tf_esi; frame.sf_sc.sc_ebp = regs->tf_ebp; frame.sf_sc.sc_ebx = regs->tf_ebx; frame.sf_sc.sc_esp = regs->tf_esp; frame.sf_sc.sc_edx = regs->tf_edx; frame.sf_sc.sc_ecx = regs->tf_ecx; frame.sf_sc.sc_eax = regs->tf_eax; frame.sf_sc.sc_eip = regs->tf_eip; frame.sf_sc.sc_cs = regs->tf_cs; frame.sf_sc.sc_eflags = regs->tf_eflags; frame.sf_sc.sc_esp_at_signal = regs->tf_esp; frame.sf_sc.sc_ss = regs->tf_ss; frame.sf_sc.sc_err = regs->tf_err; frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr; frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno); frame.sf_extramask[0] = lmask.__mask; if (copyout(&frame, fp, sizeof(frame)) != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ PROC_LOCK(p); sigexit(td, SIGILL); } /* Build context to run handler in. */ regs->tf_esp = (int)fp; regs->tf_eip = linux_sigcode; regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * psl to gain improper privileges or to cause * a machine fault. */ int linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) { struct l_sigframe frame; struct trapframe *regs; l_sigset_t lmask; sigset_t bmask; int eflags; ksiginfo_t ksi; regs = td->td_frame; /* * The trampoline code hands us the sigframe. * It is unsafe to keep track of it ourselves, in the event that a * program jumps out of a signal handler. */ if (copyin(args->sfp, &frame, sizeof(frame)) != 0) return (EFAULT); /* Check for security violations. */ #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) eflags = frame.sf_sc.sc_eflags; if (!EFLAGS_SECURE(eflags, regs->tf_eflags)) return (EINVAL); /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(frame.sf_sc.sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } lmask.__mask = frame.sf_sc.sc_mask; linux_to_bsd_sigset(&lmask, &bmask); kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); /* Restore signal context. */ /* %gs was restored by the trampoline. */ regs->tf_fs = frame.sf_sc.sc_fs; regs->tf_es = frame.sf_sc.sc_es; regs->tf_ds = frame.sf_sc.sc_ds; regs->tf_edi = frame.sf_sc.sc_edi; regs->tf_esi = frame.sf_sc.sc_esi; regs->tf_ebp = frame.sf_sc.sc_ebp; regs->tf_ebx = frame.sf_sc.sc_ebx; regs->tf_edx = frame.sf_sc.sc_edx; regs->tf_ecx = frame.sf_sc.sc_ecx; regs->tf_eax = frame.sf_sc.sc_eax; regs->tf_eip = frame.sf_sc.sc_eip; regs->tf_cs = frame.sf_sc.sc_cs; regs->tf_eflags = eflags; regs->tf_esp = frame.sf_sc.sc_esp_at_signal; regs->tf_ss = frame.sf_sc.sc_ss; return (EJUSTRETURN); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by rt_sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * psl to gain improper privileges or to cause * a machine fault. */ int linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) { struct l_ucontext uc; struct l_sigcontext *context; sigset_t bmask; l_stack_t *lss; stack_t ss; struct trapframe *regs; int eflags; ksiginfo_t ksi; regs = td->td_frame; /* * The trampoline code hands us the ucontext. * It is unsafe to keep track of it ourselves, in the event that a * program jumps out of a signal handler. */ if (copyin(args->ucp, &uc, sizeof(uc)) != 0) return (EFAULT); context = &uc.uc_mcontext; /* Check for security violations. */ #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) eflags = context->sc_eflags; if (!EFLAGS_SECURE(eflags, regs->tf_eflags)) return (EINVAL); /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(context->sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } linux_to_bsd_sigset(&uc.uc_sigmask, &bmask); kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); /* Restore signal context. */ /* %gs was restored by the trampoline. */ regs->tf_fs = context->sc_fs; regs->tf_es = context->sc_es; regs->tf_ds = context->sc_ds; regs->tf_edi = context->sc_edi; regs->tf_esi = context->sc_esi; regs->tf_ebp = context->sc_ebp; regs->tf_ebx = context->sc_ebx; regs->tf_edx = context->sc_edx; regs->tf_ecx = context->sc_ecx; regs->tf_eax = context->sc_eax; regs->tf_eip = context->sc_eip; regs->tf_cs = context->sc_cs; regs->tf_eflags = eflags; regs->tf_esp = context->sc_esp_at_signal; regs->tf_ss = context->sc_ss; /* Call sigaltstack & ignore results. */ lss = &uc.uc_stack; ss.ss_sp = lss->ss_sp; ss.ss_size = lss->ss_size; ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); (void)kern_sigaltstack(td, &ss, NULL); return (EJUSTRETURN); } static int linux_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; struct syscall_args *sa; p = td->td_proc; frame = td->td_frame; sa = &td->td_sa; sa->code = frame->tf_eax; sa->args[0] = frame->tf_ebx; sa->args[1] = frame->tf_ecx; sa->args[2] = frame->tf_edx; sa->args[3] = frame->tf_esi; sa->args[4] = frame->tf_edi; sa->args[5] = frame->tf_ebp; /* Unconfirmed */ if (sa->code >= p->p_sysent->sv_size) /* nosys */ sa->callp = &p->p_sysent->sv_table[p->p_sysent->sv_size - 1]; else sa->callp = &p->p_sysent->sv_table[sa->code]; - sa->narg = sa->callp->sy_narg; td->td_retval[0] = 0; td->td_retval[1] = frame->tf_edx; return (0); } static void linux_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame = td->td_frame; cpu_set_syscall_retval(td, error); if (__predict_false(error != 0)) { if (error != ERESTART && error != EJUSTRETURN) frame->tf_eax = linux_to_bsd_errno(error); } } /* * exec_setregs may initialize some registers differently than Linux * does, thus potentially confusing Linux binaries. If necessary, we * override the exec_setregs default(s) here. */ static void linux_exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct pcb *pcb = td->td_pcb; exec_setregs(td, imgp, stack); /* Linux sets %gs to 0, we default to _udatasel. */ pcb->pcb_gs = 0; load_gs(0); pcb->pcb_initial_npxcw = __LINUX_NPXCW__; } static void linux_get_machine(const char **dst) { switch (cpu_class) { case CPUCLASS_686: *dst = "i686"; break; case CPUCLASS_586: *dst = "i586"; break; case CPUCLASS_486: *dst = "i486"; break; default: *dst = "i386"; } } struct sysentvec linux_sysvec = { .sv_size = LINUX_SYS_MAXSYSCALL, .sv_table = linux_sysent, .sv_transtrap = linux_translate_traps, .sv_fixup = linux_fixup, .sv_sendsig = linux_sendsig, .sv_sigcode = &_binary_linux_locore_o_start, .sv_szsigcode = &linux_szsigcode, .sv_name = "Linux a.out", .sv_coredump = NULL, .sv_imgact_try = linux_exec_imgact_try, .sv_minsigstksz = LINUX_MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = LINUX_USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = linux_exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32, .sv_set_syscall_retval = linux_set_syscall_retval, .sv_fetch_syscall_args = linux_fetch_syscall_args, .sv_syscallnames = NULL, .sv_shared_page_base = LINUX_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, .sv_thread_detach = linux_thread_detach, .sv_trap = NULL, }; INIT_SYSENTVEC(aout_sysvec, &linux_sysvec); struct sysentvec elf_linux_sysvec = { .sv_size = LINUX_SYS_MAXSYSCALL, .sv_table = linux_sysent, .sv_transtrap = linux_translate_traps, .sv_fixup = linux_fixup_elf, .sv_sendsig = linux_sendsig, .sv_sigcode = &_binary_linux_locore_o_start, .sv_szsigcode = &linux_szsigcode, .sv_name = "Linux ELF", .sv_coredump = elf32_coredump, .sv_imgact_try = linux_exec_imgact_try, .sv_minsigstksz = LINUX_MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = LINUX_USRSTACK, .sv_psstrings = LINUX_PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_auxargs = linux_copyout_auxargs, .sv_copyout_strings = linux_copyout_strings, .sv_setregs = linux_exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP, .sv_set_syscall_retval = linux_set_syscall_retval, .sv_fetch_syscall_args = linux_fetch_syscall_args, .sv_syscallnames = NULL, .sv_shared_page_base = LINUX_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, .sv_thread_detach = linux_thread_detach, .sv_trap = NULL, }; static void linux_vdso_install(void *param) { linux_szsigcode = (&_binary_linux_locore_o_end - &_binary_linux_locore_o_start); if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) panic("Linux invalid vdso size\n"); __elfN(linux_vdso_fixup)(&elf_linux_sysvec); linux_shared_page_obj = __elfN(linux_shared_page_init) (&linux_shared_page_mapping); __elfN(linux_vdso_reloc)(&elf_linux_sysvec); bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, linux_szsigcode); elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; } SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, linux_vdso_install, NULL); static void linux_vdso_deinstall(void *param) { __elfN(linux_shared_page_fini)(linux_shared_page_obj); } SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, linux_vdso_deinstall, NULL); static char GNU_ABI_VENDOR[] = "GNU"; static int GNULINUX_ABI_DESC = 0; static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel) { const Elf32_Word *desc; uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); desc = (const Elf32_Word *)p; if (desc[0] != GNULINUX_ABI_DESC) return (false); /* * For Linux we encode osrel using the Linux convention of * (version << 16) | (major << 8) | (minor) * See macro in linux_mib.h */ *osrel = LINUX_KERNVER(desc[1], desc[2], desc[3]); return (true); } static Elf_Brandnote linux_brandnote = { .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), .hdr.n_descsz = 16, /* XXX at least 16 */ .hdr.n_type = 1, .vendor = GNU_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = linux_trans_osrel }; static Elf32_Brandinfo linux_brand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", .emul_path = linux_emul_path, .interp_path = "/lib/ld-linux.so.1", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; static Elf32_Brandinfo linux_glibc2brand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", .emul_path = linux_emul_path, .interp_path = "/lib/ld-linux.so.2", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; static Elf32_Brandinfo linux_muslbrand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", .emul_path = linux_emul_path, .interp_path = "/lib/ld-musl-i386.so.1", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; Elf32_Brandinfo *linux_brandlist[] = { &linux_brand, &linux_glibc2brand, &linux_muslbrand, NULL }; static int linux_elf_modevent(module_t mod, int type, void *data) { Elf32_Brandinfo **brandinfo; int error; struct linux_ioctl_handler **lihp; error = 0; switch(type) { case MOD_LOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_insert_brand_entry(*brandinfo) < 0) error = EINVAL; if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_register_handler(*lihp); LIST_INIT(&futex_list); mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF); linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit, NULL, 1000); linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec, NULL, 1000); linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor, linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY); linux_get_machine(&linux_kplatform); linux_szplatform = roundup(strlen(linux_kplatform) + 1, sizeof(char *)); linux_dev_shm_create(); linux_osd_jail_register(); stclohz = (stathz ? stathz : hz); if (bootverbose) printf("Linux ELF exec handler installed\n"); } else printf("cannot insert Linux ELF brand handler\n"); break; case MOD_UNLOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_brand_inuse(*brandinfo)) error = EBUSY; if (error == 0) { for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_remove_brand_entry(*brandinfo) < 0) error = EINVAL; } if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_unregister_handler(*lihp); mtx_destroy(&futex_mtx); EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag); linux_dev_shm_destroy(); linux_osd_jail_deregister(); if (bootverbose) printf("Linux ELF exec handler removed\n"); } else printf("Could not deinstall ELF interpreter entry\n"); break; default: return (EOPNOTSUPP); } return (error); } static moduledata_t linux_elf_mod = { "linuxelf", linux_elf_modevent, 0 }; DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); FEATURE(linux, "Linux 32bit support"); Index: head/sys/kern/kern_thread.c =================================================================== --- head/sys/kern/kern_thread.c (revision 366204) +++ head/sys/kern/kern_thread.c (revision 366205) @@ -1,1355 +1,1355 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (C) 2001 Julian Elischer . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice(s), this list of conditions and the following disclaimer as * the first lines of this file unmodified other than the possible * addition of one or more copyright notices. * 2. Redistributions in binary form must reproduce the above copyright * notice(s), this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */ #include "opt_witness.h" #include "opt_hwpmc_hooks.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include #endif #include #include #include #include #include /* * Asserts below verify the stability of struct thread and struct proc * layout, as exposed by KBI to modules. On head, the KBI is allowed * to drift, change to the structures must be accompanied by the * assert update. * * On the stable branches after KBI freeze, conditions must not be * violated. Typically new fields are moved to the end of the * structures. */ #ifdef __amd64__ _Static_assert(offsetof(struct thread, td_flags) == 0xfc, "struct thread KBI td_flags"); _Static_assert(offsetof(struct thread, td_pflags) == 0x104, "struct thread KBI td_pflags"); -_Static_assert(offsetof(struct thread, td_frame) == 0x4a8, +_Static_assert(offsetof(struct thread, td_frame) == 0x4a0, "struct thread KBI td_frame"); _Static_assert(offsetof(struct thread, td_emuldata) == 0x6b0, "struct thread KBI td_emuldata"); _Static_assert(offsetof(struct proc, p_flag) == 0xb0, "struct proc KBI p_flag"); _Static_assert(offsetof(struct proc, p_pid) == 0xbc, "struct proc KBI p_pid"); _Static_assert(offsetof(struct proc, p_filemon) == 0x3b8, "struct proc KBI p_filemon"); _Static_assert(offsetof(struct proc, p_comm) == 0x3d0, "struct proc KBI p_comm"); _Static_assert(offsetof(struct proc, p_emuldata) == 0x4b0, "struct proc KBI p_emuldata"); #endif #ifdef __i386__ _Static_assert(offsetof(struct thread, td_flags) == 0x98, "struct thread KBI td_flags"); _Static_assert(offsetof(struct thread, td_pflags) == 0xa0, "struct thread KBI td_pflags"); -_Static_assert(offsetof(struct thread, td_frame) == 0x304, +_Static_assert(offsetof(struct thread, td_frame) == 0x300, "struct thread KBI td_frame"); -_Static_assert(offsetof(struct thread, td_emuldata) == 0x348, +_Static_assert(offsetof(struct thread, td_emuldata) == 0x344, "struct thread KBI td_emuldata"); _Static_assert(offsetof(struct proc, p_flag) == 0x68, "struct proc KBI p_flag"); _Static_assert(offsetof(struct proc, p_pid) == 0x74, "struct proc KBI p_pid"); _Static_assert(offsetof(struct proc, p_filemon) == 0x268, "struct proc KBI p_filemon"); _Static_assert(offsetof(struct proc, p_comm) == 0x27c, "struct proc KBI p_comm"); _Static_assert(offsetof(struct proc, p_emuldata) == 0x308, "struct proc KBI p_emuldata"); #endif SDT_PROVIDER_DECLARE(proc); SDT_PROBE_DEFINE(proc, , , lwp__exit); /* * thread related storage. */ static uma_zone_t thread_zone; TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads); static struct mtx zombie_lock; MTX_SYSINIT(zombie_lock, &zombie_lock, "zombie lock", MTX_SPIN); static void thread_zombie(struct thread *); static int thread_unsuspend_one(struct thread *td, struct proc *p, bool boundary); #define TID_BUFFER_SIZE 1024 struct mtx tid_lock; static struct unrhdr *tid_unrhdr; static lwpid_t tid_buffer[TID_BUFFER_SIZE]; static int tid_head, tid_tail; static MALLOC_DEFINE(M_TIDHASH, "tidhash", "thread hash"); struct tidhashhead *tidhashtbl; u_long tidhash; struct rwlock tidhash_lock; EVENTHANDLER_LIST_DEFINE(thread_ctor); EVENTHANDLER_LIST_DEFINE(thread_dtor); EVENTHANDLER_LIST_DEFINE(thread_init); EVENTHANDLER_LIST_DEFINE(thread_fini); static lwpid_t tid_alloc(void) { lwpid_t tid; tid = alloc_unr(tid_unrhdr); if (tid != -1) return (tid); mtx_lock(&tid_lock); if (tid_head == tid_tail) { mtx_unlock(&tid_lock); return (-1); } tid = tid_buffer[tid_head]; tid_head = (tid_head + 1) % TID_BUFFER_SIZE; mtx_unlock(&tid_lock); return (tid); } static void tid_free(lwpid_t tid) { lwpid_t tmp_tid = -1; mtx_lock(&tid_lock); if ((tid_tail + 1) % TID_BUFFER_SIZE == tid_head) { tmp_tid = tid_buffer[tid_head]; tid_head = (tid_head + 1) % TID_BUFFER_SIZE; } tid_buffer[tid_tail] = tid; tid_tail = (tid_tail + 1) % TID_BUFFER_SIZE; mtx_unlock(&tid_lock); if (tmp_tid != -1) free_unr(tid_unrhdr, tmp_tid); } /* * Prepare a thread for use. */ static int thread_ctor(void *mem, int size, void *arg, int flags) { struct thread *td; td = (struct thread *)mem; td->td_state = TDS_INACTIVE; td->td_lastcpu = td->td_oncpu = NOCPU; td->td_tid = tid_alloc(); /* * Note that td_critnest begins life as 1 because the thread is not * running and is thereby implicitly waiting to be on the receiving * end of a context switch. */ td->td_critnest = 1; td->td_lend_user_pri = PRI_MAX; EVENTHANDLER_DIRECT_INVOKE(thread_ctor, td); #ifdef AUDIT audit_thread_alloc(td); #endif umtx_thread_alloc(td); return (0); } /* * Reclaim a thread after use. */ static void thread_dtor(void *mem, int size, void *arg) { struct thread *td; td = (struct thread *)mem; #ifdef INVARIANTS /* Verify that this thread is in a safe state to free. */ switch (td->td_state) { case TDS_INHIBITED: case TDS_RUNNING: case TDS_CAN_RUN: case TDS_RUNQ: /* * We must never unlink a thread that is in one of * these states, because it is currently active. */ panic("bad state for thread unlinking"); /* NOTREACHED */ case TDS_INACTIVE: break; default: panic("bad thread state"); /* NOTREACHED */ } #endif #ifdef AUDIT audit_thread_free(td); #endif /* Free all OSD associated to this thread. */ osd_thread_exit(td); td_softdep_cleanup(td); MPASS(td->td_su == NULL); EVENTHANDLER_DIRECT_INVOKE(thread_dtor, td); tid_free(td->td_tid); } /* * Initialize type-stable parts of a thread (when newly created). */ static int thread_init(void *mem, int size, int flags) { struct thread *td; td = (struct thread *)mem; td->td_sleepqueue = sleepq_alloc(); td->td_turnstile = turnstile_alloc(); td->td_rlqe = NULL; EVENTHANDLER_DIRECT_INVOKE(thread_init, td); umtx_thread_init(td); td->td_kstack = 0; td->td_sel = NULL; return (0); } /* * Tear down type-stable parts of a thread (just before being discarded). */ static void thread_fini(void *mem, int size) { struct thread *td; td = (struct thread *)mem; EVENTHANDLER_DIRECT_INVOKE(thread_fini, td); rlqentry_free(td->td_rlqe); turnstile_free(td->td_turnstile); sleepq_free(td->td_sleepqueue); umtx_thread_fini(td); seltdfini(td); } /* * For a newly created process, * link up all the structures and its initial threads etc. * called from: * {arch}/{arch}/machdep.c {arch}_init(), init386() etc. * proc_dtor() (should go away) * proc_init() */ void proc_linkup0(struct proc *p, struct thread *td) { TAILQ_INIT(&p->p_threads); /* all threads in proc */ proc_linkup(p, td); } void proc_linkup(struct proc *p, struct thread *td) { sigqueue_init(&p->p_sigqueue, p); p->p_ksi = ksiginfo_alloc(1); if (p->p_ksi != NULL) { /* XXX p_ksi may be null if ksiginfo zone is not ready */ p->p_ksi->ksi_flags = KSI_EXT | KSI_INS; } LIST_INIT(&p->p_mqnotifier); p->p_numthreads = 0; thread_link(td, p); } /* * Initialize global thread allocation resources. */ void threadinit(void) { uint32_t flags; mtx_init(&tid_lock, "TID lock", NULL, MTX_DEF); /* * pid_max cannot be greater than PID_MAX. * leave one number for thread0. */ tid_unrhdr = new_unrhdr(PID_MAX + 2, INT_MAX, &tid_lock); flags = UMA_ZONE_NOFREE; #ifdef __aarch64__ /* * Force thread structures to be allocated from the direct map. * Otherwise, superpage promotions and demotions may temporarily * invalidate thread structure mappings. For most dynamically allocated * structures this is not a problem, but translation faults cannot be * handled without accessing curthread. */ flags |= UMA_ZONE_CONTIG; #endif thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(), thread_ctor, thread_dtor, thread_init, thread_fini, 32 - 1, flags); tidhashtbl = hashinit(maxproc / 2, M_TIDHASH, &tidhash); rw_init(&tidhash_lock, "tidhash"); } /* * Place an unused thread on the zombie list. * Use the slpq as that must be unused by now. */ void thread_zombie(struct thread *td) { mtx_lock_spin(&zombie_lock); TAILQ_INSERT_HEAD(&zombie_threads, td, td_slpq); mtx_unlock_spin(&zombie_lock); } /* * Release a thread that has exited after cpu_throw(). */ void thread_stash(struct thread *td) { atomic_subtract_rel_int(&td->td_proc->p_exitthreads, 1); thread_zombie(td); } /* * Reap zombie resources. */ void thread_reap(void) { struct thread *td_first, *td_next; /* * Don't even bother to lock if none at this instant, * we really don't care about the next instant. */ if (!TAILQ_EMPTY(&zombie_threads)) { mtx_lock_spin(&zombie_lock); td_first = TAILQ_FIRST(&zombie_threads); if (td_first) TAILQ_INIT(&zombie_threads); mtx_unlock_spin(&zombie_lock); while (td_first) { td_next = TAILQ_NEXT(td_first, td_slpq); thread_cow_free(td_first); thread_free(td_first); td_first = td_next; } } } /* * Allocate a thread. */ struct thread * thread_alloc(int pages) { struct thread *td; thread_reap(); /* check if any zombies to get */ td = (struct thread *)uma_zalloc(thread_zone, M_WAITOK); KASSERT(td->td_kstack == 0, ("thread_alloc got thread with kstack")); if (!vm_thread_new(td, pages)) { uma_zfree(thread_zone, td); return (NULL); } cpu_thread_alloc(td); return (td); } int thread_alloc_stack(struct thread *td, int pages) { KASSERT(td->td_kstack == 0, ("thread_alloc_stack called on a thread with kstack")); if (!vm_thread_new(td, pages)) return (0); cpu_thread_alloc(td); return (1); } /* * Deallocate a thread. */ void thread_free(struct thread *td) { lock_profile_thread_exit(td); if (td->td_cpuset) cpuset_rel(td->td_cpuset); td->td_cpuset = NULL; cpu_thread_free(td); if (td->td_kstack != 0) vm_thread_dispose(td); callout_drain(&td->td_slpcallout); uma_zfree(thread_zone, td); } void thread_cow_get_proc(struct thread *newtd, struct proc *p) { PROC_LOCK_ASSERT(p, MA_OWNED); newtd->td_realucred = crcowget(p->p_ucred); newtd->td_ucred = newtd->td_realucred; newtd->td_limit = lim_hold(p->p_limit); newtd->td_cowgen = p->p_cowgen; } void thread_cow_get(struct thread *newtd, struct thread *td) { MPASS(td->td_realucred == td->td_ucred); newtd->td_realucred = crcowget(td->td_realucred); newtd->td_ucred = newtd->td_realucred; newtd->td_limit = lim_hold(td->td_limit); newtd->td_cowgen = td->td_cowgen; } void thread_cow_free(struct thread *td) { if (td->td_realucred != NULL) crcowfree(td); if (td->td_limit != NULL) lim_free(td->td_limit); } void thread_cow_update(struct thread *td) { struct proc *p; struct ucred *oldcred; struct plimit *oldlimit; p = td->td_proc; oldlimit = NULL; PROC_LOCK(p); oldcred = crcowsync(); if (td->td_limit != p->p_limit) { oldlimit = td->td_limit; td->td_limit = lim_hold(p->p_limit); } td->td_cowgen = p->p_cowgen; PROC_UNLOCK(p); if (oldcred != NULL) crfree(oldcred); if (oldlimit != NULL) lim_free(oldlimit); } /* * Discard the current thread and exit from its context. * Always called with scheduler locked. * * Because we can't free a thread while we're operating under its context, * push the current thread into our CPU's deadthread holder. This means * we needn't worry about someone else grabbing our context before we * do a cpu_throw(). */ void thread_exit(void) { uint64_t runtime, new_switchtime; struct thread *td; struct thread *td2; struct proc *p; int wakeup_swapper; td = curthread; p = td->td_proc; PROC_SLOCK_ASSERT(p, MA_OWNED); mtx_assert(&Giant, MA_NOTOWNED); PROC_LOCK_ASSERT(p, MA_OWNED); KASSERT(p != NULL, ("thread exiting without a process")); CTR3(KTR_PROC, "thread_exit: thread %p (pid %ld, %s)", td, (long)p->p_pid, td->td_name); SDT_PROBE0(proc, , , lwp__exit); KASSERT(TAILQ_EMPTY(&td->td_sigqueue.sq_list), ("signal pending")); MPASS(td->td_realucred == td->td_ucred); /* * drop FPU & debug register state storage, or any other * architecture specific resources that * would not be on a new untouched process. */ cpu_thread_exit(td); /* * The last thread is left attached to the process * So that the whole bundle gets recycled. Skip * all this stuff if we never had threads. * EXIT clears all sign of other threads when * it goes to single threading, so the last thread always * takes the short path. */ if (p->p_flag & P_HADTHREADS) { if (p->p_numthreads > 1) { atomic_add_int(&td->td_proc->p_exitthreads, 1); thread_unlink(td); td2 = FIRST_THREAD_IN_PROC(p); sched_exit_thread(td2, td); /* * The test below is NOT true if we are the * sole exiting thread. P_STOPPED_SINGLE is unset * in exit1() after it is the only survivor. */ if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) { if (p->p_numthreads == p->p_suspcount) { thread_lock(p->p_singlethread); wakeup_swapper = thread_unsuspend_one( p->p_singlethread, p, false); if (wakeup_swapper) kick_proc0(); } } PCPU_SET(deadthread, td); } else { /* * The last thread is exiting.. but not through exit() */ panic ("thread_exit: Last thread exiting on its own"); } } #ifdef HWPMC_HOOKS /* * If this thread is part of a process that is being tracked by hwpmc(4), * inform the module of the thread's impending exit. */ if (PMC_PROC_IS_USING_PMCS(td->td_proc)) { PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT); PMC_CALL_HOOK_UNLOCKED(td, PMC_FN_THR_EXIT, NULL); } else if (PMC_SYSTEM_SAMPLING_ACTIVE()) PMC_CALL_HOOK_UNLOCKED(td, PMC_FN_THR_EXIT_LOG, NULL); #endif PROC_UNLOCK(p); PROC_STATLOCK(p); thread_lock(td); PROC_SUNLOCK(p); /* Do the same timestamp bookkeeping that mi_switch() would do. */ new_switchtime = cpu_ticks(); runtime = new_switchtime - PCPU_GET(switchtime); td->td_runtime += runtime; td->td_incruntime += runtime; PCPU_SET(switchtime, new_switchtime); PCPU_SET(switchticks, ticks); VM_CNT_INC(v_swtch); /* Save our resource usage in our process. */ td->td_ru.ru_nvcsw++; ruxagg_locked(p, td); rucollect(&p->p_ru, &td->td_ru); PROC_STATUNLOCK(p); td->td_state = TDS_INACTIVE; #ifdef WITNESS witness_thread_exit(td); #endif CTR1(KTR_PROC, "thread_exit: cpu_throw() thread %p", td); sched_throw(td); panic("I'm a teapot!"); /* NOTREACHED */ } /* * Do any thread specific cleanups that may be needed in wait() * called with Giant, proc and schedlock not held. */ void thread_wait(struct proc *p) { struct thread *td; mtx_assert(&Giant, MA_NOTOWNED); KASSERT(p->p_numthreads == 1, ("multiple threads in thread_wait()")); KASSERT(p->p_exitthreads == 0, ("p_exitthreads leaking")); td = FIRST_THREAD_IN_PROC(p); /* Lock the last thread so we spin until it exits cpu_throw(). */ thread_lock(td); thread_unlock(td); lock_profile_thread_exit(td); cpuset_rel(td->td_cpuset); td->td_cpuset = NULL; cpu_thread_clean(td); thread_cow_free(td); callout_drain(&td->td_slpcallout); thread_reap(); /* check for zombie threads etc. */ } /* * Link a thread to a process. * set up anything that needs to be initialized for it to * be used by the process. */ void thread_link(struct thread *td, struct proc *p) { /* * XXX This can't be enabled because it's called for proc0 before * its lock has been created. * PROC_LOCK_ASSERT(p, MA_OWNED); */ td->td_state = TDS_INACTIVE; td->td_proc = p; td->td_flags = TDF_INMEM; LIST_INIT(&td->td_contested); LIST_INIT(&td->td_lprof[0]); LIST_INIT(&td->td_lprof[1]); #ifdef EPOCH_TRACE SLIST_INIT(&td->td_epochs); #endif sigqueue_init(&td->td_sigqueue, p); callout_init(&td->td_slpcallout, 1); TAILQ_INSERT_TAIL(&p->p_threads, td, td_plist); p->p_numthreads++; } /* * Called from: * thread_exit() */ void thread_unlink(struct thread *td) { struct proc *p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); #ifdef EPOCH_TRACE MPASS(SLIST_EMPTY(&td->td_epochs)); #endif TAILQ_REMOVE(&p->p_threads, td, td_plist); p->p_numthreads--; /* could clear a few other things here */ /* Must NOT clear links to proc! */ } static int calc_remaining(struct proc *p, int mode) { int remaining; PROC_LOCK_ASSERT(p, MA_OWNED); PROC_SLOCK_ASSERT(p, MA_OWNED); if (mode == SINGLE_EXIT) remaining = p->p_numthreads; else if (mode == SINGLE_BOUNDARY) remaining = p->p_numthreads - p->p_boundary_count; else if (mode == SINGLE_NO_EXIT || mode == SINGLE_ALLPROC) remaining = p->p_numthreads - p->p_suspcount; else panic("calc_remaining: wrong mode %d", mode); return (remaining); } static int remain_for_mode(int mode) { return (mode == SINGLE_ALLPROC ? 0 : 1); } static int weed_inhib(int mode, struct thread *td2, struct proc *p) { int wakeup_swapper; PROC_LOCK_ASSERT(p, MA_OWNED); PROC_SLOCK_ASSERT(p, MA_OWNED); THREAD_LOCK_ASSERT(td2, MA_OWNED); wakeup_swapper = 0; /* * Since the thread lock is dropped by the scheduler we have * to retry to check for races. */ restart: switch (mode) { case SINGLE_EXIT: if (TD_IS_SUSPENDED(td2)) { wakeup_swapper |= thread_unsuspend_one(td2, p, true); thread_lock(td2); goto restart; } if (TD_CAN_ABORT(td2)) { wakeup_swapper |= sleepq_abort(td2, EINTR); return (wakeup_swapper); } break; case SINGLE_BOUNDARY: case SINGLE_NO_EXIT: if (TD_IS_SUSPENDED(td2) && (td2->td_flags & TDF_BOUNDARY) == 0) { wakeup_swapper |= thread_unsuspend_one(td2, p, false); thread_lock(td2); goto restart; } if (TD_CAN_ABORT(td2)) { wakeup_swapper |= sleepq_abort(td2, ERESTART); return (wakeup_swapper); } break; case SINGLE_ALLPROC: /* * ALLPROC suspend tries to avoid spurious EINTR for * threads sleeping interruptable, by suspending the * thread directly, similarly to sig_suspend_threads(). * Since such sleep is not performed at the user * boundary, TDF_BOUNDARY flag is not set, and TDF_ALLPROCSUSP * is used to avoid immediate un-suspend. */ if (TD_IS_SUSPENDED(td2) && (td2->td_flags & (TDF_BOUNDARY | TDF_ALLPROCSUSP)) == 0) { wakeup_swapper |= thread_unsuspend_one(td2, p, false); thread_lock(td2); goto restart; } if (TD_CAN_ABORT(td2)) { if ((td2->td_flags & TDF_SBDRY) == 0) { thread_suspend_one(td2); td2->td_flags |= TDF_ALLPROCSUSP; } else { wakeup_swapper |= sleepq_abort(td2, ERESTART); return (wakeup_swapper); } } break; default: break; } thread_unlock(td2); return (wakeup_swapper); } /* * Enforce single-threading. * * Returns 1 if the caller must abort (another thread is waiting to * exit the process or similar). Process is locked! * Returns 0 when you are successfully the only thread running. * A process has successfully single threaded in the suspend mode when * There are no threads in user mode. Threads in the kernel must be * allowed to continue until they get to the user boundary. They may even * copy out their return values and data before suspending. They may however be * accelerated in reaching the user boundary as we will wake up * any sleeping threads that are interruptable. (PCATCH). */ int thread_single(struct proc *p, int mode) { struct thread *td; struct thread *td2; int remaining, wakeup_swapper; td = curthread; KASSERT(mode == SINGLE_EXIT || mode == SINGLE_BOUNDARY || mode == SINGLE_ALLPROC || mode == SINGLE_NO_EXIT, ("invalid mode %d", mode)); /* * If allowing non-ALLPROC singlethreading for non-curproc * callers, calc_remaining() and remain_for_mode() should be * adjusted to also account for td->td_proc != p. For now * this is not implemented because it is not used. */ KASSERT((mode == SINGLE_ALLPROC && td->td_proc != p) || (mode != SINGLE_ALLPROC && td->td_proc == p), ("mode %d proc %p curproc %p", mode, p, td->td_proc)); mtx_assert(&Giant, MA_NOTOWNED); PROC_LOCK_ASSERT(p, MA_OWNED); if ((p->p_flag & P_HADTHREADS) == 0 && mode != SINGLE_ALLPROC) return (0); /* Is someone already single threading? */ if (p->p_singlethread != NULL && p->p_singlethread != td) return (1); if (mode == SINGLE_EXIT) { p->p_flag |= P_SINGLE_EXIT; p->p_flag &= ~P_SINGLE_BOUNDARY; } else { p->p_flag &= ~P_SINGLE_EXIT; if (mode == SINGLE_BOUNDARY) p->p_flag |= P_SINGLE_BOUNDARY; else p->p_flag &= ~P_SINGLE_BOUNDARY; } if (mode == SINGLE_ALLPROC) p->p_flag |= P_TOTAL_STOP; p->p_flag |= P_STOPPED_SINGLE; PROC_SLOCK(p); p->p_singlethread = td; remaining = calc_remaining(p, mode); while (remaining != remain_for_mode(mode)) { if (P_SHOULDSTOP(p) != P_STOPPED_SINGLE) goto stopme; wakeup_swapper = 0; FOREACH_THREAD_IN_PROC(p, td2) { if (td2 == td) continue; thread_lock(td2); td2->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK; if (TD_IS_INHIBITED(td2)) { wakeup_swapper |= weed_inhib(mode, td2, p); #ifdef SMP } else if (TD_IS_RUNNING(td2) && td != td2) { forward_signal(td2); thread_unlock(td2); #endif } else thread_unlock(td2); } if (wakeup_swapper) kick_proc0(); remaining = calc_remaining(p, mode); /* * Maybe we suspended some threads.. was it enough? */ if (remaining == remain_for_mode(mode)) break; stopme: /* * Wake us up when everyone else has suspended. * In the mean time we suspend as well. */ thread_suspend_switch(td, p); remaining = calc_remaining(p, mode); } if (mode == SINGLE_EXIT) { /* * Convert the process to an unthreaded process. The * SINGLE_EXIT is called by exit1() or execve(), in * both cases other threads must be retired. */ KASSERT(p->p_numthreads == 1, ("Unthreading with >1 threads")); p->p_singlethread = NULL; p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_HADTHREADS); /* * Wait for any remaining threads to exit cpu_throw(). */ while (p->p_exitthreads != 0) { PROC_SUNLOCK(p); PROC_UNLOCK(p); sched_relinquish(td); PROC_LOCK(p); PROC_SLOCK(p); } } else if (mode == SINGLE_BOUNDARY) { /* * Wait until all suspended threads are removed from * the processors. The thread_suspend_check() * increments p_boundary_count while it is still * running, which makes it possible for the execve() * to destroy vmspace while our other threads are * still using the address space. * * We lock the thread, which is only allowed to * succeed after context switch code finished using * the address space. */ FOREACH_THREAD_IN_PROC(p, td2) { if (td2 == td) continue; thread_lock(td2); KASSERT((td2->td_flags & TDF_BOUNDARY) != 0, ("td %p not on boundary", td2)); KASSERT(TD_IS_SUSPENDED(td2), ("td %p is not suspended", td2)); thread_unlock(td2); } } PROC_SUNLOCK(p); return (0); } bool thread_suspend_check_needed(void) { struct proc *p; struct thread *td; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); return (P_SHOULDSTOP(p) || ((p->p_flag & P_TRACED) != 0 && (td->td_dbgflags & TDB_SUSPEND) != 0)); } /* * Called in from locations that can safely check to see * whether we have to suspend or at least throttle for a * single-thread event (e.g. fork). * * Such locations include userret(). * If the "return_instead" argument is non zero, the thread must be able to * accept 0 (caller may continue), or 1 (caller must abort) as a result. * * The 'return_instead' argument tells the function if it may do a * thread_exit() or suspend, or whether the caller must abort and back * out instead. * * If the thread that set the single_threading request has set the * P_SINGLE_EXIT bit in the process flags then this call will never return * if 'return_instead' is false, but will exit. * * P_SINGLE_EXIT | return_instead == 0| return_instead != 0 *---------------+--------------------+--------------------- * 0 | returns 0 | returns 0 or 1 * | when ST ends | immediately *---------------+--------------------+--------------------- * 1 | thread exits | returns 1 * | | immediately * 0 = thread_exit() or suspension ok, * other = return error instead of stopping the thread. * * While a full suspension is under effect, even a single threading * thread would be suspended if it made this call (but it shouldn't). * This call should only be made from places where * thread_exit() would be safe as that may be the outcome unless * return_instead is set. */ int thread_suspend_check(int return_instead) { struct thread *td; struct proc *p; int wakeup_swapper; td = curthread; p = td->td_proc; mtx_assert(&Giant, MA_NOTOWNED); PROC_LOCK_ASSERT(p, MA_OWNED); while (thread_suspend_check_needed()) { if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) { KASSERT(p->p_singlethread != NULL, ("singlethread not set")); /* * The only suspension in action is a * single-threading. Single threader need not stop. * It is safe to access p->p_singlethread unlocked * because it can only be set to our address by us. */ if (p->p_singlethread == td) return (0); /* Exempt from stopping. */ } if ((p->p_flag & P_SINGLE_EXIT) && return_instead) return (EINTR); /* Should we goto user boundary if we didn't come from there? */ if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE && (p->p_flag & P_SINGLE_BOUNDARY) && return_instead) return (ERESTART); /* * Ignore suspend requests if they are deferred. */ if ((td->td_flags & TDF_SBDRY) != 0) { KASSERT(return_instead, ("TDF_SBDRY set for unsafe thread_suspend_check")); KASSERT((td->td_flags & (TDF_SEINTR | TDF_SERESTART)) != (TDF_SEINTR | TDF_SERESTART), ("both TDF_SEINTR and TDF_SERESTART")); return (TD_SBDRY_INTR(td) ? TD_SBDRY_ERRNO(td) : 0); } /* * If the process is waiting for us to exit, * this thread should just suicide. * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE. */ if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) { PROC_UNLOCK(p); /* * Allow Linux emulation layer to do some work * before thread suicide. */ if (__predict_false(p->p_sysent->sv_thread_detach != NULL)) (p->p_sysent->sv_thread_detach)(td); umtx_thread_exit(td); kern_thr_exit(td); panic("stopped thread did not exit"); } PROC_SLOCK(p); thread_stopped(p); if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) { if (p->p_numthreads == p->p_suspcount + 1) { thread_lock(p->p_singlethread); wakeup_swapper = thread_unsuspend_one( p->p_singlethread, p, false); if (wakeup_swapper) kick_proc0(); } } PROC_UNLOCK(p); thread_lock(td); /* * When a thread suspends, it just * gets taken off all queues. */ thread_suspend_one(td); if (return_instead == 0) { p->p_boundary_count++; td->td_flags |= TDF_BOUNDARY; } PROC_SUNLOCK(p); mi_switch(SW_INVOL | SWT_SUSPEND); PROC_LOCK(p); } return (0); } /* * Check for possible stops and suspensions while executing a * casueword or similar transiently failing operation. * * The sleep argument controls whether the function can handle a stop * request itself or it should return ERESTART and the request is * proceed at the kernel/user boundary in ast. * * Typically, when retrying due to casueword(9) failure (rv == 1), we * should handle the stop requests there, with exception of cases when * the thread owns a kernel resource, for instance busied the umtx * key, or when functions return immediately if thread_check_susp() * returned non-zero. On the other hand, retrying the whole lock * operation, we better not stop there but delegate the handling to * ast. * * If the request is for thread termination P_SINGLE_EXIT, we cannot * handle it at all, and simply return EINTR. */ int thread_check_susp(struct thread *td, bool sleep) { struct proc *p; int error; /* * The check for TDF_NEEDSUSPCHK is racy, but it is enough to * eventually break the lockstep loop. */ if ((td->td_flags & TDF_NEEDSUSPCHK) == 0) return (0); error = 0; p = td->td_proc; PROC_LOCK(p); if (p->p_flag & P_SINGLE_EXIT) error = EINTR; else if (P_SHOULDSTOP(p) || ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) error = sleep ? thread_suspend_check(0) : ERESTART; PROC_UNLOCK(p); return (error); } void thread_suspend_switch(struct thread *td, struct proc *p) { KASSERT(!TD_IS_SUSPENDED(td), ("already suspended")); PROC_LOCK_ASSERT(p, MA_OWNED); PROC_SLOCK_ASSERT(p, MA_OWNED); /* * We implement thread_suspend_one in stages here to avoid * dropping the proc lock while the thread lock is owned. */ if (p == td->td_proc) { thread_stopped(p); p->p_suspcount++; } PROC_UNLOCK(p); thread_lock(td); td->td_flags &= ~TDF_NEEDSUSPCHK; TD_SET_SUSPENDED(td); sched_sleep(td, 0); PROC_SUNLOCK(p); DROP_GIANT(); mi_switch(SW_VOL | SWT_SUSPEND); PICKUP_GIANT(); PROC_LOCK(p); PROC_SLOCK(p); } void thread_suspend_one(struct thread *td) { struct proc *p; p = td->td_proc; PROC_SLOCK_ASSERT(p, MA_OWNED); THREAD_LOCK_ASSERT(td, MA_OWNED); KASSERT(!TD_IS_SUSPENDED(td), ("already suspended")); p->p_suspcount++; td->td_flags &= ~TDF_NEEDSUSPCHK; TD_SET_SUSPENDED(td); sched_sleep(td, 0); } static int thread_unsuspend_one(struct thread *td, struct proc *p, bool boundary) { THREAD_LOCK_ASSERT(td, MA_OWNED); KASSERT(TD_IS_SUSPENDED(td), ("Thread not suspended")); TD_CLR_SUSPENDED(td); td->td_flags &= ~TDF_ALLPROCSUSP; if (td->td_proc == p) { PROC_SLOCK_ASSERT(p, MA_OWNED); p->p_suspcount--; if (boundary && (td->td_flags & TDF_BOUNDARY) != 0) { td->td_flags &= ~TDF_BOUNDARY; p->p_boundary_count--; } } return (setrunnable(td, 0)); } /* * Allow all threads blocked by single threading to continue running. */ void thread_unsuspend(struct proc *p) { struct thread *td; int wakeup_swapper; PROC_LOCK_ASSERT(p, MA_OWNED); PROC_SLOCK_ASSERT(p, MA_OWNED); wakeup_swapper = 0; if (!P_SHOULDSTOP(p)) { FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); if (TD_IS_SUSPENDED(td)) { wakeup_swapper |= thread_unsuspend_one(td, p, true); } else thread_unlock(td); } } else if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE && p->p_numthreads == p->p_suspcount) { /* * Stopping everything also did the job for the single * threading request. Now we've downgraded to single-threaded, * let it continue. */ if (p->p_singlethread->td_proc == p) { thread_lock(p->p_singlethread); wakeup_swapper = thread_unsuspend_one( p->p_singlethread, p, false); } } if (wakeup_swapper) kick_proc0(); } /* * End the single threading mode.. */ void thread_single_end(struct proc *p, int mode) { struct thread *td; int wakeup_swapper; KASSERT(mode == SINGLE_EXIT || mode == SINGLE_BOUNDARY || mode == SINGLE_ALLPROC || mode == SINGLE_NO_EXIT, ("invalid mode %d", mode)); PROC_LOCK_ASSERT(p, MA_OWNED); KASSERT((mode == SINGLE_ALLPROC && (p->p_flag & P_TOTAL_STOP) != 0) || (mode != SINGLE_ALLPROC && (p->p_flag & P_TOTAL_STOP) == 0), ("mode %d does not match P_TOTAL_STOP", mode)); KASSERT(mode == SINGLE_ALLPROC || p->p_singlethread == curthread, ("thread_single_end from other thread %p %p", curthread, p->p_singlethread)); KASSERT(mode != SINGLE_BOUNDARY || (p->p_flag & P_SINGLE_BOUNDARY) != 0, ("mis-matched SINGLE_BOUNDARY flags %x", p->p_flag)); p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_SINGLE_BOUNDARY | P_TOTAL_STOP); PROC_SLOCK(p); p->p_singlethread = NULL; wakeup_swapper = 0; /* * If there are other threads they may now run, * unless of course there is a blanket 'stop order' * on the process. The single threader must be allowed * to continue however as this is a bad place to stop. */ if (p->p_numthreads != remain_for_mode(mode) && !P_SHOULDSTOP(p)) { FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); if (TD_IS_SUSPENDED(td)) { wakeup_swapper |= thread_unsuspend_one(td, p, mode == SINGLE_BOUNDARY); } else thread_unlock(td); } } KASSERT(mode != SINGLE_BOUNDARY || p->p_boundary_count == 0, ("inconsistent boundary count %d", p->p_boundary_count)); PROC_SUNLOCK(p); if (wakeup_swapper) kick_proc0(); } struct thread * thread_find(struct proc *p, lwpid_t tid) { struct thread *td; PROC_LOCK_ASSERT(p, MA_OWNED); FOREACH_THREAD_IN_PROC(p, td) { if (td->td_tid == tid) break; } return (td); } /* Locate a thread by number; return with proc lock held. */ struct thread * tdfind(lwpid_t tid, pid_t pid) { #define RUN_THRESH 16 struct thread *td; int run = 0; td = curthread; if (td->td_tid == tid) { if (pid != -1 && td->td_proc->p_pid != pid) return (NULL); PROC_LOCK(td->td_proc); return (td); } rw_rlock(&tidhash_lock); LIST_FOREACH(td, TIDHASH(tid), td_hash) { if (td->td_tid == tid) { if (pid != -1 && td->td_proc->p_pid != pid) { td = NULL; break; } PROC_LOCK(td->td_proc); if (td->td_proc->p_state == PRS_NEW) { PROC_UNLOCK(td->td_proc); td = NULL; break; } if (run > RUN_THRESH) { if (rw_try_upgrade(&tidhash_lock)) { LIST_REMOVE(td, td_hash); LIST_INSERT_HEAD(TIDHASH(td->td_tid), td, td_hash); rw_wunlock(&tidhash_lock); return (td); } } break; } run++; } rw_runlock(&tidhash_lock); return (td); } void tidhash_add(struct thread *td) { rw_wlock(&tidhash_lock); LIST_INSERT_HEAD(TIDHASH(td->td_tid), td, td_hash); rw_wunlock(&tidhash_lock); } void tidhash_remove(struct thread *td) { rw_wlock(&tidhash_lock); LIST_REMOVE(td, td_hash); rw_wunlock(&tidhash_lock); } Index: head/sys/kern/subr_syscall.c =================================================================== --- head/sys/kern/subr_syscall.c (revision 366204) +++ head/sys/kern/subr_syscall.c (revision 366205) @@ -1,244 +1,244 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 1994, David Greenman * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * Copyright (C) 2010 Konstantin Belousov * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 */ #include "opt_capsicum.h" #include "opt_ktrace.h" __FBSDID("$FreeBSD$"); #include #include #include #ifdef KTRACE #include #include #endif #include static inline void syscallenter(struct thread *td) { struct proc *p; struct syscall_args *sa; int error, traced; VM_CNT_INC(v_syscall); p = td->td_proc; sa = &td->td_sa; td->td_pticks = 0; if (__predict_false(td->td_cowgen != p->p_cowgen)) thread_cow_update(td); traced = (p->p_flag & P_TRACED) != 0; if (__predict_false(traced || td->td_dbgflags & TDB_USERWR)) { PROC_LOCK(p); td->td_dbgflags &= ~TDB_USERWR; if (traced) td->td_dbgflags |= TDB_SCE; PROC_UNLOCK(p); } error = (p->p_sysent->sv_fetch_syscall_args)(td); #ifdef KTRACE if (KTRPOINT(td, KTR_SYSCALL)) - ktrsyscall(sa->code, sa->narg, sa->args); + ktrsyscall(sa->code, sa->callp->sy_narg, sa->args); #endif KTR_START4(KTR_SYSC, "syscall", syscallname(p, sa->code), (uintptr_t)td, "pid:%d", td->td_proc->p_pid, "arg0:%p", sa->args[0], "arg1:%p", sa->args[1], "arg2:%p", sa->args[2]); if (__predict_false(error != 0)) { td->td_errno = error; goto retval; } if (__predict_false((p->p_flag & P_TRACED) != 0)) { PROC_LOCK(p); if (p->p_ptevents & PTRACE_SCE) ptracestop((td), SIGTRAP, NULL); PROC_UNLOCK(p); } if (__predict_false((td->td_dbgflags & TDB_USERWR) != 0)) { /* * Reread syscall number and arguments if debugger * modified registers or memory. */ error = (p->p_sysent->sv_fetch_syscall_args)(td); #ifdef KTRACE if (KTRPOINT(td, KTR_SYSCALL)) - ktrsyscall(sa->code, sa->narg, sa->args); + ktrsyscall(sa->code, sa->callp->sy_narg, sa->args); #endif if (error != 0) { td->td_errno = error; goto retval; } } #ifdef CAPABILITY_MODE /* * In capability mode, we only allow access to system calls * flagged with SYF_CAPENABLED. */ if (__predict_false(IN_CAPABILITY_MODE(td) && !(sa->callp->sy_flags & SYF_CAPENABLED))) { td->td_errno = error = ECAPMODE; goto retval; } #endif error = syscall_thread_enter(td, sa->callp); if (error != 0) { td->td_errno = error; goto retval; } /* * Fetch fast sigblock value at the time of syscall entry to * handle sleepqueue primitives which might call cursig(). */ if (__predict_false(sigfastblock_fetch_always)) (void)sigfastblock_fetch(td); /* Let system calls set td_errno directly. */ td->td_pflags &= ~TDP_NERRNO; if (__predict_false(SYSTRACE_ENABLED() || AUDIT_SYSCALL_ENTER(sa->code, td))) { #ifdef KDTRACE_HOOKS /* Give the syscall:::entry DTrace probe a chance to fire. */ if (__predict_false(sa->callp->sy_entry != 0)) (*systrace_probe_func)(sa, SYSTRACE_ENTRY, 0); #endif error = (sa->callp->sy_call)(td, sa->args); /* Save the latest error return value. */ if (__predict_false((td->td_pflags & TDP_NERRNO) == 0)) td->td_errno = error; AUDIT_SYSCALL_EXIT(error, td); #ifdef KDTRACE_HOOKS /* Give the syscall:::return DTrace probe a chance to fire. */ if (__predict_false(sa->callp->sy_return != 0)) (*systrace_probe_func)(sa, SYSTRACE_RETURN, error ? -1 : td->td_retval[0]); #endif } else { error = (sa->callp->sy_call)(td, sa->args); /* Save the latest error return value. */ if (__predict_false((td->td_pflags & TDP_NERRNO) == 0)) td->td_errno = error; } syscall_thread_exit(td, sa->callp); retval: KTR_STOP4(KTR_SYSC, "syscall", syscallname(p, sa->code), (uintptr_t)td, "pid:%d", td->td_proc->p_pid, "error:%d", error, "retval0:%#lx", td->td_retval[0], "retval1:%#lx", td->td_retval[1]); if (__predict_false(traced)) { PROC_LOCK(p); td->td_dbgflags &= ~TDB_SCE; PROC_UNLOCK(p); } (p->p_sysent->sv_set_syscall_retval)(td, error); } static inline void syscallret(struct thread *td) { struct proc *p; struct syscall_args *sa; ksiginfo_t ksi; int traced; KASSERT((td->td_pflags & TDP_FORKING) == 0, ("fork() did not clear TDP_FORKING upon completion")); p = td->td_proc; sa = &td->td_sa; if (__predict_false(td->td_errno == ENOTCAPABLE || td->td_errno == ECAPMODE)) { if ((trap_enotcap || (p->p_flag2 & P2_TRAPCAP) != 0) && IN_CAPABILITY_MODE(td)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGTRAP; ksi.ksi_errno = td->td_errno; ksi.ksi_code = TRAP_CAP; trapsignal(td, &ksi); } } /* * Handle reschedule and other end-of-syscall issues */ userret(td, td->td_frame); #ifdef KTRACE if (KTRPOINT(td, KTR_SYSRET)) { ktrsysret(sa->code, td->td_errno, td->td_retval[0]); } #endif traced = 0; if (__predict_false(p->p_flag & P_TRACED)) { traced = 1; PROC_LOCK(p); td->td_dbgflags |= TDB_SCX; PROC_UNLOCK(p); } if (__predict_false(traced || (td->td_dbgflags & (TDB_EXEC | TDB_FORK)) != 0)) { PROC_LOCK(p); /* * If tracing the execed process, trap to the debugger * so that breakpoints can be set before the program * executes. If debugger requested tracing of syscall * returns, do it now too. */ if (traced && ((td->td_dbgflags & (TDB_FORK | TDB_EXEC)) != 0 || (p->p_ptevents & PTRACE_SCX) != 0)) ptracestop(td, SIGTRAP, NULL); td->td_dbgflags &= ~(TDB_SCX | TDB_EXEC | TDB_FORK); PROC_UNLOCK(p); } if (__predict_false(td->td_pflags & TDP_RFPPWAIT)) fork_rfppwait(td); } Index: head/sys/kern/sys_process.c =================================================================== --- head/sys/kern/sys_process.c (revision 366204) +++ head/sys/kern/sys_process.c (revision 366205) @@ -1,1326 +1,1326 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1994, Sean Eric Fagan * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Sean Eric Fagan. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_FREEBSD32 #include #endif /* * Functions implemented using PROC_ACTION(): * * proc_read_regs(proc, regs) * Get the current user-visible register set from the process * and copy it into the regs structure (). * The process is stopped at the time read_regs is called. * * proc_write_regs(proc, regs) * Update the current register set from the passed in regs * structure. Take care to avoid clobbering special CPU * registers or privileged bits in the PSL. * Depending on the architecture this may have fix-up work to do, * especially if the IAR or PCW are modified. * The process is stopped at the time write_regs is called. * * proc_read_fpregs, proc_write_fpregs * deal with the floating point register set, otherwise as above. * * proc_read_dbregs, proc_write_dbregs * deal with the processor debug register set, otherwise as above. * * proc_sstep(proc) * Arrange for the process to trap after executing a single instruction. */ #define PROC_ACTION(action) do { \ int error; \ \ PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); \ if ((td->td_proc->p_flag & P_INMEM) == 0) \ error = EIO; \ else \ error = (action); \ return (error); \ } while(0) int proc_read_regs(struct thread *td, struct reg *regs) { PROC_ACTION(fill_regs(td, regs)); } int proc_write_regs(struct thread *td, struct reg *regs) { PROC_ACTION(set_regs(td, regs)); } int proc_read_dbregs(struct thread *td, struct dbreg *dbregs) { PROC_ACTION(fill_dbregs(td, dbregs)); } int proc_write_dbregs(struct thread *td, struct dbreg *dbregs) { PROC_ACTION(set_dbregs(td, dbregs)); } /* * Ptrace doesn't support fpregs at all, and there are no security holes * or translations for fpregs, so we can just copy them. */ int proc_read_fpregs(struct thread *td, struct fpreg *fpregs) { PROC_ACTION(fill_fpregs(td, fpregs)); } int proc_write_fpregs(struct thread *td, struct fpreg *fpregs) { PROC_ACTION(set_fpregs(td, fpregs)); } #ifdef COMPAT_FREEBSD32 /* For 32 bit binaries, we need to expose the 32 bit regs layouts. */ int proc_read_regs32(struct thread *td, struct reg32 *regs32) { PROC_ACTION(fill_regs32(td, regs32)); } int proc_write_regs32(struct thread *td, struct reg32 *regs32) { PROC_ACTION(set_regs32(td, regs32)); } int proc_read_dbregs32(struct thread *td, struct dbreg32 *dbregs32) { PROC_ACTION(fill_dbregs32(td, dbregs32)); } int proc_write_dbregs32(struct thread *td, struct dbreg32 *dbregs32) { PROC_ACTION(set_dbregs32(td, dbregs32)); } int proc_read_fpregs32(struct thread *td, struct fpreg32 *fpregs32) { PROC_ACTION(fill_fpregs32(td, fpregs32)); } int proc_write_fpregs32(struct thread *td, struct fpreg32 *fpregs32) { PROC_ACTION(set_fpregs32(td, fpregs32)); } #endif int proc_sstep(struct thread *td) { PROC_ACTION(ptrace_single_step(td)); } int proc_rwmem(struct proc *p, struct uio *uio) { vm_map_t map; vm_offset_t pageno; /* page number */ vm_prot_t reqprot; int error, fault_flags, page_offset, writing; /* * Assert that someone has locked this vmspace. (Should be * curthread but we can't assert that.) This keeps the process * from exiting out from under us until this operation completes. */ PROC_ASSERT_HELD(p); PROC_LOCK_ASSERT(p, MA_NOTOWNED); /* * The map we want... */ map = &p->p_vmspace->vm_map; /* * If we are writing, then we request vm_fault() to create a private * copy of each page. Since these copies will not be writeable by the * process, we must explicity request that they be dirtied. */ writing = uio->uio_rw == UIO_WRITE; reqprot = writing ? VM_PROT_COPY | VM_PROT_READ : VM_PROT_READ; fault_flags = writing ? VM_FAULT_DIRTY : VM_FAULT_NORMAL; /* * Only map in one page at a time. We don't have to, but it * makes things easier. This way is trivial - right? */ do { vm_offset_t uva; u_int len; vm_page_t m; uva = (vm_offset_t)uio->uio_offset; /* * Get the page number of this segment. */ pageno = trunc_page(uva); page_offset = uva - pageno; /* * How many bytes to copy */ len = min(PAGE_SIZE - page_offset, uio->uio_resid); /* * Fault and hold the page on behalf of the process. */ error = vm_fault(map, pageno, reqprot, fault_flags, &m); if (error != KERN_SUCCESS) { if (error == KERN_RESOURCE_SHORTAGE) error = ENOMEM; else error = EFAULT; break; } /* * Now do the i/o move. */ error = uiomove_fromphys(&m, page_offset, len, uio); /* Make the I-cache coherent for breakpoints. */ if (writing && error == 0) { vm_map_lock_read(map); if (vm_map_check_protection(map, pageno, pageno + PAGE_SIZE, VM_PROT_EXECUTE)) vm_sync_icache(map, uva, len); vm_map_unlock_read(map); } /* * Release the page. */ vm_page_unwire(m, PQ_ACTIVE); } while (error == 0 && uio->uio_resid > 0); return (error); } static ssize_t proc_iop(struct thread *td, struct proc *p, vm_offset_t va, void *buf, size_t len, enum uio_rw rw) { struct iovec iov; struct uio uio; ssize_t slen; MPASS(len < SSIZE_MAX); slen = (ssize_t)len; iov.iov_base = (caddr_t)buf; iov.iov_len = len; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = va; uio.uio_resid = slen; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = rw; uio.uio_td = td; proc_rwmem(p, &uio); if (uio.uio_resid == slen) return (-1); return (slen - uio.uio_resid); } ssize_t proc_readmem(struct thread *td, struct proc *p, vm_offset_t va, void *buf, size_t len) { return (proc_iop(td, p, va, buf, len, UIO_READ)); } ssize_t proc_writemem(struct thread *td, struct proc *p, vm_offset_t va, void *buf, size_t len) { return (proc_iop(td, p, va, buf, len, UIO_WRITE)); } static int ptrace_vm_entry(struct thread *td, struct proc *p, struct ptrace_vm_entry *pve) { struct vattr vattr; vm_map_t map; vm_map_entry_t entry; vm_object_t obj, tobj, lobj; struct vmspace *vm; struct vnode *vp; char *freepath, *fullpath; u_int pathlen; int error, index; error = 0; obj = NULL; vm = vmspace_acquire_ref(p); map = &vm->vm_map; vm_map_lock_read(map); do { KASSERT((map->header.eflags & MAP_ENTRY_IS_SUB_MAP) == 0, ("Submap in map header")); index = 0; VM_MAP_ENTRY_FOREACH(entry, map) { if (index >= pve->pve_entry && (entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) break; index++; } if (index < pve->pve_entry) { error = EINVAL; break; } if (entry == &map->header) { error = ENOENT; break; } /* We got an entry. */ pve->pve_entry = index + 1; pve->pve_timestamp = map->timestamp; pve->pve_start = entry->start; pve->pve_end = entry->end - 1; pve->pve_offset = entry->offset; pve->pve_prot = entry->protection; /* Backing object's path needed? */ if (pve->pve_pathlen == 0) break; pathlen = pve->pve_pathlen; pve->pve_pathlen = 0; obj = entry->object.vm_object; if (obj != NULL) VM_OBJECT_RLOCK(obj); } while (0); vm_map_unlock_read(map); pve->pve_fsid = VNOVAL; pve->pve_fileid = VNOVAL; if (error == 0 && obj != NULL) { lobj = obj; for (tobj = obj; tobj != NULL; tobj = tobj->backing_object) { if (tobj != obj) VM_OBJECT_RLOCK(tobj); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); lobj = tobj; pve->pve_offset += tobj->backing_object_offset; } vp = vm_object_vnode(lobj); if (vp != NULL) vref(vp); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); VM_OBJECT_RUNLOCK(obj); if (vp != NULL) { freepath = NULL; fullpath = NULL; vn_fullpath(vp, &fullpath, &freepath); vn_lock(vp, LK_SHARED | LK_RETRY); if (VOP_GETATTR(vp, &vattr, td->td_ucred) == 0) { pve->pve_fileid = vattr.va_fileid; pve->pve_fsid = vattr.va_fsid; } vput(vp); if (fullpath != NULL) { pve->pve_pathlen = strlen(fullpath) + 1; if (pve->pve_pathlen <= pathlen) { error = copyout(fullpath, pve->pve_path, pve->pve_pathlen); } else error = ENAMETOOLONG; } if (freepath != NULL) free(freepath, M_TEMP); } } vmspace_free(vm); if (error == 0) CTR3(KTR_PTRACE, "PT_VM_ENTRY: pid %d, entry %d, start %p", p->p_pid, pve->pve_entry, pve->pve_start); return (error); } /* * Process debugging system call. */ #ifndef _SYS_SYSPROTO_H_ struct ptrace_args { int req; pid_t pid; caddr_t addr; int data; }; #endif int sys_ptrace(struct thread *td, struct ptrace_args *uap) { /* * XXX this obfuscation is to reduce stack usage, but the register * structs may be too large to put on the stack anyway. */ union { struct ptrace_io_desc piod; struct ptrace_lwpinfo pl; struct ptrace_vm_entry pve; struct dbreg dbreg; struct fpreg fpreg; struct reg reg; char args[sizeof(td->td_sa.args)]; struct ptrace_sc_ret psr; int ptevents; } r; void *addr; int error = 0; AUDIT_ARG_PID(uap->pid); AUDIT_ARG_CMD(uap->req); AUDIT_ARG_VALUE(uap->data); addr = &r; switch (uap->req) { case PT_GET_EVENT_MASK: case PT_LWPINFO: case PT_GET_SC_ARGS: case PT_GET_SC_RET: break; case PT_GETREGS: bzero(&r.reg, sizeof(r.reg)); break; case PT_GETFPREGS: bzero(&r.fpreg, sizeof(r.fpreg)); break; case PT_GETDBREGS: bzero(&r.dbreg, sizeof(r.dbreg)); break; case PT_SETREGS: error = copyin(uap->addr, &r.reg, sizeof(r.reg)); break; case PT_SETFPREGS: error = copyin(uap->addr, &r.fpreg, sizeof(r.fpreg)); break; case PT_SETDBREGS: error = copyin(uap->addr, &r.dbreg, sizeof(r.dbreg)); break; case PT_SET_EVENT_MASK: if (uap->data != sizeof(r.ptevents)) error = EINVAL; else error = copyin(uap->addr, &r.ptevents, uap->data); break; case PT_IO: error = copyin(uap->addr, &r.piod, sizeof(r.piod)); break; case PT_VM_ENTRY: error = copyin(uap->addr, &r.pve, sizeof(r.pve)); break; default: addr = uap->addr; break; } if (error) return (error); error = kern_ptrace(td, uap->req, uap->pid, addr, uap->data); if (error) return (error); switch (uap->req) { case PT_VM_ENTRY: error = copyout(&r.pve, uap->addr, sizeof(r.pve)); break; case PT_IO: error = copyout(&r.piod, uap->addr, sizeof(r.piod)); break; case PT_GETREGS: error = copyout(&r.reg, uap->addr, sizeof(r.reg)); break; case PT_GETFPREGS: error = copyout(&r.fpreg, uap->addr, sizeof(r.fpreg)); break; case PT_GETDBREGS: error = copyout(&r.dbreg, uap->addr, sizeof(r.dbreg)); break; case PT_GET_EVENT_MASK: /* NB: The size in uap->data is validated in kern_ptrace(). */ error = copyout(&r.ptevents, uap->addr, uap->data); break; case PT_LWPINFO: /* NB: The size in uap->data is validated in kern_ptrace(). */ error = copyout(&r.pl, uap->addr, uap->data); break; case PT_GET_SC_ARGS: error = copyout(r.args, uap->addr, MIN(uap->data, sizeof(r.args))); break; case PT_GET_SC_RET: error = copyout(&r.psr, uap->addr, MIN(uap->data, sizeof(r.psr))); break; } return (error); } #ifdef COMPAT_FREEBSD32 /* * PROC_READ(regs, td2, addr); * becomes either: * proc_read_regs(td2, addr); * or * proc_read_regs32(td2, addr); * .. except this is done at runtime. There is an additional * complication in that PROC_WRITE disallows 32 bit consumers * from writing to 64 bit address space targets. */ #define PROC_READ(w, t, a) wrap32 ? \ proc_read_ ## w ## 32(t, a) : \ proc_read_ ## w (t, a) #define PROC_WRITE(w, t, a) wrap32 ? \ (safe ? proc_write_ ## w ## 32(t, a) : EINVAL ) : \ proc_write_ ## w (t, a) #else #define PROC_READ(w, t, a) proc_read_ ## w (t, a) #define PROC_WRITE(w, t, a) proc_write_ ## w (t, a) #endif void proc_set_traced(struct proc *p, bool stop) { sx_assert(&proctree_lock, SX_XLOCKED); PROC_LOCK_ASSERT(p, MA_OWNED); p->p_flag |= P_TRACED; if (stop) p->p_flag2 |= P2_PTRACE_FSTP; p->p_ptevents = PTRACE_DEFAULT; } int kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data) { struct iovec iov; struct uio uio; struct proc *curp, *p, *pp; struct thread *td2 = NULL, *td3; struct ptrace_io_desc *piod = NULL; struct ptrace_lwpinfo *pl; struct ptrace_sc_ret *psr; int error, num, tmp; int proctree_locked = 0; lwpid_t tid = 0, *buf; #ifdef COMPAT_FREEBSD32 int wrap32 = 0, safe = 0; #endif curp = td->td_proc; /* Lock proctree before locking the process. */ switch (req) { case PT_TRACE_ME: case PT_ATTACH: case PT_STEP: case PT_CONTINUE: case PT_TO_SCE: case PT_TO_SCX: case PT_SYSCALL: case PT_FOLLOW_FORK: case PT_LWP_EVENTS: case PT_GET_EVENT_MASK: case PT_SET_EVENT_MASK: case PT_DETACH: case PT_GET_SC_ARGS: sx_xlock(&proctree_lock); proctree_locked = 1; break; default: break; } if (req == PT_TRACE_ME) { p = td->td_proc; PROC_LOCK(p); } else { if (pid <= PID_MAX) { if ((p = pfind(pid)) == NULL) { if (proctree_locked) sx_xunlock(&proctree_lock); return (ESRCH); } } else { td2 = tdfind(pid, -1); if (td2 == NULL) { if (proctree_locked) sx_xunlock(&proctree_lock); return (ESRCH); } p = td2->td_proc; tid = pid; pid = p->p_pid; } } AUDIT_ARG_PROCESS(p); if ((p->p_flag & P_WEXIT) != 0) { error = ESRCH; goto fail; } if ((error = p_cansee(td, p)) != 0) goto fail; if ((error = p_candebug(td, p)) != 0) goto fail; /* * System processes can't be debugged. */ if ((p->p_flag & P_SYSTEM) != 0) { error = EINVAL; goto fail; } if (tid == 0) { if ((p->p_flag & P_STOPPED_TRACE) != 0) { KASSERT(p->p_xthread != NULL, ("NULL p_xthread")); td2 = p->p_xthread; } else { td2 = FIRST_THREAD_IN_PROC(p); } tid = td2->td_tid; } #ifdef COMPAT_FREEBSD32 /* * Test if we're a 32 bit client and what the target is. * Set the wrap controls accordingly. */ if (SV_CURPROC_FLAG(SV_ILP32)) { if (SV_PROC_FLAG(td2->td_proc, SV_ILP32)) safe = 1; wrap32 = 1; } #endif /* * Permissions check */ switch (req) { case PT_TRACE_ME: /* * Always legal, when there is a parent process which * could trace us. Otherwise, reject. */ if ((p->p_flag & P_TRACED) != 0) { error = EBUSY; goto fail; } if (p->p_pptr == initproc) { error = EPERM; goto fail; } break; case PT_ATTACH: /* Self */ if (p == td->td_proc) { error = EINVAL; goto fail; } /* Already traced */ if (p->p_flag & P_TRACED) { error = EBUSY; goto fail; } /* Can't trace an ancestor if you're being traced. */ if (curp->p_flag & P_TRACED) { for (pp = curp->p_pptr; pp != NULL; pp = pp->p_pptr) { if (pp == p) { error = EINVAL; goto fail; } } } /* OK */ break; case PT_CLEARSTEP: /* Allow thread to clear single step for itself */ if (td->td_tid == tid) break; /* FALLTHROUGH */ default: /* not being traced... */ if ((p->p_flag & P_TRACED) == 0) { error = EPERM; goto fail; } /* not being traced by YOU */ if (p->p_pptr != td->td_proc) { error = EBUSY; goto fail; } /* not currently stopped */ if ((p->p_flag & P_STOPPED_TRACE) == 0 || p->p_suspcount != p->p_numthreads || (p->p_flag & P_WAITED) == 0) { error = EBUSY; goto fail; } /* OK */ break; } /* Keep this process around until we finish this request. */ _PHOLD(p); #ifdef FIX_SSTEP /* * Single step fixup ala procfs */ FIX_SSTEP(td2); #endif /* * Actually do the requests */ td->td_retval[0] = 0; switch (req) { case PT_TRACE_ME: /* set my trace flag and "owner" so it can read/write me */ proc_set_traced(p, false); if (p->p_flag & P_PPWAIT) p->p_flag |= P_PPTRACE; CTR1(KTR_PTRACE, "PT_TRACE_ME: pid %d", p->p_pid); break; case PT_ATTACH: /* security check done above */ /* * It would be nice if the tracing relationship was separate * from the parent relationship but that would require * another set of links in the proc struct or for "wait" * to scan the entire proc table. To make life easier, * we just re-parent the process we're trying to trace. * The old parent is remembered so we can put things back * on a "detach". */ proc_set_traced(p, true); proc_reparent(p, td->td_proc, false); CTR2(KTR_PTRACE, "PT_ATTACH: pid %d, oppid %d", p->p_pid, p->p_oppid); sx_xunlock(&proctree_lock); proctree_locked = 0; MPASS(p->p_xthread == NULL); MPASS((p->p_flag & P_STOPPED_TRACE) == 0); /* * If already stopped due to a stop signal, clear the * existing stop before triggering a traced SIGSTOP. */ if ((p->p_flag & P_STOPPED_SIG) != 0) { PROC_SLOCK(p); p->p_flag &= ~(P_STOPPED_SIG | P_WAITED); thread_unsuspend(p); PROC_SUNLOCK(p); } kern_psignal(p, SIGSTOP); break; case PT_CLEARSTEP: CTR2(KTR_PTRACE, "PT_CLEARSTEP: tid %d (pid %d)", td2->td_tid, p->p_pid); error = ptrace_clear_single_step(td2); break; case PT_SETSTEP: CTR2(KTR_PTRACE, "PT_SETSTEP: tid %d (pid %d)", td2->td_tid, p->p_pid); error = ptrace_single_step(td2); break; case PT_SUSPEND: CTR2(KTR_PTRACE, "PT_SUSPEND: tid %d (pid %d)", td2->td_tid, p->p_pid); td2->td_dbgflags |= TDB_SUSPEND; thread_lock(td2); td2->td_flags |= TDF_NEEDSUSPCHK; thread_unlock(td2); break; case PT_RESUME: CTR2(KTR_PTRACE, "PT_RESUME: tid %d (pid %d)", td2->td_tid, p->p_pid); td2->td_dbgflags &= ~TDB_SUSPEND; break; case PT_FOLLOW_FORK: CTR3(KTR_PTRACE, "PT_FOLLOW_FORK: pid %d %s -> %s", p->p_pid, p->p_ptevents & PTRACE_FORK ? "enabled" : "disabled", data ? "enabled" : "disabled"); if (data) p->p_ptevents |= PTRACE_FORK; else p->p_ptevents &= ~PTRACE_FORK; break; case PT_LWP_EVENTS: CTR3(KTR_PTRACE, "PT_LWP_EVENTS: pid %d %s -> %s", p->p_pid, p->p_ptevents & PTRACE_LWP ? "enabled" : "disabled", data ? "enabled" : "disabled"); if (data) p->p_ptevents |= PTRACE_LWP; else p->p_ptevents &= ~PTRACE_LWP; break; case PT_GET_EVENT_MASK: if (data != sizeof(p->p_ptevents)) { error = EINVAL; break; } CTR2(KTR_PTRACE, "PT_GET_EVENT_MASK: pid %d mask %#x", p->p_pid, p->p_ptevents); *(int *)addr = p->p_ptevents; break; case PT_SET_EVENT_MASK: if (data != sizeof(p->p_ptevents)) { error = EINVAL; break; } tmp = *(int *)addr; if ((tmp & ~(PTRACE_EXEC | PTRACE_SCE | PTRACE_SCX | PTRACE_FORK | PTRACE_LWP | PTRACE_VFORK)) != 0) { error = EINVAL; break; } CTR3(KTR_PTRACE, "PT_SET_EVENT_MASK: pid %d mask %#x -> %#x", p->p_pid, p->p_ptevents, tmp); p->p_ptevents = tmp; break; case PT_GET_SC_ARGS: CTR1(KTR_PTRACE, "PT_GET_SC_ARGS: pid %d", p->p_pid); if ((td2->td_dbgflags & (TDB_SCE | TDB_SCX)) == 0 #ifdef COMPAT_FREEBSD32 || (wrap32 && !safe) #endif ) { error = EINVAL; break; } bzero(addr, sizeof(td2->td_sa.args)); - bcopy(td2->td_sa.args, addr, td2->td_sa.narg * + bcopy(td2->td_sa.args, addr, td2->td_sa.callp->sy_narg * sizeof(register_t)); break; case PT_GET_SC_RET: if ((td2->td_dbgflags & (TDB_SCX)) == 0 #ifdef COMPAT_FREEBSD32 || (wrap32 && !safe) #endif ) { error = EINVAL; break; } psr = addr; bzero(psr, sizeof(*psr)); psr->sr_error = td2->td_errno; if (psr->sr_error == 0) { psr->sr_retval[0] = td2->td_retval[0]; psr->sr_retval[1] = td2->td_retval[1]; } CTR4(KTR_PTRACE, "PT_GET_SC_RET: pid %d error %d retval %#lx,%#lx", p->p_pid, psr->sr_error, psr->sr_retval[0], psr->sr_retval[1]); break; case PT_STEP: case PT_CONTINUE: case PT_TO_SCE: case PT_TO_SCX: case PT_SYSCALL: case PT_DETACH: /* Zero means do not send any signal */ if (data < 0 || data > _SIG_MAXSIG) { error = EINVAL; break; } switch (req) { case PT_STEP: CTR3(KTR_PTRACE, "PT_STEP: tid %d (pid %d), sig = %d", td2->td_tid, p->p_pid, data); error = ptrace_single_step(td2); if (error) goto out; break; case PT_CONTINUE: case PT_TO_SCE: case PT_TO_SCX: case PT_SYSCALL: if (addr != (void *)1) { error = ptrace_set_pc(td2, (u_long)(uintfptr_t)addr); if (error) goto out; } switch (req) { case PT_TO_SCE: p->p_ptevents |= PTRACE_SCE; CTR4(KTR_PTRACE, "PT_TO_SCE: pid %d, events = %#x, PC = %#lx, sig = %d", p->p_pid, p->p_ptevents, (u_long)(uintfptr_t)addr, data); break; case PT_TO_SCX: p->p_ptevents |= PTRACE_SCX; CTR4(KTR_PTRACE, "PT_TO_SCX: pid %d, events = %#x, PC = %#lx, sig = %d", p->p_pid, p->p_ptevents, (u_long)(uintfptr_t)addr, data); break; case PT_SYSCALL: p->p_ptevents |= PTRACE_SYSCALL; CTR4(KTR_PTRACE, "PT_SYSCALL: pid %d, events = %#x, PC = %#lx, sig = %d", p->p_pid, p->p_ptevents, (u_long)(uintfptr_t)addr, data); break; case PT_CONTINUE: CTR3(KTR_PTRACE, "PT_CONTINUE: pid %d, PC = %#lx, sig = %d", p->p_pid, (u_long)(uintfptr_t)addr, data); break; } break; case PT_DETACH: /* * Reset the process parent. * * NB: This clears P_TRACED before reparenting * a detached process back to its original * parent. Otherwise the debugee will be set * as an orphan of the debugger. */ p->p_flag &= ~(P_TRACED | P_WAITED); if (p->p_oppid != p->p_pptr->p_pid) { PROC_LOCK(p->p_pptr); sigqueue_take(p->p_ksi); PROC_UNLOCK(p->p_pptr); pp = proc_realparent(p); proc_reparent(p, pp, false); if (pp == initproc) p->p_sigparent = SIGCHLD; CTR3(KTR_PTRACE, "PT_DETACH: pid %d reparented to pid %d, sig %d", p->p_pid, pp->p_pid, data); } else CTR2(KTR_PTRACE, "PT_DETACH: pid %d, sig %d", p->p_pid, data); p->p_ptevents = 0; FOREACH_THREAD_IN_PROC(p, td3) { if ((td3->td_dbgflags & TDB_FSTP) != 0) { sigqueue_delete(&td3->td_sigqueue, SIGSTOP); } td3->td_dbgflags &= ~(TDB_XSIG | TDB_FSTP | TDB_SUSPEND); } if ((p->p_flag2 & P2_PTRACE_FSTP) != 0) { sigqueue_delete(&p->p_sigqueue, SIGSTOP); p->p_flag2 &= ~P2_PTRACE_FSTP; } /* should we send SIGCHLD? */ /* childproc_continued(p); */ break; } sx_xunlock(&proctree_lock); proctree_locked = 0; sendsig: MPASS(proctree_locked == 0); /* * Clear the pending event for the thread that just * reported its event (p_xthread). This may not be * the thread passed to PT_CONTINUE, PT_STEP, etc. if * the debugger is resuming a different thread. * * Deliver any pending signal via the reporting thread. */ MPASS(p->p_xthread != NULL); p->p_xthread->td_dbgflags &= ~TDB_XSIG; p->p_xthread->td_xsig = data; p->p_xthread = NULL; p->p_xsig = data; /* * P_WKILLED is insurance that a PT_KILL/SIGKILL * always works immediately, even if another thread is * unsuspended first and attempts to handle a * different signal or if the POSIX.1b style signal * queue cannot accommodate any new signals. */ if (data == SIGKILL) proc_wkilled(p); /* * Unsuspend all threads. To leave a thread * suspended, use PT_SUSPEND to suspend it before * continuing the process. */ PROC_SLOCK(p); p->p_flag &= ~(P_STOPPED_TRACE | P_STOPPED_SIG | P_WAITED); thread_unsuspend(p); PROC_SUNLOCK(p); break; case PT_WRITE_I: case PT_WRITE_D: td2->td_dbgflags |= TDB_USERWR; PROC_UNLOCK(p); error = 0; if (proc_writemem(td, p, (off_t)(uintptr_t)addr, &data, sizeof(int)) != sizeof(int)) error = ENOMEM; else CTR3(KTR_PTRACE, "PT_WRITE: pid %d: %p <= %#x", p->p_pid, addr, data); PROC_LOCK(p); break; case PT_READ_I: case PT_READ_D: PROC_UNLOCK(p); error = tmp = 0; if (proc_readmem(td, p, (off_t)(uintptr_t)addr, &tmp, sizeof(int)) != sizeof(int)) error = ENOMEM; else CTR3(KTR_PTRACE, "PT_READ: pid %d: %p >= %#x", p->p_pid, addr, tmp); td->td_retval[0] = tmp; PROC_LOCK(p); break; case PT_IO: piod = addr; iov.iov_base = piod->piod_addr; iov.iov_len = piod->piod_len; uio.uio_offset = (off_t)(uintptr_t)piod->piod_offs; uio.uio_resid = piod->piod_len; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_segflg = UIO_USERSPACE; uio.uio_td = td; switch (piod->piod_op) { case PIOD_READ_D: case PIOD_READ_I: CTR3(KTR_PTRACE, "PT_IO: pid %d: READ (%p, %#x)", p->p_pid, (uintptr_t)uio.uio_offset, uio.uio_resid); uio.uio_rw = UIO_READ; break; case PIOD_WRITE_D: case PIOD_WRITE_I: CTR3(KTR_PTRACE, "PT_IO: pid %d: WRITE (%p, %#x)", p->p_pid, (uintptr_t)uio.uio_offset, uio.uio_resid); td2->td_dbgflags |= TDB_USERWR; uio.uio_rw = UIO_WRITE; break; default: error = EINVAL; goto out; } PROC_UNLOCK(p); error = proc_rwmem(p, &uio); piod->piod_len -= uio.uio_resid; PROC_LOCK(p); break; case PT_KILL: CTR1(KTR_PTRACE, "PT_KILL: pid %d", p->p_pid); data = SIGKILL; goto sendsig; /* in PT_CONTINUE above */ case PT_SETREGS: CTR2(KTR_PTRACE, "PT_SETREGS: tid %d (pid %d)", td2->td_tid, p->p_pid); td2->td_dbgflags |= TDB_USERWR; error = PROC_WRITE(regs, td2, addr); break; case PT_GETREGS: CTR2(KTR_PTRACE, "PT_GETREGS: tid %d (pid %d)", td2->td_tid, p->p_pid); error = PROC_READ(regs, td2, addr); break; case PT_SETFPREGS: CTR2(KTR_PTRACE, "PT_SETFPREGS: tid %d (pid %d)", td2->td_tid, p->p_pid); td2->td_dbgflags |= TDB_USERWR; error = PROC_WRITE(fpregs, td2, addr); break; case PT_GETFPREGS: CTR2(KTR_PTRACE, "PT_GETFPREGS: tid %d (pid %d)", td2->td_tid, p->p_pid); error = PROC_READ(fpregs, td2, addr); break; case PT_SETDBREGS: CTR2(KTR_PTRACE, "PT_SETDBREGS: tid %d (pid %d)", td2->td_tid, p->p_pid); td2->td_dbgflags |= TDB_USERWR; error = PROC_WRITE(dbregs, td2, addr); break; case PT_GETDBREGS: CTR2(KTR_PTRACE, "PT_GETDBREGS: tid %d (pid %d)", td2->td_tid, p->p_pid); error = PROC_READ(dbregs, td2, addr); break; case PT_LWPINFO: if (data <= 0 || data > sizeof(*pl)) { error = EINVAL; break; } pl = addr; bzero(pl, sizeof(*pl)); pl->pl_lwpid = td2->td_tid; pl->pl_event = PL_EVENT_NONE; pl->pl_flags = 0; if (td2->td_dbgflags & TDB_XSIG) { pl->pl_event = PL_EVENT_SIGNAL; if (td2->td_si.si_signo != 0 && data >= offsetof(struct ptrace_lwpinfo, pl_siginfo) + sizeof(pl->pl_siginfo)){ pl->pl_flags |= PL_FLAG_SI; pl->pl_siginfo = td2->td_si; } } if (td2->td_dbgflags & TDB_SCE) pl->pl_flags |= PL_FLAG_SCE; else if (td2->td_dbgflags & TDB_SCX) pl->pl_flags |= PL_FLAG_SCX; if (td2->td_dbgflags & TDB_EXEC) pl->pl_flags |= PL_FLAG_EXEC; if (td2->td_dbgflags & TDB_FORK) { pl->pl_flags |= PL_FLAG_FORKED; pl->pl_child_pid = td2->td_dbg_forked; if (td2->td_dbgflags & TDB_VFORK) pl->pl_flags |= PL_FLAG_VFORKED; } else if ((td2->td_dbgflags & (TDB_SCX | TDB_VFORK)) == TDB_VFORK) pl->pl_flags |= PL_FLAG_VFORK_DONE; if (td2->td_dbgflags & TDB_CHILD) pl->pl_flags |= PL_FLAG_CHILD; if (td2->td_dbgflags & TDB_BORN) pl->pl_flags |= PL_FLAG_BORN; if (td2->td_dbgflags & TDB_EXIT) pl->pl_flags |= PL_FLAG_EXITED; pl->pl_sigmask = td2->td_sigmask; pl->pl_siglist = td2->td_siglist; strcpy(pl->pl_tdname, td2->td_name); if ((td2->td_dbgflags & (TDB_SCE | TDB_SCX)) != 0) { pl->pl_syscall_code = td2->td_sa.code; - pl->pl_syscall_narg = td2->td_sa.narg; + pl->pl_syscall_narg = td2->td_sa.callp->sy_narg; } else { pl->pl_syscall_code = 0; pl->pl_syscall_narg = 0; } CTR6(KTR_PTRACE, "PT_LWPINFO: tid %d (pid %d) event %d flags %#x child pid %d syscall %d", td2->td_tid, p->p_pid, pl->pl_event, pl->pl_flags, pl->pl_child_pid, pl->pl_syscall_code); break; case PT_GETNUMLWPS: CTR2(KTR_PTRACE, "PT_GETNUMLWPS: pid %d: %d threads", p->p_pid, p->p_numthreads); td->td_retval[0] = p->p_numthreads; break; case PT_GETLWPLIST: CTR3(KTR_PTRACE, "PT_GETLWPLIST: pid %d: data %d, actual %d", p->p_pid, data, p->p_numthreads); if (data <= 0) { error = EINVAL; break; } num = imin(p->p_numthreads, data); PROC_UNLOCK(p); buf = malloc(num * sizeof(lwpid_t), M_TEMP, M_WAITOK); tmp = 0; PROC_LOCK(p); FOREACH_THREAD_IN_PROC(p, td2) { if (tmp >= num) break; buf[tmp++] = td2->td_tid; } PROC_UNLOCK(p); error = copyout(buf, addr, tmp * sizeof(lwpid_t)); free(buf, M_TEMP); if (!error) td->td_retval[0] = tmp; PROC_LOCK(p); break; case PT_VM_TIMESTAMP: CTR2(KTR_PTRACE, "PT_VM_TIMESTAMP: pid %d: timestamp %d", p->p_pid, p->p_vmspace->vm_map.timestamp); td->td_retval[0] = p->p_vmspace->vm_map.timestamp; break; case PT_VM_ENTRY: PROC_UNLOCK(p); error = ptrace_vm_entry(td, p, addr); PROC_LOCK(p); break; default: #ifdef __HAVE_PTRACE_MACHDEP if (req >= PT_FIRSTMACH) { PROC_UNLOCK(p); error = cpu_ptrace(td2, req, addr, data); PROC_LOCK(p); } else #endif /* Unknown request. */ error = EINVAL; break; } out: /* Drop our hold on this process now that the request has completed. */ _PRELE(p); fail: PROC_UNLOCK(p); if (proctree_locked) sx_xunlock(&proctree_lock); return (error); } #undef PROC_READ #undef PROC_WRITE Index: head/sys/mips/include/proc.h =================================================================== --- head/sys/mips/include/proc.h (revision 366204) +++ head/sys/mips/include/proc.h (revision 366205) @@ -1,99 +1,98 @@ /* $OpenBSD: proc.h,v 1.2 1998/09/15 10:50:12 pefo Exp $ */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Ralph Campbell. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)proc.h 8.1 (Berkeley) 6/10/93 * JNPR: proc.h,v 1.7.2.1 2007/09/10 06:25:24 girish * $FreeBSD$ */ #ifndef _MACHINE_PROC_H_ #define _MACHINE_PROC_H_ #ifdef CPU_CNMIPS #include #endif /* * Machine-dependent part of the proc structure. */ struct mdthread { int md_flags; /* machine-dependent flags */ #if defined(__mips_n64) || defined(__mips_n32) /* PHYSADDR_64_BIT */ uint64_t md_upte[KSTACK_PAGES]; /* ptes for mapping u pcb */ #else int md_upte[KSTACK_PAGES]; #endif uintptr_t md_ss_addr; /* single step address for ptrace */ int md_ss_instr; /* single step instruction for ptrace */ register_t md_saved_intr; u_int md_spinlock_count; /* The following is CPU dependent, but kept in for compatibility */ int md_pc_ctrl; /* performance counter control */ int md_pc_count; /* performance counter */ int md_pc_spill; /* performance counter spill */ void *md_tls; #ifdef CPU_CNMIPS struct octeon_cop2_state *md_cop2; /* kernel context */ struct octeon_cop2_state *md_ucop2; /* userland context */ #define COP2_OWNER_USERLAND 0x0000 /* Userland owns COP2 */ #define COP2_OWNER_KERNEL 0x0001 /* Kernel owns COP2 */ int md_cop2owner; #endif }; /* md_flags */ #define MDTD_FPUSED 0x0001 /* Process used the FPU */ #define MDTD_COP2USED 0x0002 /* Process used the COP2 */ struct mdproc { size_t md_tls_tcb_offset; /* TCB offset */ }; struct syscall_args { u_int code; struct sysent *callp; register_t args[8]; - int narg; struct trapframe *trapframe; }; #ifdef __mips_n64 #define KINFO_PROC_SIZE 1088 #define KINFO_PROC32_SIZE 816 #else #define KINFO_PROC_SIZE 816 #endif #endif /* !_MACHINE_PROC_H_ */ Index: head/sys/mips/mips/trap.c =================================================================== --- head/sys/mips/mips/trap.c (revision 366204) +++ head/sys/mips/mips/trap.c (revision 366205) @@ -1,1698 +1,1696 @@ /* $OpenBSD: trap.c,v 1.19 1998/09/30 12:40:41 pefo Exp $ */ /* tracked to 1.23 */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1988 University of Utah. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and Ralph Campbell. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Utah Hdr: trap.c 1.32 91/04/06 * * from: @(#)trap.c 8.5 (Berkeley) 1/11/94 * JNPR: trap.c,v 1.13.2.2 2007/08/29 10:03:49 girish */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #include #include #include #endif #ifdef KDTRACE_HOOKS #include #endif #ifdef TRAP_DEBUG int trap_debug = 0; SYSCTL_INT(_machdep, OID_AUTO, trap_debug, CTLFLAG_RW, &trap_debug, 0, "Debug information on all traps"); #endif #define lbu_macro(data, addr) \ __asm __volatile ("lbu %0, 0x0(%1)" \ : "=r" (data) /* outputs */ \ : "r" (addr)); /* inputs */ #define lb_macro(data, addr) \ __asm __volatile ("lb %0, 0x0(%1)" \ : "=r" (data) /* outputs */ \ : "r" (addr)); /* inputs */ #define lwl_macro(data, addr) \ __asm __volatile ("lwl %0, 0x0(%1)" \ : "+r" (data) /* outputs */ \ : "r" (addr)); /* inputs */ #define lwr_macro(data, addr) \ __asm __volatile ("lwr %0, 0x0(%1)" \ : "+r" (data) /* outputs */ \ : "r" (addr)); /* inputs */ #define ldl_macro(data, addr) \ __asm __volatile ("ldl %0, 0x0(%1)" \ : "+r" (data) /* outputs */ \ : "r" (addr)); /* inputs */ #define ldr_macro(data, addr) \ __asm __volatile ("ldr %0, 0x0(%1)" \ : "+r" (data) /* outputs */ \ : "r" (addr)); /* inputs */ #define sb_macro(data, addr) \ __asm __volatile ("sb %0, 0x0(%1)" \ : /* outputs */ \ : "r" (data), "r" (addr)); /* inputs */ #define swl_macro(data, addr) \ __asm __volatile ("swl %0, 0x0(%1)" \ : /* outputs */ \ : "r" (data), "r" (addr)); /* inputs */ #define swr_macro(data, addr) \ __asm __volatile ("swr %0, 0x0(%1)" \ : /* outputs */ \ : "r" (data), "r" (addr)); /* inputs */ #define sdl_macro(data, addr) \ __asm __volatile ("sdl %0, 0x0(%1)" \ : /* outputs */ \ : "r" (data), "r" (addr)); /* inputs */ #define sdr_macro(data, addr) \ __asm __volatile ("sdr %0, 0x0(%1)" \ : /* outputs */ \ : "r" (data), "r" (addr)); /* inputs */ static void log_illegal_instruction(const char *, struct trapframe *); static void log_bad_page_fault(char *, struct trapframe *, int); static void log_frame_dump(struct trapframe *frame); static void get_mapping_info(vm_offset_t, pd_entry_t **, pt_entry_t **); int (*dtrace_invop_jump_addr)(struct trapframe *); #ifdef TRAP_DEBUG static void trap_frame_dump(struct trapframe *frame); #endif void (*machExceptionTable[]) (void)= { /* * The kernel exception handlers. */ MipsKernIntr, /* external interrupt */ MipsKernGenException, /* TLB modification */ MipsTLBInvalidException,/* TLB miss (load or instr. fetch) */ MipsTLBInvalidException,/* TLB miss (store) */ MipsKernGenException, /* address error (load or I-fetch) */ MipsKernGenException, /* address error (store) */ MipsKernGenException, /* bus error (I-fetch) */ MipsKernGenException, /* bus error (load or store) */ MipsKernGenException, /* system call */ MipsKernGenException, /* breakpoint */ MipsKernGenException, /* reserved instruction */ MipsKernGenException, /* coprocessor unusable */ MipsKernGenException, /* arithmetic overflow */ MipsKernGenException, /* trap exception */ MipsKernGenException, /* virtual coherence exception inst */ MipsKernGenException, /* floating point exception */ MipsKernGenException, /* reserved */ MipsKernGenException, /* reserved */ MipsKernGenException, /* reserved */ MipsKernGenException, /* reserved */ MipsKernGenException, /* reserved */ MipsKernGenException, /* reserved */ MipsKernGenException, /* reserved */ MipsKernGenException, /* watch exception */ MipsKernGenException, /* reserved */ MipsKernGenException, /* reserved */ MipsKernGenException, /* reserved */ MipsKernGenException, /* reserved */ MipsKernGenException, /* reserved */ MipsKernGenException, /* reserved */ MipsKernGenException, /* reserved */ MipsKernGenException, /* virtual coherence exception data */ /* * The user exception handlers. */ MipsUserIntr, /* 0 */ MipsUserGenException, /* 1 */ MipsTLBInvalidException,/* 2 */ MipsTLBInvalidException,/* 3 */ MipsUserGenException, /* 4 */ MipsUserGenException, /* 5 */ MipsUserGenException, /* 6 */ MipsUserGenException, /* 7 */ MipsUserGenException, /* 8 */ MipsUserGenException, /* 9 */ MipsUserGenException, /* 10 */ MipsUserGenException, /* 11 */ MipsUserGenException, /* 12 */ MipsUserGenException, /* 13 */ MipsUserGenException, /* 14 */ MipsUserGenException, /* 15 */ MipsUserGenException, /* 16 */ MipsUserGenException, /* 17 */ MipsUserGenException, /* 18 */ MipsUserGenException, /* 19 */ MipsUserGenException, /* 20 */ MipsUserGenException, /* 21 */ MipsUserGenException, /* 22 */ MipsUserGenException, /* 23 */ MipsUserGenException, /* 24 */ MipsUserGenException, /* 25 */ MipsUserGenException, /* 26 */ MipsUserGenException, /* 27 */ MipsUserGenException, /* 28 */ MipsUserGenException, /* 29 */ MipsUserGenException, /* 20 */ MipsUserGenException, /* 31 */ }; char *trap_type[] = { "external interrupt", "TLB modification", "TLB miss (load or instr. fetch)", "TLB miss (store)", "address error (load or I-fetch)", "address error (store)", "bus error (I-fetch)", "bus error (load or store)", "system call", "breakpoint", "reserved instruction", "coprocessor unusable", "arithmetic overflow", "trap", "virtual coherency instruction", "floating point", "reserved 16", "reserved 17", "reserved 18", "reserved 19", "reserved 20", "reserved 21", "reserved 22", "watch", "reserved 24", "reserved 25", "reserved 26", "reserved 27", "reserved 28", "reserved 29", "reserved 30", "virtual coherency data", }; #if !defined(SMP) && (defined(DDB) || defined(DEBUG)) struct trapdebug trapdebug[TRAPSIZE], *trp = trapdebug; #endif #define KERNLAND(x) ((vm_offset_t)(x) >= VM_MIN_KERNEL_ADDRESS && (vm_offset_t)(x) < VM_MAX_KERNEL_ADDRESS) #define DELAYBRANCH(x) ((x) & MIPS_CR_BR_DELAY) /* * MIPS load/store access type */ enum { MIPS_LHU_ACCESS = 1, MIPS_LH_ACCESS, MIPS_LWU_ACCESS, MIPS_LW_ACCESS, MIPS_LD_ACCESS, MIPS_SH_ACCESS, MIPS_SW_ACCESS, MIPS_SD_ACCESS }; char *access_name[] = { "Load Halfword Unsigned", "Load Halfword", "Load Word Unsigned", "Load Word", "Load Doubleword", "Store Halfword", "Store Word", "Store Doubleword" }; #ifdef CPU_CNMIPS #include #endif static int allow_unaligned_acc = 1; SYSCTL_INT(_vm, OID_AUTO, allow_unaligned_acc, CTLFLAG_RW, &allow_unaligned_acc, 0, "Allow unaligned accesses"); /* * FP emulation is assumed to work on O32, but the code is outdated and crufty * enough that it's a more sensible default to have it disabled when using * other ABIs. At the very least, it needs a lot of help in using * type-semantic ABI-oblivious macros for everything it does. */ #if defined(__mips_o32) static int emulate_fp = 1; #else static int emulate_fp = 0; #endif SYSCTL_INT(_machdep, OID_AUTO, emulate_fp, CTLFLAG_RW, &emulate_fp, 0, "Emulate unimplemented FPU instructions"); static int emulate_unaligned_access(struct trapframe *frame, int mode); extern void fswintrberr(void); /* XXX */ int cpu_fetch_syscall_args(struct thread *td) { struct trapframe *locr0; struct sysentvec *se; struct syscall_args *sa; int error, nsaved; locr0 = td->td_frame; sa = &td->td_sa; bzero(sa->args, sizeof(sa->args)); /* compute next PC after syscall instruction */ td->td_pcb->pcb_tpc = sa->trapframe->pc; /* Remember if restart */ if (DELAYBRANCH(sa->trapframe->cause)) /* Check BD bit */ locr0->pc = MipsEmulateBranch(locr0, sa->trapframe->pc, 0, 0); else locr0->pc += sizeof(int); sa->code = locr0->v0; switch (sa->code) { case SYS___syscall: case SYS_syscall: /* * This is an indirect syscall, in which the code is the first argument. */ #if (!defined(__mips_n32) && !defined(__mips_n64)) || defined(COMPAT_FREEBSD32) if (sa->code == SYS___syscall && SV_PROC_FLAG(td->td_proc, SV_ILP32)) { /* * Like syscall, but code is a quad, so as to maintain alignment * for the rest of the arguments. */ if (_QUAD_LOWWORD == 0) sa->code = locr0->a0; else sa->code = locr0->a1; sa->args[0] = locr0->a2; sa->args[1] = locr0->a3; nsaved = 2; break; } #endif /* * This is either not a quad syscall, or is a quad syscall with a * new ABI in which quads fit in a single register. */ sa->code = locr0->a0; sa->args[0] = locr0->a1; sa->args[1] = locr0->a2; sa->args[2] = locr0->a3; nsaved = 3; #if defined(__mips_n32) || defined(__mips_n64) #ifdef COMPAT_FREEBSD32 if (!SV_PROC_FLAG(td->td_proc, SV_ILP32)) { #endif /* * Non-o32 ABIs support more arguments in registers. */ sa->args[3] = locr0->a4; sa->args[4] = locr0->a5; sa->args[5] = locr0->a6; sa->args[6] = locr0->a7; nsaved += 4; #ifdef COMPAT_FREEBSD32 } #endif #endif break; default: /* * A direct syscall, arguments are just parameters to the syscall. */ sa->args[0] = locr0->a0; sa->args[1] = locr0->a1; sa->args[2] = locr0->a2; sa->args[3] = locr0->a3; nsaved = 4; #if defined (__mips_n32) || defined(__mips_n64) #ifdef COMPAT_FREEBSD32 if (!SV_PROC_FLAG(td->td_proc, SV_ILP32)) { #endif /* * Non-o32 ABIs support more arguments in registers. */ sa->args[4] = locr0->a4; sa->args[5] = locr0->a5; sa->args[6] = locr0->a6; sa->args[7] = locr0->a7; nsaved += 4; #ifdef COMPAT_FREEBSD32 } #endif #endif break; } #ifdef TRAP_DEBUG if (trap_debug) printf("SYSCALL #%d pid:%u\n", sa->code, td->td_proc->p_pid); #endif se = td->td_proc->p_sysent; /* * XXX * Shouldn't this go before switching on the code? */ if (sa->code >= se->sv_size) sa->callp = &se->sv_table[0]; else sa->callp = &se->sv_table[sa->code]; - sa->narg = sa->callp->sy_narg; - - if (sa->narg > nsaved) { + if (sa->callp->sy_narg > nsaved) { #if defined(__mips_n32) || defined(__mips_n64) /* * XXX * Is this right for new ABIs? I think the 4 there * should be 8, size there are 8 registers to skip, * not 4, but I'm not certain. */ #ifdef COMPAT_FREEBSD32 if (!SV_PROC_FLAG(td->td_proc, SV_ILP32)) #endif printf("SYSCALL #%u pid:%u, narg (%u) > nsaved (%u).\n", - sa->code, td->td_proc->p_pid, sa->narg, nsaved); + sa->code, td->td_proc->p_pid, sa->callp->sy_narg, nsaved); #endif #if (defined(__mips_n32) || defined(__mips_n64)) && defined(COMPAT_FREEBSD32) if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { unsigned i; int32_t arg; error = 0; /* XXX GCC is awful. */ - for (i = nsaved; i < sa->narg; i++) { + for (i = nsaved; i < sa->callp->sy_narg; i++) { error = copyin((caddr_t)(intptr_t)(locr0->sp + (4 + (i - nsaved)) * sizeof(int32_t)), (caddr_t)&arg, sizeof arg); if (error != 0) break; sa->args[i] = arg; } } else #endif error = copyin((caddr_t)(intptr_t)(locr0->sp + 4 * sizeof(register_t)), (caddr_t)&sa->args[nsaved], - (u_int)(sa->narg - nsaved) * sizeof(register_t)); + (u_int)(sa->callp->sy_narg - nsaved) * sizeof(register_t)); if (error != 0) { locr0->v0 = error; locr0->a3 = 1; } } else error = 0; if (error == 0) { td->td_retval[0] = 0; td->td_retval[1] = locr0->v1; } return (error); } #undef __FBSDID #define __FBSDID(x) #include "../../kern/subr_syscall.c" /* * Handle an exception. * Called from MipsKernGenException() or MipsUserGenException() * when a processor trap occurs. * In the case of a kernel trap, we return the pc where to resume if * p->p_addr->u_pcb.pcb_onfault is set, otherwise, return old pc. */ register_t trap(struct trapframe *trapframe) { int type, usermode; int i = 0; unsigned ucode = 0; struct thread *td = curthread; struct proc *p = curproc; vm_prot_t ftype; pmap_t pmap; int access_type; ksiginfo_t ksi; char *msg = NULL; intptr_t addr = 0; register_t pc; int cop, error; register_t *frame_regs; trapdebug_enter(trapframe, 0); #ifdef KDB if (kdb_active) { kdb_reenter(); return (0); } #endif type = (trapframe->cause & MIPS_CR_EXC_CODE) >> MIPS_CR_EXC_CODE_SHIFT; if (TRAPF_USERMODE(trapframe)) { type |= T_USER; usermode = 1; } else { usermode = 0; } /* * Enable hardware interrupts if they were on before the trap. If it * was off disable all so we don't accidently enable it when doing a * return to userland. */ if (trapframe->sr & MIPS_SR_INT_IE) { set_intr_mask(trapframe->sr & MIPS_SR_INT_MASK); intr_enable(); } else { intr_disable(); } #ifdef TRAP_DEBUG if (trap_debug) { static vm_offset_t last_badvaddr = 0; static vm_offset_t this_badvaddr = 0; static int count = 0; u_int32_t pid; printf("trap type %x (%s - ", type, trap_type[type & (~T_USER)]); if (type & T_USER) printf("user mode)\n"); else printf("kernel mode)\n"); #ifdef SMP printf("cpuid = %d\n", PCPU_GET(cpuid)); #endif pid = mips_rd_entryhi() & TLBHI_ASID_MASK; printf("badaddr = %#jx, pc = %#jx, ra = %#jx, sp = %#jx, sr = %jx, pid = %d, ASID = %u\n", (intmax_t)trapframe->badvaddr, (intmax_t)trapframe->pc, (intmax_t)trapframe->ra, (intmax_t)trapframe->sp, (intmax_t)trapframe->sr, (curproc ? curproc->p_pid : -1), pid); switch (type & ~T_USER) { case T_TLB_MOD: case T_TLB_LD_MISS: case T_TLB_ST_MISS: case T_ADDR_ERR_LD: case T_ADDR_ERR_ST: this_badvaddr = trapframe->badvaddr; break; case T_SYSCALL: this_badvaddr = trapframe->ra; break; default: this_badvaddr = trapframe->pc; break; } if ((last_badvaddr == this_badvaddr) && ((type & ~T_USER) != T_SYSCALL) && ((type & ~T_USER) != T_COP_UNUSABLE)) { if (++count == 3) { trap_frame_dump(trapframe); panic("too many faults at %p\n", (void *)last_badvaddr); } } else { last_badvaddr = this_badvaddr; count = 0; } } #endif #ifdef KDTRACE_HOOKS /* * A trap can occur while DTrace executes a probe. Before * executing the probe, DTrace blocks re-scheduling and sets * a flag in its per-cpu flags to indicate that it doesn't * want to fault. On returning from the probe, the no-fault * flag is cleared and finally re-scheduling is enabled. * * If the DTrace kernel module has registered a trap handler, * call it and if it returns non-zero, assume that it has * handled the trap and modified the trap frame so that this * function can return normally. */ /* * XXXDTRACE: add pid probe handler here (if ever) */ if (!usermode) { if (dtrace_trap_func != NULL && (*dtrace_trap_func)(trapframe, type) != 0) return (trapframe->pc); } #endif switch (type) { case T_MCHECK: #ifdef DDB kdb_trap(type, 0, trapframe); #endif panic("MCHECK\n"); break; case T_TLB_MOD: /* check for kernel address */ if (KERNLAND(trapframe->badvaddr)) { if (pmap_emulate_modified(kernel_pmap, trapframe->badvaddr) != 0) { ftype = VM_PROT_WRITE; goto kernel_fault; } return (trapframe->pc); } /* FALLTHROUGH */ case T_TLB_MOD + T_USER: pmap = &p->p_vmspace->vm_pmap; if (pmap_emulate_modified(pmap, trapframe->badvaddr) != 0) { ftype = VM_PROT_WRITE; goto dofault; } if (!usermode) return (trapframe->pc); goto out; case T_TLB_LD_MISS: case T_TLB_ST_MISS: ftype = (type == T_TLB_ST_MISS) ? VM_PROT_WRITE : VM_PROT_READ; /* check for kernel address */ if (KERNLAND(trapframe->badvaddr)) { vm_offset_t va; int rv; kernel_fault: va = (vm_offset_t)trapframe->badvaddr; rv = vm_fault_trap(kernel_map, va, ftype, VM_FAULT_NORMAL, NULL, NULL); if (rv == KERN_SUCCESS) return (trapframe->pc); if (td->td_pcb->pcb_onfault != NULL) { pc = (register_t)(intptr_t)td->td_pcb->pcb_onfault; td->td_pcb->pcb_onfault = NULL; return (pc); } goto err; } /* * It is an error for the kernel to access user space except * through the copyin/copyout routines. */ if (td->td_pcb->pcb_onfault == NULL) goto err; goto dofault; case T_TLB_LD_MISS + T_USER: ftype = VM_PROT_READ; goto dofault; case T_TLB_ST_MISS + T_USER: ftype = VM_PROT_WRITE; dofault: { vm_offset_t va; struct vmspace *vm; vm_map_t map; int rv = 0; vm = p->p_vmspace; map = &vm->vm_map; va = (vm_offset_t)trapframe->badvaddr; if (KERNLAND(trapframe->badvaddr)) { /* * Don't allow user-mode faults in kernel * address space. */ goto nogo; } rv = vm_fault_trap(map, va, ftype, VM_FAULT_NORMAL, &i, &ucode); /* * XXXDTRACE: add dtrace_doubletrap_func here? */ #ifdef VMFAULT_TRACE printf("vm_fault(%p (pmap %p), %p (%p), %x, %d) -> %x at pc %p\n", map, &vm->vm_pmap, (void *)va, (void *)(intptr_t)trapframe->badvaddr, ftype, VM_FAULT_NORMAL, rv, (void *)(intptr_t)trapframe->pc); #endif if (rv == KERN_SUCCESS) { if (!usermode) { return (trapframe->pc); } goto out; } nogo: if (!usermode) { if (td->td_pcb->pcb_onfault != NULL) { pc = (register_t)(intptr_t)td->td_pcb->pcb_onfault; td->td_pcb->pcb_onfault = NULL; return (pc); } goto err; } addr = trapframe->badvaddr; msg = "BAD_PAGE_FAULT"; log_bad_page_fault(msg, trapframe, type); break; } case T_ADDR_ERR_LD + T_USER: /* misaligned or kseg access */ case T_ADDR_ERR_ST + T_USER: /* misaligned or kseg access */ if (trapframe->badvaddr < 0 || trapframe->badvaddr >= VM_MAXUSER_ADDRESS) { msg = "ADDRESS_SPACE_ERR"; } else if (allow_unaligned_acc) { int mode; if (type == (T_ADDR_ERR_LD + T_USER)) mode = VM_PROT_READ; else mode = VM_PROT_WRITE; access_type = emulate_unaligned_access(trapframe, mode); if (access_type != 0) goto out; msg = "ALIGNMENT_FIX_ERR"; } else { msg = "ADDRESS_ERR"; } /* FALL THROUGH */ case T_BUS_ERR_IFETCH + T_USER: /* BERR asserted to cpu */ case T_BUS_ERR_LD_ST + T_USER: /* BERR asserted to cpu */ ucode = 0; /* XXX should be VM_PROT_something */ i = SIGBUS; addr = trapframe->pc; if (!msg) msg = "BUS_ERR"; log_bad_page_fault(msg, trapframe, type); break; case T_SYSCALL + T_USER: { td->td_sa.trapframe = trapframe; syscallenter(td); #if !defined(SMP) && (defined(DDB) || defined(DEBUG)) if (trp == trapdebug) trapdebug[TRAPSIZE - 1].code = td->td_sa.code; else trp[-1].code = td->td_sa.code; #endif trapdebug_enter(td->td_frame, -td->td_sa.code); /* * The sync'ing of I & D caches for SYS_ptrace() is * done by procfs_domem() through procfs_rwmem() * instead of being done here under a special check * for SYS_ptrace(). */ syscallret(td); return (trapframe->pc); } #if defined(KDTRACE_HOOKS) || defined(DDB) case T_BREAK: #ifdef KDTRACE_HOOKS if (!usermode && dtrace_invop_jump_addr != NULL && dtrace_invop_jump_addr(trapframe) == 0) return (trapframe->pc); #endif #ifdef DDB kdb_trap(type, 0, trapframe); return (trapframe->pc); #endif #endif case T_BREAK + T_USER: { intptr_t va; uint32_t instr; i = SIGTRAP; ucode = TRAP_BRKPT; /* compute address of break instruction */ va = trapframe->pc; if (DELAYBRANCH(trapframe->cause)) va += sizeof(int); addr = va; if (td->td_md.md_ss_addr != va) break; /* read break instruction */ instr = fuword32((caddr_t)va); if (instr != MIPS_BREAK_SSTEP) break; CTR3(KTR_PTRACE, "trap: tid %d, single step at %#lx: %#08x", td->td_tid, va, instr); PROC_LOCK(p); _PHOLD(p); error = ptrace_clear_single_step(td); _PRELE(p); PROC_UNLOCK(p); if (error == 0) ucode = TRAP_TRACE; break; } case T_IWATCH + T_USER: case T_DWATCH + T_USER: { intptr_t va; /* compute address of trapped instruction */ va = trapframe->pc; if (DELAYBRANCH(trapframe->cause)) va += sizeof(int); printf("watch exception @ %p\n", (void *)va); i = SIGTRAP; ucode = TRAP_BRKPT; addr = va; break; } case T_TRAP + T_USER: { intptr_t va; struct trapframe *locr0 = td->td_frame; /* compute address of trap instruction */ va = trapframe->pc; if (DELAYBRANCH(trapframe->cause)) va += sizeof(int); if (DELAYBRANCH(trapframe->cause)) { /* Check BD bit */ locr0->pc = MipsEmulateBranch(locr0, trapframe->pc, 0, 0); } else { locr0->pc += sizeof(int); } addr = va; i = SIGEMT; /* Stuff it with something for now */ break; } case T_RES_INST + T_USER: { InstFmt inst; inst = *(InstFmt *)(intptr_t)trapframe->pc; switch (inst.RType.op) { case OP_SPECIAL3: switch (inst.RType.func) { case OP_RDHWR: /* Register 29 used for TLS */ if (inst.RType.rd == 29) { frame_regs = &(trapframe->zero); frame_regs[inst.RType.rt] = (register_t)(intptr_t)td->td_md.md_tls; frame_regs[inst.RType.rt] += td->td_proc->p_md.md_tls_tcb_offset; trapframe->pc += sizeof(int); goto out; } break; } break; } log_illegal_instruction("RES_INST", trapframe); i = SIGILL; addr = trapframe->pc; } break; case T_C2E: case T_C2E + T_USER: goto err; break; case T_COP_UNUSABLE: #ifdef CPU_CNMIPS cop = (trapframe->cause & MIPS_CR_COP_ERR) >> MIPS_CR_COP_ERR_SHIFT; /* Handle only COP2 exception */ if (cop != 2) goto err; addr = trapframe->pc; /* save userland cop2 context if it has been touched */ if ((td->td_md.md_flags & MDTD_COP2USED) && (td->td_md.md_cop2owner == COP2_OWNER_USERLAND)) { if (td->td_md.md_ucop2) octeon_cop2_save(td->td_md.md_ucop2); else panic("COP2 was used in user mode but md_ucop2 is NULL"); } if (td->td_md.md_cop2 == NULL) { td->td_md.md_cop2 = octeon_cop2_alloc_ctx(); if (td->td_md.md_cop2 == NULL) panic("Failed to allocate COP2 context"); memset(td->td_md.md_cop2, 0, sizeof(*td->td_md.md_cop2)); } octeon_cop2_restore(td->td_md.md_cop2); /* Make userland re-request its context */ td->td_frame->sr &= ~MIPS_SR_COP_2_BIT; td->td_md.md_flags |= MDTD_COP2USED; td->td_md.md_cop2owner = COP2_OWNER_KERNEL; /* Enable COP2, it will be disabled in cpu_switch */ mips_wr_status(mips_rd_status() | MIPS_SR_COP_2_BIT); return (trapframe->pc); #else goto err; break; #endif case T_COP_UNUSABLE + T_USER: cop = (trapframe->cause & MIPS_CR_COP_ERR) >> MIPS_CR_COP_ERR_SHIFT; if (cop == 1) { /* FP (COP1) instruction */ if (cpuinfo.fpu_id == 0) { log_illegal_instruction("COP1_UNUSABLE", trapframe); i = SIGILL; break; } addr = trapframe->pc; MipsSwitchFPState(PCPU_GET(fpcurthread), td->td_frame); PCPU_SET(fpcurthread, td); #if defined(__mips_n32) || defined(__mips_n64) td->td_frame->sr |= MIPS_SR_COP_1_BIT | MIPS_SR_FR; #else td->td_frame->sr |= MIPS_SR_COP_1_BIT; #endif td->td_md.md_flags |= MDTD_FPUSED; goto out; } #ifdef CPU_CNMIPS else if (cop == 2) { addr = trapframe->pc; if ((td->td_md.md_flags & MDTD_COP2USED) && (td->td_md.md_cop2owner == COP2_OWNER_KERNEL)) { if (td->td_md.md_cop2) octeon_cop2_save(td->td_md.md_cop2); else panic("COP2 was used in kernel mode but md_cop2 is NULL"); } if (td->td_md.md_ucop2 == NULL) { td->td_md.md_ucop2 = octeon_cop2_alloc_ctx(); if (td->td_md.md_ucop2 == NULL) panic("Failed to allocate userland COP2 context"); memset(td->td_md.md_ucop2, 0, sizeof(*td->td_md.md_ucop2)); } octeon_cop2_restore(td->td_md.md_ucop2); td->td_frame->sr |= MIPS_SR_COP_2_BIT; td->td_md.md_flags |= MDTD_COP2USED; td->td_md.md_cop2owner = COP2_OWNER_USERLAND; goto out; } #endif else { log_illegal_instruction("COPn_UNUSABLE", trapframe); i = SIGILL; /* only FPU instructions allowed */ break; } case T_FPE: #if !defined(SMP) && (defined(DDB) || defined(DEBUG)) trapDump("fpintr"); #else printf("FPU Trap: PC %#jx CR %x SR %x\n", (intmax_t)trapframe->pc, (unsigned)trapframe->cause, (unsigned)trapframe->sr); goto err; #endif case T_FPE + T_USER: if (!emulate_fp) { i = SIGFPE; addr = trapframe->pc; break; } MipsFPTrap(trapframe->sr, trapframe->cause, trapframe->pc); goto out; case T_OVFLOW + T_USER: i = SIGFPE; addr = trapframe->pc; break; case T_ADDR_ERR_LD: /* misaligned access */ case T_ADDR_ERR_ST: /* misaligned access */ #ifdef TRAP_DEBUG if (trap_debug) { printf("+++ ADDR_ERR: type = %d, badvaddr = %#jx\n", type, (intmax_t)trapframe->badvaddr); } #endif /* Only allow emulation on a user address */ if (allow_unaligned_acc && ((vm_offset_t)trapframe->badvaddr < VM_MAXUSER_ADDRESS)) { int mode; if (type == T_ADDR_ERR_LD) mode = VM_PROT_READ; else mode = VM_PROT_WRITE; access_type = emulate_unaligned_access(trapframe, mode); if (access_type != 0) return (trapframe->pc); } /* FALLTHROUGH */ case T_BUS_ERR_LD_ST: /* BERR asserted to cpu */ if (td->td_pcb->pcb_onfault != NULL) { pc = (register_t)(intptr_t)td->td_pcb->pcb_onfault; td->td_pcb->pcb_onfault = NULL; return (pc); } /* FALLTHROUGH */ default: err: #if !defined(SMP) && defined(DEBUG) trapDump("trap"); #endif #ifdef SMP printf("cpu:%d-", PCPU_GET(cpuid)); #endif printf("Trap cause = %d (%s - ", type, trap_type[type & (~T_USER)]); if (type & T_USER) printf("user mode)\n"); else printf("kernel mode)\n"); #ifdef TRAP_DEBUG if (trap_debug) printf("badvaddr = %#jx, pc = %#jx, ra = %#jx, sr = %#jxx\n", (intmax_t)trapframe->badvaddr, (intmax_t)trapframe->pc, (intmax_t)trapframe->ra, (intmax_t)trapframe->sr); #endif #ifdef KDB if (debugger_on_trap) { kdb_why = KDB_WHY_TRAP; kdb_trap(type, 0, trapframe); kdb_why = KDB_WHY_UNSET; } #endif panic("trap"); } td->td_frame->pc = trapframe->pc; td->td_frame->cause = trapframe->cause; td->td_frame->badvaddr = trapframe->badvaddr; ksiginfo_init_trap(&ksi); ksi.ksi_signo = i; ksi.ksi_code = ucode; ksi.ksi_addr = (void *)addr; ksi.ksi_trapno = type & ~T_USER; trapsignal(td, &ksi); out: /* * Note: we should only get here if returning to user mode. */ userret(td, trapframe); return (trapframe->pc); } #if !defined(SMP) && (defined(DDB) || defined(DEBUG)) void trapDump(char *msg) { register_t s; int i; s = intr_disable(); printf("trapDump(%s)\n", msg); for (i = 0; i < TRAPSIZE; i++) { if (trp == trapdebug) { trp = &trapdebug[TRAPSIZE - 1]; } else { trp--; } if (trp->cause == 0) break; printf("%s: ADR %jx PC %jx CR %jx SR %jx\n", trap_type[(trp->cause & MIPS_CR_EXC_CODE) >> MIPS_CR_EXC_CODE_SHIFT], (intmax_t)trp->vadr, (intmax_t)trp->pc, (intmax_t)trp->cause, (intmax_t)trp->status); printf(" RA %jx SP %jx code %d\n", (intmax_t)trp->ra, (intmax_t)trp->sp, (int)trp->code); } intr_restore(s); } #endif /* * Return the resulting PC as if the branch was executed. */ uintptr_t MipsEmulateBranch(struct trapframe *framePtr, uintptr_t instPC, int fpcCSR, uintptr_t instptr) { InstFmt inst; register_t *regsPtr = (register_t *) framePtr; uintptr_t retAddr = 0; int condition; #define GetBranchDest(InstPtr, inst) \ (InstPtr + 4 + ((short)inst.IType.imm << 2)) if (instptr) { if (instptr < MIPS_KSEG0_START) inst.word = fuword32((void *)instptr); else inst = *(InstFmt *) instptr; } else { if ((vm_offset_t)instPC < MIPS_KSEG0_START) inst.word = fuword32((void *)instPC); else inst = *(InstFmt *) instPC; } switch ((int)inst.JType.op) { case OP_SPECIAL: switch ((int)inst.RType.func) { case OP_JR: case OP_JALR: retAddr = regsPtr[inst.RType.rs]; break; default: retAddr = instPC + 4; break; } break; case OP_BCOND: switch ((int)inst.IType.rt) { case OP_BLTZ: case OP_BLTZL: case OP_BLTZAL: case OP_BLTZALL: if ((int)(regsPtr[inst.RType.rs]) < 0) retAddr = GetBranchDest(instPC, inst); else retAddr = instPC + 8; break; case OP_BGEZ: case OP_BGEZL: case OP_BGEZAL: case OP_BGEZALL: if ((int)(regsPtr[inst.RType.rs]) >= 0) retAddr = GetBranchDest(instPC, inst); else retAddr = instPC + 8; break; case OP_TGEI: case OP_TGEIU: case OP_TLTI: case OP_TLTIU: case OP_TEQI: case OP_TNEI: retAddr = instPC + 4; /* Like syscall... */ break; default: panic("MipsEmulateBranch: Bad branch cond"); } break; case OP_J: case OP_JAL: retAddr = (inst.JType.target << 2) | ((unsigned)(instPC + 4) & 0xF0000000); break; case OP_BEQ: case OP_BEQL: if (regsPtr[inst.RType.rs] == regsPtr[inst.RType.rt]) retAddr = GetBranchDest(instPC, inst); else retAddr = instPC + 8; break; case OP_BNE: case OP_BNEL: if (regsPtr[inst.RType.rs] != regsPtr[inst.RType.rt]) retAddr = GetBranchDest(instPC, inst); else retAddr = instPC + 8; break; case OP_BLEZ: case OP_BLEZL: if ((int)(regsPtr[inst.RType.rs]) <= 0) retAddr = GetBranchDest(instPC, inst); else retAddr = instPC + 8; break; case OP_BGTZ: case OP_BGTZL: if ((int)(regsPtr[inst.RType.rs]) > 0) retAddr = GetBranchDest(instPC, inst); else retAddr = instPC + 8; break; case OP_COP1: switch (inst.RType.rs) { case OP_BCx: case OP_BCy: if ((inst.RType.rt & COPz_BC_TF_MASK) == COPz_BC_TRUE) condition = fpcCSR & MIPS_FPU_COND_BIT; else condition = !(fpcCSR & MIPS_FPU_COND_BIT); if (condition) retAddr = GetBranchDest(instPC, inst); else retAddr = instPC + 8; break; default: retAddr = instPC + 4; } break; default: retAddr = instPC + 4; } return (retAddr); } static void log_frame_dump(struct trapframe *frame) { log(LOG_ERR, "Trapframe Register Dump:\n"); log(LOG_ERR, "\tzero: %#jx\tat: %#jx\tv0: %#jx\tv1: %#jx\n", (intmax_t)0, (intmax_t)frame->ast, (intmax_t)frame->v0, (intmax_t)frame->v1); log(LOG_ERR, "\ta0: %#jx\ta1: %#jx\ta2: %#jx\ta3: %#jx\n", (intmax_t)frame->a0, (intmax_t)frame->a1, (intmax_t)frame->a2, (intmax_t)frame->a3); #if defined(__mips_n32) || defined(__mips_n64) log(LOG_ERR, "\ta4: %#jx\ta5: %#jx\ta6: %#jx\ta6: %#jx\n", (intmax_t)frame->a4, (intmax_t)frame->a5, (intmax_t)frame->a6, (intmax_t)frame->a7); log(LOG_ERR, "\tt0: %#jx\tt1: %#jx\tt2: %#jx\tt3: %#jx\n", (intmax_t)frame->t0, (intmax_t)frame->t1, (intmax_t)frame->t2, (intmax_t)frame->t3); #else log(LOG_ERR, "\tt0: %#jx\tt1: %#jx\tt2: %#jx\tt3: %#jx\n", (intmax_t)frame->t0, (intmax_t)frame->t1, (intmax_t)frame->t2, (intmax_t)frame->t3); log(LOG_ERR, "\tt4: %#jx\tt5: %#jx\tt6: %#jx\tt7: %#jx\n", (intmax_t)frame->t4, (intmax_t)frame->t5, (intmax_t)frame->t6, (intmax_t)frame->t7); #endif log(LOG_ERR, "\tt8: %#jx\tt9: %#jx\ts0: %#jx\ts1: %#jx\n", (intmax_t)frame->t8, (intmax_t)frame->t9, (intmax_t)frame->s0, (intmax_t)frame->s1); log(LOG_ERR, "\ts2: %#jx\ts3: %#jx\ts4: %#jx\ts5: %#jx\n", (intmax_t)frame->s2, (intmax_t)frame->s3, (intmax_t)frame->s4, (intmax_t)frame->s5); log(LOG_ERR, "\ts6: %#jx\ts7: %#jx\tk0: %#jx\tk1: %#jx\n", (intmax_t)frame->s6, (intmax_t)frame->s7, (intmax_t)frame->k0, (intmax_t)frame->k1); log(LOG_ERR, "\tgp: %#jx\tsp: %#jx\ts8: %#jx\tra: %#jx\n", (intmax_t)frame->gp, (intmax_t)frame->sp, (intmax_t)frame->s8, (intmax_t)frame->ra); log(LOG_ERR, "\tsr: %#jx\tmullo: %#jx\tmulhi: %#jx\tbadvaddr: %#jx\n", (intmax_t)frame->sr, (intmax_t)frame->mullo, (intmax_t)frame->mulhi, (intmax_t)frame->badvaddr); log(LOG_ERR, "\tcause: %#jx\tpc: %#jx\n", (intmax_t)frame->cause, (intmax_t)frame->pc); } #ifdef TRAP_DEBUG static void trap_frame_dump(struct trapframe *frame) { printf("Trapframe Register Dump:\n"); printf("\tzero: %#jx\tat: %#jx\tv0: %#jx\tv1: %#jx\n", (intmax_t)0, (intmax_t)frame->ast, (intmax_t)frame->v0, (intmax_t)frame->v1); printf("\ta0: %#jx\ta1: %#jx\ta2: %#jx\ta3: %#jx\n", (intmax_t)frame->a0, (intmax_t)frame->a1, (intmax_t)frame->a2, (intmax_t)frame->a3); #if defined(__mips_n32) || defined(__mips_n64) printf("\ta4: %#jx\ta5: %#jx\ta6: %#jx\ta7: %#jx\n", (intmax_t)frame->a4, (intmax_t)frame->a5, (intmax_t)frame->a6, (intmax_t)frame->a7); printf("\tt0: %#jx\tt1: %#jx\tt2: %#jx\tt3: %#jx\n", (intmax_t)frame->t0, (intmax_t)frame->t1, (intmax_t)frame->t2, (intmax_t)frame->t3); #else printf("\tt0: %#jx\tt1: %#jx\tt2: %#jx\tt3: %#jx\n", (intmax_t)frame->t0, (intmax_t)frame->t1, (intmax_t)frame->t2, (intmax_t)frame->t3); printf("\tt4: %#jx\tt5: %#jx\tt6: %#jx\tt7: %#jx\n", (intmax_t)frame->t4, (intmax_t)frame->t5, (intmax_t)frame->t6, (intmax_t)frame->t7); #endif printf("\tt8: %#jx\tt9: %#jx\ts0: %#jx\ts1: %#jx\n", (intmax_t)frame->t8, (intmax_t)frame->t9, (intmax_t)frame->s0, (intmax_t)frame->s1); printf("\ts2: %#jx\ts3: %#jx\ts4: %#jx\ts5: %#jx\n", (intmax_t)frame->s2, (intmax_t)frame->s3, (intmax_t)frame->s4, (intmax_t)frame->s5); printf("\ts6: %#jx\ts7: %#jx\tk0: %#jx\tk1: %#jx\n", (intmax_t)frame->s6, (intmax_t)frame->s7, (intmax_t)frame->k0, (intmax_t)frame->k1); printf("\tgp: %#jx\tsp: %#jx\ts8: %#jx\tra: %#jx\n", (intmax_t)frame->gp, (intmax_t)frame->sp, (intmax_t)frame->s8, (intmax_t)frame->ra); printf("\tsr: %#jx\tmullo: %#jx\tmulhi: %#jx\tbadvaddr: %#jx\n", (intmax_t)frame->sr, (intmax_t)frame->mullo, (intmax_t)frame->mulhi, (intmax_t)frame->badvaddr); printf("\tcause: %#jx\tpc: %#jx\n", (intmax_t)frame->cause, (intmax_t)frame->pc); } #endif static void get_mapping_info(vm_offset_t va, pd_entry_t **pdepp, pt_entry_t **ptepp) { pt_entry_t *ptep; pd_entry_t *pdep; struct proc *p = curproc; pdep = (&(p->p_vmspace->vm_pmap.pm_segtab[(va >> SEGSHIFT) & (NPDEPG - 1)])); if (*pdep) ptep = pmap_pte(&p->p_vmspace->vm_pmap, va); else ptep = (pt_entry_t *)0; *pdepp = pdep; *ptepp = ptep; } static void log_illegal_instruction(const char *msg, struct trapframe *frame) { pt_entry_t *ptep; pd_entry_t *pdep; unsigned int *addr, instr[4]; struct thread *td; struct proc *p; register_t pc; td = curthread; p = td->td_proc; #ifdef SMP printf("cpuid = %d\n", PCPU_GET(cpuid)); #endif pc = frame->pc + (DELAYBRANCH(frame->cause) ? 4 : 0); log(LOG_ERR, "%s: pid %d tid %ld (%s), uid %d: pc %#jx ra %#jx\n", msg, p->p_pid, (long)td->td_tid, p->p_comm, p->p_ucred ? p->p_ucred->cr_uid : -1, (intmax_t)pc, (intmax_t)frame->ra); /* log registers in trap frame */ log_frame_dump(frame); get_mapping_info((vm_offset_t)pc, &pdep, &ptep); /* * Dump a few words around faulting instruction, if the addres is * valid. */ addr = (unsigned int *)(intptr_t)pc; if ((pc & 3) == 0 && copyin(addr, instr, sizeof(instr)) == 0) { /* dump page table entry for faulting instruction */ log(LOG_ERR, "Page table info for pc address %#jx: pde = %p, pte = %#jx\n", (intmax_t)pc, (void *)(intptr_t)*pdep, (uintmax_t)(ptep ? *ptep : 0)); log(LOG_ERR, "Dumping 4 words starting at pc address %p: \n", addr); log(LOG_ERR, "%08x %08x %08x %08x\n", instr[0], instr[1], instr[2], instr[3]); } else { log(LOG_ERR, "pc address %#jx is inaccessible, pde = %p, pte = %#jx\n", (intmax_t)pc, (void *)(intptr_t)*pdep, (uintmax_t)(ptep ? *ptep : 0)); } } static void log_bad_page_fault(char *msg, struct trapframe *frame, int trap_type) { pt_entry_t *ptep; pd_entry_t *pdep; unsigned int *addr, instr[4]; struct thread *td; struct proc *p; char *read_or_write; register_t pc; trap_type &= ~T_USER; td = curthread; p = td->td_proc; #ifdef SMP printf("cpuid = %d\n", PCPU_GET(cpuid)); #endif switch (trap_type) { case T_TLB_MOD: case T_TLB_ST_MISS: case T_ADDR_ERR_ST: read_or_write = "write"; break; case T_TLB_LD_MISS: case T_ADDR_ERR_LD: case T_BUS_ERR_IFETCH: read_or_write = "read"; break; default: read_or_write = "unknown"; } pc = frame->pc + (DELAYBRANCH(frame->cause) ? 4 : 0); log(LOG_ERR, "%s: pid %d tid %ld (%s), uid %d: pc %#jx got a %s fault " "(type %#x) at %#jx\n", msg, p->p_pid, (long)td->td_tid, p->p_comm, p->p_ucred ? p->p_ucred->cr_uid : -1, (intmax_t)pc, read_or_write, trap_type, (intmax_t)frame->badvaddr); /* log registers in trap frame */ log_frame_dump(frame); get_mapping_info((vm_offset_t)pc, &pdep, &ptep); /* * Dump a few words around faulting instruction, if the addres is * valid. */ addr = (unsigned int *)(intptr_t)pc; if ((pc & 3) == 0 && pc != frame->badvaddr && trap_type != T_BUS_ERR_IFETCH && copyin((caddr_t)(intptr_t)pc, instr, sizeof(instr)) == 0) { /* dump page table entry for faulting instruction */ log(LOG_ERR, "Page table info for pc address %#jx: pde = %p, pte = %#jx\n", (intmax_t)pc, (void *)(intptr_t)*pdep, (uintmax_t)(ptep ? *ptep : 0)); log(LOG_ERR, "Dumping 4 words starting at pc address %p: \n", addr); log(LOG_ERR, "%08x %08x %08x %08x\n", instr[0], instr[1], instr[2], instr[3]); } else { log(LOG_ERR, "pc address %#jx is inaccessible, pde = %p, pte = %#jx\n", (intmax_t)pc, (void *)(intptr_t)*pdep, (uintmax_t)(ptep ? *ptep : 0)); } get_mapping_info((vm_offset_t)frame->badvaddr, &pdep, &ptep); log(LOG_ERR, "Page table info for bad address %#jx: pde = %p, pte = %#jx\n", (intmax_t)frame->badvaddr, (void *)(intptr_t)*pdep, (uintmax_t)(ptep ? *ptep : 0)); } /* * Unaligned load/store emulation */ static int mips_unaligned_load_store(struct trapframe *frame, int mode, register_t addr, register_t pc) { register_t *reg = (register_t *) frame; u_int32_t inst = *((u_int32_t *)(intptr_t)pc); register_t value_msb = 0, value = 0; unsigned size; /* * ADDR_ERR faults have higher priority than TLB * Miss faults. Therefore, it is necessary to * verify that the faulting address is a valid * virtual address within the process' address space * before trying to emulate the unaligned access. */ switch (MIPS_INST_OPCODE(inst)) { case OP_LHU: case OP_LH: case OP_SH: size = 2; break; case OP_LWU: case OP_LW: case OP_SW: size = 4; break; case OP_LD: case OP_SD: size = 8; break; default: printf("%s: unhandled opcode in address error: %#x\n", __func__, MIPS_INST_OPCODE(inst)); return (0); } if (!useracc((void *)rounddown2((vm_offset_t)addr, size), size * 2, mode)) return (0); /* * XXX * Handle LL/SC LLD/SCD. */ switch (MIPS_INST_OPCODE(inst)) { case OP_LHU: KASSERT(mode == VM_PROT_READ, ("access mode must be read for load instruction.")); lbu_macro(value_msb, addr); addr += 1; lbu_macro(value, addr); value |= value_msb << 8; reg[MIPS_INST_RT(inst)] = value; return (MIPS_LHU_ACCESS); case OP_LH: KASSERT(mode == VM_PROT_READ, ("access mode must be read for load instruction.")); lb_macro(value_msb, addr); addr += 1; lbu_macro(value, addr); value |= value_msb << 8; reg[MIPS_INST_RT(inst)] = value; return (MIPS_LH_ACCESS); case OP_LWU: KASSERT(mode == VM_PROT_READ, ("access mode must be read for load instruction.")); lwl_macro(value, addr); addr += 3; lwr_macro(value, addr); value &= 0xffffffff; reg[MIPS_INST_RT(inst)] = value; return (MIPS_LWU_ACCESS); case OP_LW: KASSERT(mode == VM_PROT_READ, ("access mode must be read for load instruction.")); lwl_macro(value, addr); addr += 3; lwr_macro(value, addr); reg[MIPS_INST_RT(inst)] = value; return (MIPS_LW_ACCESS); #if defined(__mips_n32) || defined(__mips_n64) case OP_LD: KASSERT(mode == VM_PROT_READ, ("access mode must be read for load instruction.")); ldl_macro(value, addr); addr += 7; ldr_macro(value, addr); reg[MIPS_INST_RT(inst)] = value; return (MIPS_LD_ACCESS); #endif case OP_SH: KASSERT(mode == VM_PROT_WRITE, ("access mode must be write for store instruction.")); value = reg[MIPS_INST_RT(inst)]; value_msb = value >> 8; sb_macro(value_msb, addr); addr += 1; sb_macro(value, addr); return (MIPS_SH_ACCESS); case OP_SW: KASSERT(mode == VM_PROT_WRITE, ("access mode must be write for store instruction.")); value = reg[MIPS_INST_RT(inst)]; swl_macro(value, addr); addr += 3; swr_macro(value, addr); return (MIPS_SW_ACCESS); #if defined(__mips_n32) || defined(__mips_n64) case OP_SD: KASSERT(mode == VM_PROT_WRITE, ("access mode must be write for store instruction.")); value = reg[MIPS_INST_RT(inst)]; sdl_macro(value, addr); addr += 7; sdr_macro(value, addr); return (MIPS_SD_ACCESS); #endif } panic("%s: should not be reached.", __func__); } /* * XXX TODO: SMP? */ static struct timeval unaligned_lasterr; static int unaligned_curerr; static int unaligned_pps_log_limit = 4; SYSCTL_INT(_machdep, OID_AUTO, unaligned_log_pps_limit, CTLFLAG_RWTUN, &unaligned_pps_log_limit, 0, "limit number of userland unaligned log messages per second"); static int emulate_unaligned_access(struct trapframe *frame, int mode) { register_t pc; int access_type = 0; struct thread *td = curthread; struct proc *p = curproc; pc = frame->pc + (DELAYBRANCH(frame->cause) ? 4 : 0); /* * Fall through if it's instruction fetch exception */ if (!((pc & 3) || (pc == frame->badvaddr))) { /* * Handle unaligned load and store */ /* * Return access type if the instruction was emulated. * Otherwise restore pc and fall through. */ access_type = mips_unaligned_load_store(frame, mode, frame->badvaddr, pc); if (access_type) { if (DELAYBRANCH(frame->cause)) frame->pc = MipsEmulateBranch(frame, frame->pc, 0, 0); else frame->pc += 4; if (ppsratecheck(&unaligned_lasterr, &unaligned_curerr, unaligned_pps_log_limit)) { /* XXX TODO: keep global/tid/pid counters? */ log(LOG_INFO, "Unaligned %s: pid=%ld (%s), tid=%ld, " "pc=%#jx, badvaddr=%#jx\n", access_name[access_type - 1], (long) p->p_pid, p->p_comm, (long) td->td_tid, (intmax_t)pc, (intmax_t)frame->badvaddr); } } } return access_type; } Index: head/sys/powerpc/include/proc.h =================================================================== --- head/sys/powerpc/include/proc.h (revision 366204) +++ head/sys/powerpc/include/proc.h (revision 366205) @@ -1,83 +1,82 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: proc.h,v 1.2 1997/04/16 22:57:48 thorpej Exp $ * $FreeBSD$ */ #ifndef _MACHINE_PROC_H_ #define _MACHINE_PROC_H_ /* * Machine-dependent part of the proc structure */ struct mdthread { int md_spinlock_count; /* (k) */ register_t md_saved_msr; /* (k) */ }; struct mdproc { /* * Avoid empty structs because they are undefined behavior. */ long md_spare; }; #ifdef __powerpc64__ #define KINFO_PROC_SIZE 1088 #define KINFO_PROC32_SIZE 816 #else #define KINFO_PROC_SIZE 816 #endif struct syscall_args { u_int code; struct sysent *callp; register_t args[10]; - int narg; }; #ifdef _KERNEL #include /* Get the current kernel thread stack usage. */ #define GET_STACK_USAGE(total, used) do { \ struct thread *td = curthread; \ (total) = td->td_kstack_pages * PAGE_SIZE - sizeof(struct pcb); \ (used) = (char *)td->td_kstack + \ td->td_kstack_pages * PAGE_SIZE - \ (char *)&td; \ } while (0) #endif #endif /* !_MACHINE_PROC_H_ */ Index: head/sys/powerpc/powerpc/trap.c =================================================================== --- head/sys/powerpc/powerpc/trap.c (revision 366204) +++ head/sys/powerpc/powerpc/trap.c (revision 366205) @@ -1,994 +1,994 @@ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: trap.c,v 1.58 2002/03/04 04:07:35 dbj Exp $ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Below matches setjmp.S */ #define FAULTBUF_LR 21 #define FAULTBUF_R1 1 #define FAULTBUF_R2 2 #define FAULTBUF_CR 22 #define FAULTBUF_R14 3 #define MOREARGS(sp) ((caddr_t)((uintptr_t)(sp) + \ sizeof(struct callframe) - 3*sizeof(register_t))) /* more args go here */ static void trap_fatal(struct trapframe *frame); static void printtrap(u_int vector, struct trapframe *frame, int isfatal, int user); static bool trap_pfault(struct trapframe *frame, bool user, int *signo, int *ucode); static int fix_unaligned(struct thread *td, struct trapframe *frame); static int handle_onfault(struct trapframe *frame); static void syscall(struct trapframe *frame); #if defined(__powerpc64__) && defined(AIM) static void normalize_inputs(void); #endif extern vm_offset_t __startkernel; extern int copy_fault(void); extern int fusufault(void); #ifdef KDB int db_trap_glue(struct trapframe *); /* Called from trap_subr.S */ #endif struct powerpc_exception { u_int vector; char *name; }; #ifdef KDTRACE_HOOKS #include int (*dtrace_invop_jump_addr)(struct trapframe *); #endif static struct powerpc_exception powerpc_exceptions[] = { { EXC_CRIT, "critical input" }, { EXC_RST, "system reset" }, { EXC_MCHK, "machine check" }, { EXC_DSI, "data storage interrupt" }, { EXC_DSE, "data segment exception" }, { EXC_ISI, "instruction storage interrupt" }, { EXC_ISE, "instruction segment exception" }, { EXC_EXI, "external interrupt" }, { EXC_ALI, "alignment" }, { EXC_PGM, "program" }, { EXC_HEA, "hypervisor emulation assistance" }, { EXC_FPU, "floating-point unavailable" }, { EXC_APU, "auxiliary proc unavailable" }, { EXC_DECR, "decrementer" }, { EXC_FIT, "fixed-interval timer" }, { EXC_WDOG, "watchdog timer" }, { EXC_SC, "system call" }, { EXC_TRC, "trace" }, { EXC_FPA, "floating-point assist" }, { EXC_DEBUG, "debug" }, { EXC_PERF, "performance monitoring" }, { EXC_VEC, "altivec unavailable" }, { EXC_VSX, "vsx unavailable" }, { EXC_FAC, "facility unavailable" }, { EXC_ITMISS, "instruction tlb miss" }, { EXC_DLMISS, "data load tlb miss" }, { EXC_DSMISS, "data store tlb miss" }, { EXC_BPT, "instruction breakpoint" }, { EXC_SMI, "system management" }, { EXC_VECAST_G4, "altivec assist" }, { EXC_THRM, "thermal management" }, { EXC_RUNMODETRC, "run mode/trace" }, { EXC_SOFT_PATCH, "soft patch exception" }, { EXC_LAST, NULL } }; #define ESR_BITMASK \ "\20" \ "\040b0\037b1\036b2\035b3\034PIL\033PRR\032PTR\031FP" \ "\030ST\027b9\026DLK\025ILK\024b12\023b13\022BO\021PIE" \ "\020b16\017b17\016b18\015b19\014b20\013b21\012b22\011b23" \ "\010SPE\007EPID\006b26\005b27\004b28\003b29\002b30\001b31" #define MCSR_BITMASK \ "\20" \ "\040MCP\037ICERR\036DCERR\035TLBPERR\034L2MMU_MHIT\033b5\032b6\031b7" \ "\030b8\027b9\026b10\025NMI\024MAV\023MEA\022b14\021IF" \ "\020LD\017ST\016LDG\015b19\014b20\013b21\012b22\011b23" \ "\010b24\007b25\006b26\005b27\004b28\003b29\002TLBSYNC\001BSL2_ERR" #define MSSSR_BITMASK \ "\20" \ "\040b0\037b1\036b2\035b3\034b4\033b5\032b6\031b7" \ "\030b8\027b9\026b10\025b11\024b12\023L2TAG\022L2DAT\021L3TAG" \ "\020L3DAT\017APE\016DPE\015TEA\014b20\013b21\012b22\011b23" \ "\010b24\007b25\006b26\005b27\004b28\003b29\002b30\001b31" static const char * trapname(u_int vector) { struct powerpc_exception *pe; for (pe = powerpc_exceptions; pe->vector != EXC_LAST; pe++) { if (pe->vector == vector) return (pe->name); } return ("unknown"); } static inline bool frame_is_trap_inst(struct trapframe *frame) { #ifdef AIM return (frame->exc == EXC_PGM && frame->srr1 & EXC_PGM_TRAP); #else return ((frame->cpu.booke.esr & ESR_PTR) != 0); #endif } void trap(struct trapframe *frame) { struct thread *td; struct proc *p; #ifdef KDTRACE_HOOKS uint32_t inst; #endif int sig, type, user; u_int ucode; ksiginfo_t ksi; register_t addr, fscr; VM_CNT_INC(v_trap); #ifdef KDB if (kdb_active) { kdb_reenter(); return; } #endif td = curthread; p = td->td_proc; type = ucode = frame->exc; sig = 0; user = frame->srr1 & PSL_PR; addr = 0; CTR3(KTR_TRAP, "trap: %s type=%s (%s)", td->td_name, trapname(type), user ? "user" : "kernel"); #ifdef KDTRACE_HOOKS /* * A trap can occur while DTrace executes a probe. Before * executing the probe, DTrace blocks re-scheduling and sets * a flag in its per-cpu flags to indicate that it doesn't * want to fault. On returning from the probe, the no-fault * flag is cleared and finally re-scheduling is enabled. * * If the DTrace kernel module has registered a trap handler, * call it and if it returns non-zero, assume that it has * handled the trap and modified the trap frame so that this * function can return normally. */ if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type) != 0) return; #endif if (user) { td->td_pticks = 0; td->td_frame = frame; addr = frame->srr0; if (td->td_cowgen != p->p_cowgen) thread_cow_update(td); /* User Mode Traps */ switch (type) { case EXC_RUNMODETRC: case EXC_TRC: frame->srr1 &= ~PSL_SE; sig = SIGTRAP; ucode = TRAP_TRACE; break; #if defined(__powerpc64__) && defined(AIM) case EXC_DSE: addr = frame->dar; /* FALLTHROUGH */ case EXC_ISE: /* DSE/ISE are automatically fatal with radix pmap. */ if (radix_mmu || handle_user_slb_spill(&p->p_vmspace->vm_pmap, addr) != 0){ sig = SIGSEGV; ucode = SEGV_MAPERR; } break; #endif case EXC_DSI: addr = frame->dar; /* FALLTHROUGH */ case EXC_ISI: if (trap_pfault(frame, true, &sig, &ucode)) sig = 0; break; case EXC_SC: syscall(frame); break; case EXC_FPU: KASSERT((td->td_pcb->pcb_flags & PCB_FPU) != PCB_FPU, ("FPU already enabled for thread")); enable_fpu(td); break; case EXC_VEC: KASSERT((td->td_pcb->pcb_flags & PCB_VEC) != PCB_VEC, ("Altivec already enabled for thread")); enable_vec(td); break; case EXC_VSX: KASSERT((td->td_pcb->pcb_flags & PCB_VSX) != PCB_VSX, ("VSX already enabled for thread")); if (!(td->td_pcb->pcb_flags & PCB_VEC)) enable_vec(td); if (td->td_pcb->pcb_flags & PCB_FPU) save_fpu(td); td->td_pcb->pcb_flags |= PCB_VSX; enable_fpu(td); break; case EXC_FAC: fscr = mfspr(SPR_FSCR); switch (fscr & FSCR_IC_MASK) { case FSCR_IC_HTM: CTR0(KTR_TRAP, "Hardware Transactional Memory subsystem disabled"); sig = SIGILL; ucode = ILL_ILLOPC; break; case FSCR_IC_DSCR: td->td_pcb->pcb_flags |= PCB_CFSCR | PCB_CDSCR; fscr |= FSCR_DSCR; mtspr(SPR_DSCR, 0); break; case FSCR_IC_EBB: td->td_pcb->pcb_flags |= PCB_CFSCR; fscr |= FSCR_EBB; mtspr(SPR_EBBHR, 0); mtspr(SPR_EBBRR, 0); mtspr(SPR_BESCR, 0); break; case FSCR_IC_TAR: td->td_pcb->pcb_flags |= PCB_CFSCR; fscr |= FSCR_TAR; mtspr(SPR_TAR, 0); break; case FSCR_IC_LM: td->td_pcb->pcb_flags |= PCB_CFSCR; fscr |= FSCR_LM; mtspr(SPR_LMRR, 0); mtspr(SPR_LMSER, 0); break; default: sig = SIGILL; ucode = ILL_ILLOPC; } mtspr(SPR_FSCR, fscr & ~FSCR_IC_MASK); break; case EXC_HEA: sig = SIGILL; ucode = ILL_ILLOPC; break; case EXC_VECAST_E: case EXC_VECAST_G4: case EXC_VECAST_G5: /* * We get a VPU assist exception for IEEE mode * vector operations on denormalized floats. * Emulating this is a giant pain, so for now, * just switch off IEEE mode and treat them as * zero. */ save_vec(td); td->td_pcb->pcb_vec.vscr |= ALTIVEC_VSCR_NJ; enable_vec(td); break; case EXC_ALI: if (fix_unaligned(td, frame) != 0) { sig = SIGBUS; ucode = BUS_ADRALN; addr = frame->dar; } else frame->srr0 += 4; break; case EXC_DEBUG: /* Single stepping */ mtspr(SPR_DBSR, mfspr(SPR_DBSR)); frame->srr1 &= ~PSL_DE; frame->cpu.booke.dbcr0 &= ~(DBCR0_IDM | DBCR0_IC); sig = SIGTRAP; ucode = TRAP_TRACE; break; case EXC_PGM: /* Identify the trap reason */ if (frame_is_trap_inst(frame)) { #ifdef KDTRACE_HOOKS inst = fuword32((const void *)frame->srr0); if (inst == 0x0FFFDDDD && dtrace_pid_probe_ptr != NULL) { (*dtrace_pid_probe_ptr)(frame); break; } #endif sig = SIGTRAP; ucode = TRAP_BRKPT; } else { sig = ppc_instr_emulate(frame, td); if (sig == SIGILL) { if (frame->srr1 & EXC_PGM_PRIV) ucode = ILL_PRVOPC; else if (frame->srr1 & EXC_PGM_ILLEGAL) ucode = ILL_ILLOPC; } else if (sig == SIGFPE) ucode = FPE_FLTINV; /* Punt for now, invalid operation. */ } break; case EXC_MCHK: sig = cpu_machine_check(td, frame, &ucode); printtrap(frame->exc, frame, 0, (frame->srr1 & PSL_PR)); break; #if defined(__powerpc64__) && defined(AIM) case EXC_SOFT_PATCH: /* * Point to the instruction that generated the exception to execute it again, * and normalize the register values. */ frame->srr0 -= 4; normalize_inputs(); break; #endif default: trap_fatal(frame); } } else { /* Kernel Mode Traps */ KASSERT(cold || td->td_ucred != NULL, ("kernel trap doesn't have ucred")); switch (type) { case EXC_PGM: #ifdef KDTRACE_HOOKS if (frame_is_trap_inst(frame)) { if (*(uint32_t *)frame->srr0 == EXC_DTRACE) { if (dtrace_invop_jump_addr != NULL) { dtrace_invop_jump_addr(frame); return; } } } #endif #ifdef KDB if (db_trap_glue(frame)) return; #endif break; #if defined(__powerpc64__) && defined(AIM) case EXC_DSE: /* DSE on radix mmu is automatically fatal. */ if (radix_mmu) break; if (td->td_pcb->pcb_cpu.aim.usr_vsid != 0 && (frame->dar & SEGMENT_MASK) == USER_ADDR) { __asm __volatile ("slbmte %0, %1" :: "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), "r"(USER_SLB_SLBE)); return; } break; #endif case EXC_DSI: if (trap_pfault(frame, false, NULL, NULL)) return; break; case EXC_MCHK: if (handle_onfault(frame)) return; break; default: break; } trap_fatal(frame); } if (sig != 0) { if (p->p_sysent->sv_transtrap != NULL) sig = (p->p_sysent->sv_transtrap)(sig, type); ksiginfo_init_trap(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = (int) ucode; /* XXX, not POSIX */ ksi.ksi_addr = (void *)addr; ksi.ksi_trapno = type; trapsignal(td, &ksi); } userret(td, frame); } static void trap_fatal(struct trapframe *frame) { #ifdef KDB bool handled; #endif printtrap(frame->exc, frame, 1, (frame->srr1 & PSL_PR)); #ifdef KDB if (debugger_on_trap) { kdb_why = KDB_WHY_TRAP; handled = kdb_trap(frame->exc, 0, frame); kdb_why = KDB_WHY_UNSET; if (handled) return; } #endif panic("%s trap", trapname(frame->exc)); } static void cpu_printtrap(u_int vector, struct trapframe *frame, int isfatal, int user) { #ifdef AIM uint16_t ver; switch (vector) { case EXC_MCHK: ver = mfpvr() >> 16; if (MPC745X_P(ver)) printf(" msssr0 = 0x%b\n", (int)mfspr(SPR_MSSSR0), MSSSR_BITMASK); case EXC_DSE: case EXC_DSI: case EXC_DTMISS: printf(" dsisr = 0x%lx\n", (u_long)frame->cpu.aim.dsisr); break; } #elif defined(BOOKE) vm_paddr_t pa; switch (vector) { case EXC_MCHK: pa = mfspr(SPR_MCARU); pa = (pa << 32) | (u_register_t)mfspr(SPR_MCAR); printf(" mcsr = 0x%b\n", (int)mfspr(SPR_MCSR), MCSR_BITMASK); printf(" mcar = 0x%jx\n", (uintmax_t)pa); } printf(" esr = 0x%b\n", (int)frame->cpu.booke.esr, ESR_BITMASK); #endif } static void printtrap(u_int vector, struct trapframe *frame, int isfatal, int user) { printf("\n"); printf("%s %s trap:\n", isfatal ? "fatal" : "handled", user ? "user" : "kernel"); printf("\n"); printf(" exception = 0x%x (%s)\n", vector, trapname(vector)); switch (vector) { case EXC_DSE: case EXC_DSI: case EXC_DTMISS: case EXC_ALI: case EXC_MCHK: printf(" virtual address = 0x%" PRIxPTR "\n", frame->dar); break; case EXC_ISE: case EXC_ISI: case EXC_ITMISS: printf(" virtual address = 0x%" PRIxPTR "\n", frame->srr0); break; } cpu_printtrap(vector, frame, isfatal, user); printf(" srr0 = 0x%" PRIxPTR " (0x%" PRIxPTR ")\n", frame->srr0, frame->srr0 - (register_t)(__startkernel - KERNBASE)); printf(" srr1 = 0x%lx\n", (u_long)frame->srr1); printf(" current msr = 0x%" PRIxPTR "\n", mfmsr()); printf(" lr = 0x%" PRIxPTR " (0x%" PRIxPTR ")\n", frame->lr, frame->lr - (register_t)(__startkernel - KERNBASE)); printf(" frame = %p\n", frame); printf(" curthread = %p\n", curthread); if (curthread != NULL) printf(" pid = %d, comm = %s\n", curthread->td_proc->p_pid, curthread->td_name); printf("\n"); } /* * Handles a fatal fault when we have onfault state to recover. Returns * non-zero if there was onfault recovery state available. */ static int handle_onfault(struct trapframe *frame) { struct thread *td; jmp_buf *fb; td = curthread; #if defined(__powerpc64__) || defined(BOOKE) uintptr_t dispatch = (uintptr_t)td->td_pcb->pcb_onfault; if (dispatch == 0) return (0); /* Short-circuit radix and Book-E paths. */ switch (dispatch) { case COPYFAULT: frame->srr0 = (uintptr_t)copy_fault; return (1); case FUSUFAULT: frame->srr0 = (uintptr_t)fusufault; return (1); default: break; } #endif fb = td->td_pcb->pcb_onfault; if (fb != NULL) { frame->srr0 = (*fb)->_jb[FAULTBUF_LR]; frame->fixreg[1] = (*fb)->_jb[FAULTBUF_R1]; frame->fixreg[2] = (*fb)->_jb[FAULTBUF_R2]; frame->fixreg[3] = 1; frame->cr = (*fb)->_jb[FAULTBUF_CR]; bcopy(&(*fb)->_jb[FAULTBUF_R14], &frame->fixreg[14], 18 * sizeof(register_t)); td->td_pcb->pcb_onfault = NULL; /* Returns twice, not thrice */ return (1); } return (0); } int cpu_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; struct syscall_args *sa; caddr_t params; size_t argsz; - int error, n, i; + int error, n, narg, i; p = td->td_proc; frame = td->td_frame; sa = &td->td_sa; sa->code = frame->fixreg[0]; params = (caddr_t)(frame->fixreg + FIRSTARG); n = NARGREG; if (sa->code == SYS_syscall) { /* * code is first argument, * followed by actual args. */ sa->code = *(register_t *) params; params += sizeof(register_t); n -= 1; } else if (sa->code == SYS___syscall) { /* * Like syscall, but code is a quad, * so as to maintain quad alignment * for the rest of the args. */ if (SV_PROC_FLAG(p, SV_ILP32)) { params += sizeof(register_t); sa->code = *(register_t *) params; params += sizeof(register_t); n -= 2; } else { sa->code = *(register_t *) params; params += sizeof(register_t); n -= 1; } } if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; - sa->narg = sa->callp->sy_narg; + narg = sa->callp->sy_narg; if (SV_PROC_FLAG(p, SV_ILP32)) { argsz = sizeof(uint32_t); for (i = 0; i < n; i++) sa->args[i] = ((u_register_t *)(params))[i] & 0xffffffff; } else { argsz = sizeof(uint64_t); for (i = 0; i < n; i++) sa->args[i] = ((u_register_t *)(params))[i]; } - if (sa->narg > n) + if (narg > n) error = copyin(MOREARGS(frame->fixreg[1]), sa->args + n, - (sa->narg - n) * argsz); + (narg - n) * argsz); else error = 0; #ifdef __powerpc64__ - if (SV_PROC_FLAG(p, SV_ILP32) && sa->narg > n) { + if (SV_PROC_FLAG(p, SV_ILP32) && narg > n) { /* Expand the size of arguments copied from the stack */ - for (i = sa->narg; i >= n; i--) + for (i = narg; i >= n; i--) sa->args[i] = ((uint32_t *)(&sa->args[n]))[i-n]; } #endif if (error == 0) { td->td_retval[0] = 0; td->td_retval[1] = frame->fixreg[FIRSTARG + 1]; } return (error); } #include "../../kern/subr_syscall.c" void syscall(struct trapframe *frame) { struct thread *td; td = curthread; td->td_frame = frame; #if defined(__powerpc64__) && defined(AIM) /* * Speculatively restore last user SLB segment, which we know is * invalid already, since we are likely to do copyin()/copyout(). */ if (td->td_pcb->pcb_cpu.aim.usr_vsid != 0) __asm __volatile ("slbmte %0, %1; isync" :: "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), "r"(USER_SLB_SLBE)); #endif syscallenter(td); syscallret(td); } static bool trap_pfault(struct trapframe *frame, bool user, int *signo, int *ucode) { vm_offset_t eva; struct thread *td; struct proc *p; vm_map_t map; vm_prot_t ftype; int rv, is_user; td = curthread; p = td->td_proc; if (frame->exc == EXC_ISI) { eva = frame->srr0; ftype = VM_PROT_EXECUTE; if (frame->srr1 & SRR1_ISI_PFAULT) ftype |= VM_PROT_READ; } else { eva = frame->dar; #ifdef BOOKE if (frame->cpu.booke.esr & ESR_ST) #else if (frame->cpu.aim.dsisr & DSISR_STORE) #endif ftype = VM_PROT_WRITE; else ftype = VM_PROT_READ; } #if defined(__powerpc64__) && defined(AIM) if (radix_mmu && pmap_nofault(&p->p_vmspace->vm_pmap, eva, ftype) == 0) return (true); #endif if (__predict_false((td->td_pflags & TDP_NOFAULTING) == 0)) { /* * If we get a page fault while in a critical section, then * it is most likely a fatal kernel page fault. The kernel * is already going to panic trying to get a sleep lock to * do the VM lookup, so just consider it a fatal trap so the * kernel can print out a useful trap message and even get * to the debugger. * * If we get a page fault while holding a non-sleepable * lock, then it is most likely a fatal kernel page fault. * If WITNESS is enabled, then it's going to whine about * bogus LORs with various VM locks, so just skip to the * fatal trap handling directly. */ if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) { trap_fatal(frame); return (false); } } if (user) { KASSERT(p->p_vmspace != NULL, ("trap_pfault: vmspace NULL")); map = &p->p_vmspace->vm_map; } else { rv = pmap_decode_kernel_ptr(eva, &is_user, &eva); if (rv != 0) return (false); if (is_user) map = &p->p_vmspace->vm_map; else map = kernel_map; } /* Fault in the page. */ rv = vm_fault_trap(map, eva, ftype, VM_FAULT_NORMAL, signo, ucode); /* * XXXDTRACE: add dtrace_doubletrap_func here? */ if (rv == KERN_SUCCESS) return (true); if (!user && handle_onfault(frame)) return (true); return (false); } /* * For now, this only deals with the particular unaligned access case * that gcc tends to generate. Eventually it should handle all of the * possibilities that can happen on a 32-bit PowerPC in big-endian mode. */ static int fix_unaligned(struct thread *td, struct trapframe *frame) { struct thread *fputhread; #ifdef BOOKE uint32_t inst; #endif int indicator, reg; double *fpr; #ifdef __SPE__ indicator = (frame->cpu.booke.esr & (ESR_ST|ESR_SPE)); if (indicator & ESR_SPE) { if (copyin((void *)frame->srr0, &inst, sizeof(inst)) != 0) return (-1); reg = EXC_ALI_INST_RST(inst); fpr = (double *)td->td_pcb->pcb_vec.vr[reg]; fputhread = PCPU_GET(vecthread); /* Juggle the SPE to ensure that we've initialized * the registers, and that their current state is in * the PCB. */ if (fputhread != td) { if (fputhread) save_vec(fputhread); enable_vec(td); } save_vec(td); if (!(indicator & ESR_ST)) { if (copyin((void *)frame->dar, fpr, sizeof(double)) != 0) return (-1); frame->fixreg[reg] = td->td_pcb->pcb_vec.vr[reg][1]; enable_vec(td); } else { td->td_pcb->pcb_vec.vr[reg][1] = frame->fixreg[reg]; if (copyout(fpr, (void *)frame->dar, sizeof(double)) != 0) return (-1); } return (0); } #else #ifdef BOOKE indicator = (frame->cpu.booke.esr & ESR_ST) ? EXC_ALI_STFD : EXC_ALI_LFD; #else indicator = EXC_ALI_OPCODE_INDICATOR(frame->cpu.aim.dsisr); #endif switch (indicator) { case EXC_ALI_LFD: case EXC_ALI_STFD: #ifdef BOOKE if (copyin((void *)frame->srr0, &inst, sizeof(inst)) != 0) return (-1); reg = EXC_ALI_INST_RST(inst); #else reg = EXC_ALI_RST(frame->cpu.aim.dsisr); #endif fpr = &td->td_pcb->pcb_fpu.fpr[reg].fpr; fputhread = PCPU_GET(fputhread); /* Juggle the FPU to ensure that we've initialized * the FPRs, and that their current state is in * the PCB. */ if (fputhread != td) { if (fputhread) save_fpu(fputhread); enable_fpu(td); } save_fpu(td); if (indicator == EXC_ALI_LFD) { if (copyin((void *)frame->dar, fpr, sizeof(double)) != 0) return (-1); enable_fpu(td); } else { if (copyout(fpr, (void *)frame->dar, sizeof(double)) != 0) return (-1); } return (0); break; } #endif return (-1); } #if defined(__powerpc64__) && defined(AIM) #define MSKNSHL(x, m, n) "(((" #x ") & " #m ") << " #n ")" #define MSKNSHR(x, m, n) "(((" #x ") & " #m ") >> " #n ")" /* xvcpsgndp instruction, built in opcode format. * This can be changed to use mnemonic after a toolchain update. */ #define XVCPSGNDP(xt, xa, xb) \ __asm __volatile(".long (" \ MSKNSHL(60, 0x3f, 26) " | " \ MSKNSHL(xt, 0x1f, 21) " | " \ MSKNSHL(xa, 0x1f, 16) " | " \ MSKNSHL(xb, 0x1f, 11) " | " \ MSKNSHL(240, 0xff, 3) " | " \ MSKNSHR(xa, 0x20, 3) " | " \ MSKNSHR(xa, 0x20, 4) " | " \ MSKNSHR(xa, 0x20, 5) ")") /* Macros to normalize 1 or 10 VSX registers */ #define NORM(x) XVCPSGNDP(x, x, x) #define NORM10(x) \ NORM(x ## 0); NORM(x ## 1); NORM(x ## 2); NORM(x ## 3); NORM(x ## 4); \ NORM(x ## 5); NORM(x ## 6); NORM(x ## 7); NORM(x ## 8); NORM(x ## 9) static void normalize_inputs(void) { unsigned long msr; /* enable VSX */ msr = mfmsr(); mtmsr(msr | PSL_VSX); NORM(0); NORM(1); NORM(2); NORM(3); NORM(4); NORM(5); NORM(6); NORM(7); NORM(8); NORM(9); NORM10(1); NORM10(2); NORM10(3); NORM10(4); NORM10(5); NORM(60); NORM(61); NORM(62); NORM(63); /* restore MSR */ mtmsr(msr); } #endif #ifdef KDB int db_trap_glue(struct trapframe *frame) { if (!(frame->srr1 & PSL_PR) && (frame->exc == EXC_TRC || frame->exc == EXC_RUNMODETRC || frame_is_trap_inst(frame) || frame->exc == EXC_BPT || frame->exc == EXC_DEBUG || frame->exc == EXC_DSI)) { int type = frame->exc; /* Ignore DTrace traps. */ if (*(uint32_t *)frame->srr0 == EXC_DTRACE) return (0); if (frame_is_trap_inst(frame)) { type = T_BREAKPOINT; } return (kdb_trap(type, 0, frame)); } return (0); } #endif Index: head/sys/riscv/include/proc.h =================================================================== --- head/sys/riscv/include/proc.h (revision 366204) +++ head/sys/riscv/include/proc.h (revision 366205) @@ -1,56 +1,55 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)proc.h 7.1 (Berkeley) 5/15/91 * from: FreeBSD: src/sys/i386/include/proc.h,v 1.11 2001/06/29 * $FreeBSD$ */ #ifndef _MACHINE_PROC_H_ #define _MACHINE_PROC_H_ struct mdthread { int md_spinlock_count; /* (k) */ register_t md_saved_sstatus_ie; /* (k) */ }; struct mdproc { int dummy; }; #define KINFO_PROC_SIZE 1088 #define MAXARGS 8 struct syscall_args { u_int code; struct sysent *callp; register_t args[MAXARGS]; - int narg; }; #endif /* !_MACHINE_PROC_H_ */ Index: head/sys/riscv/riscv/trap.c =================================================================== --- head/sys/riscv/riscv/trap.c (revision 366204) +++ head/sys/riscv/riscv/trap.c (revision 366205) @@ -1,381 +1,380 @@ /*- * Copyright (c) 2015-2018 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #ifdef KDB #include #endif #include #include #include #include #include #include #ifdef FPE #include #endif #include #include #include #include #include #ifdef KDTRACE_HOOKS #include #endif int (*dtrace_invop_jump_addr)(struct trapframe *); extern register_t fsu_intr_fault; /* Called from exception.S */ void do_trap_supervisor(struct trapframe *); void do_trap_user(struct trapframe *); static __inline void call_trapsignal(struct thread *td, int sig, int code, void *addr, int trapno) { ksiginfo_t ksi; ksiginfo_init_trap(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = code; ksi.ksi_addr = addr; ksi.ksi_trapno = trapno; trapsignal(td, &ksi); } int cpu_fetch_syscall_args(struct thread *td) { struct proc *p; register_t *ap; struct syscall_args *sa; int nap; nap = NARGREG; p = td->td_proc; sa = &td->td_sa; ap = &td->td_frame->tf_a[0]; sa->code = td->td_frame->tf_t[0]; if (sa->code == SYS_syscall || sa->code == SYS___syscall) { sa->code = *ap++; nap--; } if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; - sa->narg = sa->callp->sy_narg; memcpy(sa->args, ap, nap * sizeof(register_t)); - if (sa->narg > nap) + if (sa->callp->sy_narg > nap) panic("TODO: Could we have more then %d args?", NARGREG); td->td_retval[0] = 0; td->td_retval[1] = 0; return (0); } #include "../../kern/subr_syscall.c" static void dump_regs(struct trapframe *frame) { int n; int i; n = nitems(frame->tf_t); for (i = 0; i < n; i++) printf("t[%d] == 0x%016lx\n", i, frame->tf_t[i]); n = nitems(frame->tf_s); for (i = 0; i < n; i++) printf("s[%d] == 0x%016lx\n", i, frame->tf_s[i]); n = nitems(frame->tf_a); for (i = 0; i < n; i++) printf("a[%d] == 0x%016lx\n", i, frame->tf_a[i]); printf("ra == 0x%016lx\n", frame->tf_ra); printf("sp == 0x%016lx\n", frame->tf_sp); printf("gp == 0x%016lx\n", frame->tf_gp); printf("tp == 0x%016lx\n", frame->tf_tp); printf("sepc == 0x%016lx\n", frame->tf_sepc); printf("sstatus == 0x%016lx\n", frame->tf_sstatus); } static void svc_handler(struct trapframe *frame) { struct thread *td; td = curthread; td->td_frame = frame; syscallenter(td); syscallret(td); } static void data_abort(struct trapframe *frame, int usermode) { struct vm_map *map; uint64_t stval; struct thread *td; struct pcb *pcb; vm_prot_t ftype; vm_offset_t va; struct proc *p; int error, sig, ucode; #ifdef KDB if (kdb_active) { kdb_reenter(); return; } #endif td = curthread; p = td->td_proc; pcb = td->td_pcb; stval = frame->tf_stval; if (td->td_critnest != 0 || td->td_intr_nesting_level != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) goto fatal; if (usermode) { map = &td->td_proc->p_vmspace->vm_map; } else { /* * Enable interrupts for the duration of the page fault. For * user faults this was done already in do_trap_user(). */ intr_enable(); if (stval >= VM_MAX_USER_ADDRESS) { map = kernel_map; } else { if (pcb->pcb_onfault == 0) goto fatal; map = &td->td_proc->p_vmspace->vm_map; } } va = trunc_page(stval); if ((frame->tf_scause == EXCP_FAULT_STORE) || (frame->tf_scause == EXCP_STORE_PAGE_FAULT)) { ftype = VM_PROT_WRITE; } else if (frame->tf_scause == EXCP_INST_PAGE_FAULT) { ftype = VM_PROT_EXECUTE; } else { ftype = VM_PROT_READ; } if (pmap_fault_fixup(map->pmap, va, ftype)) goto done; error = vm_fault_trap(map, va, ftype, VM_FAULT_NORMAL, &sig, &ucode); if (error != KERN_SUCCESS) { if (usermode) { call_trapsignal(td, sig, ucode, (void *)stval, frame->tf_scause & EXCP_MASK); } else { if (pcb->pcb_onfault != 0) { frame->tf_a[0] = error; frame->tf_sepc = pcb->pcb_onfault; return; } goto fatal; } } done: if (usermode) userret(td, frame); return; fatal: dump_regs(frame); panic("Fatal page fault at %#lx: %#016lx", frame->tf_sepc, stval); } void do_trap_supervisor(struct trapframe *frame) { uint64_t exception; /* Ensure we came from supervisor mode, interrupts disabled */ KASSERT((csr_read(sstatus) & (SSTATUS_SPP | SSTATUS_SIE)) == SSTATUS_SPP, ("Came from S mode with interrupts enabled")); exception = frame->tf_scause & EXCP_MASK; if (frame->tf_scause & EXCP_INTR) { /* Interrupt */ riscv_cpu_intr(frame); return; } #ifdef KDTRACE_HOOKS if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, exception)) return; #endif CTR3(KTR_TRAP, "do_trap_supervisor: curthread: %p, sepc: %lx, frame: %p", curthread, frame->tf_sepc, frame); switch (exception) { case EXCP_FAULT_LOAD: case EXCP_FAULT_STORE: case EXCP_FAULT_FETCH: case EXCP_STORE_PAGE_FAULT: case EXCP_LOAD_PAGE_FAULT: data_abort(frame, 0); break; case EXCP_BREAKPOINT: #ifdef KDTRACE_HOOKS if (dtrace_invop_jump_addr != NULL && dtrace_invop_jump_addr(frame) == 0) break; #endif #ifdef KDB kdb_trap(exception, 0, frame); #else dump_regs(frame); panic("No debugger in kernel.\n"); #endif break; case EXCP_ILLEGAL_INSTRUCTION: dump_regs(frame); panic("Illegal instruction at 0x%016lx\n", frame->tf_sepc); break; default: dump_regs(frame); panic("Unknown kernel exception %lx trap value %lx\n", exception, frame->tf_stval); } } void do_trap_user(struct trapframe *frame) { uint64_t exception; struct thread *td; struct pcb *pcb; td = curthread; td->td_frame = frame; pcb = td->td_pcb; /* Ensure we came from usermode, interrupts disabled */ KASSERT((csr_read(sstatus) & (SSTATUS_SPP | SSTATUS_SIE)) == 0, ("Came from U mode with interrupts enabled")); exception = frame->tf_scause & EXCP_MASK; if (frame->tf_scause & EXCP_INTR) { /* Interrupt */ riscv_cpu_intr(frame); return; } intr_enable(); CTR3(KTR_TRAP, "do_trap_user: curthread: %p, sepc: %lx, frame: %p", curthread, frame->tf_sepc, frame); switch (exception) { case EXCP_FAULT_LOAD: case EXCP_FAULT_STORE: case EXCP_FAULT_FETCH: case EXCP_STORE_PAGE_FAULT: case EXCP_LOAD_PAGE_FAULT: case EXCP_INST_PAGE_FAULT: data_abort(frame, 1); break; case EXCP_USER_ECALL: frame->tf_sepc += 4; /* Next instruction */ svc_handler(frame); break; case EXCP_ILLEGAL_INSTRUCTION: #ifdef FPE if ((pcb->pcb_fpflags & PCB_FP_STARTED) == 0) { /* * May be a FPE trap. Enable FPE usage * for this thread and try again. */ fpe_state_clear(); frame->tf_sstatus &= ~SSTATUS_FS_MASK; frame->tf_sstatus |= SSTATUS_FS_CLEAN; pcb->pcb_fpflags |= PCB_FP_STARTED; break; } #endif call_trapsignal(td, SIGILL, ILL_ILLTRP, (void *)frame->tf_sepc, exception); userret(td, frame); break; case EXCP_BREAKPOINT: call_trapsignal(td, SIGTRAP, TRAP_BRKPT, (void *)frame->tf_sepc, exception); userret(td, frame); break; default: dump_regs(frame); panic("Unknown userland exception %lx, trap value %lx\n", exception, frame->tf_stval); } }