Index: head/sys/amd64/amd64/elf_machdep.c =================================================================== --- head/sys/amd64/amd64/elf_machdep.c (revision 342242) +++ head/sys/amd64/amd64/elf_machdep.c (revision 342243) @@ -1,331 +1,330 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct sysentvec elf64_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, - .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD ELF64", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP | SV_TIMEKEEP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, }; INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec); void amd64_lower_shared_page(struct sysentvec *sv) { if (hw_lower_amd64_sharedpage != 0) { sv->sv_maxuser -= PAGE_SIZE; sv->sv_shared_page_base -= PAGE_SIZE; sv->sv_usrstack -= PAGE_SIZE; sv->sv_psstrings -= PAGE_SIZE; } } /* * Do this fixup before INIT_SYSENTVEC (SI_ORDER_ANY) because the latter * uses the value of sv_shared_page_base. */ SYSINIT(elf64_sysvec_fixup, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) amd64_lower_shared_page, &elf64_freebsd_sysvec); static Elf64_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_X86_64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_info); static Elf64_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_X86_64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_oinfo); static Elf64_Brandinfo kfreebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_X86_64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/lib/ld-kfreebsd-x86-64.so.1", .sysvec = &elf64_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf64_kfreebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY }; SYSINIT(kelf64, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf64_insert_brand_entry, &kfreebsd_brand_info); void elf64_dump_thread(struct thread *td, void *dst, size_t *off) { void *buf; size_t len; len = 0; if (use_xsave) { if (dst != NULL) { fpugetregs(td); len += elf64_populate_note(NT_X86_XSTATE, get_pcb_user_save_td(td), dst, cpu_max_ext_state_size, &buf); *(uint64_t *)((char *)buf + X86_XSTATE_XCR0_OFFSET) = xsave_mask; } else len += elf64_populate_note(NT_X86_XSTATE, NULL, NULL, cpu_max_ext_state_size, NULL); } *off = len; } bool elf_is_ifunc_reloc(Elf_Size r_info) { return (ELF_R_TYPE(r_info) == R_X86_64_IRELATIVE); } /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { Elf64_Addr *where, val; Elf32_Addr *where32, val32; Elf_Addr addr; Elf_Addr addend; Elf_Size rtype, symidx; const Elf_Rel *rel; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: rel = (const Elf_Rel *)data; where = (Elf_Addr *) (relocbase + rel->r_offset); rtype = ELF_R_TYPE(rel->r_info); symidx = ELF_R_SYM(rel->r_info); /* Addend is 32 bit on 32 bit relocs */ switch (rtype) { case R_X86_64_PC32: case R_X86_64_32S: case R_X86_64_PLT32: addend = *(Elf32_Addr *)where; break; default: addend = *where; break; } break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("unknown reloc type %d\n", type); } switch (rtype) { case R_X86_64_NONE: /* none */ break; case R_X86_64_64: /* S + A */ error = lookup(lf, symidx, 1, &addr); val = addr + addend; if (error != 0) return -1; if (*where != val) *where = val; break; case R_X86_64_PC32: /* S + A - P */ case R_X86_64_PLT32: /* L + A - P, L is PLT location for the symbol, which we treat as S */ error = lookup(lf, symidx, 1, &addr); where32 = (Elf32_Addr *)where; val32 = (Elf32_Addr)(addr + addend - (Elf_Addr)where); if (error != 0) return -1; if (*where32 != val32) *where32 = val32; break; case R_X86_64_32S: /* S + A sign extend */ error = lookup(lf, symidx, 1, &addr); val32 = (Elf32_Addr)(addr + addend); where32 = (Elf32_Addr *)where; if (error != 0) return -1; if (*where32 != val32) *where32 = val32; break; case R_X86_64_COPY: /* none */ /* * There shouldn't be copy relocations in kernel * objects. */ printf("kldload: unexpected R_COPY relocation\n"); return (-1); break; case R_X86_64_GLOB_DAT: /* S */ case R_X86_64_JMP_SLOT: /* XXX need addend + offset */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; if (*where != addr) *where = addr; break; case R_X86_64_RELATIVE: /* B + A */ addr = relocbase + addend; val = addr; if (*where != val) *where = val; break; case R_X86_64_IRELATIVE: addr = relocbase + addend; val = ((Elf64_Addr (*)(void))addr)(); if (*where != val) *where = val; break; default: printf("kldload: unexpected relocation type %ld\n", rtype); return (-1); } return (0); } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, lookup)); } int elf_cpu_load_file(linker_file_t lf __unused) { return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } Index: head/sys/amd64/amd64/trap.c =================================================================== --- head/sys/amd64/amd64/trap.c (revision 342242) +++ head/sys/amd64/amd64/trap.c (revision 342243) @@ -1,1193 +1,1188 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 1994, David Greenman * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 */ #include __FBSDID("$FreeBSD$"); /* * AMD64 Trap and System call handling */ #include "opt_clock.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_hwpmc_hooks.h" #include "opt_isa.h" #include "opt_kdb.h" #include "opt_stack.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include PMC_SOFT_DEFINE( , , page_fault, all); PMC_SOFT_DEFINE( , , page_fault, read); PMC_SOFT_DEFINE( , , page_fault, write); #endif #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #ifdef KDTRACE_HOOKS #include #endif extern inthand_t IDTVEC(bpt), IDTVEC(bpt_pti), IDTVEC(dbg), IDTVEC(fast_syscall), IDTVEC(fast_syscall_pti), IDTVEC(fast_syscall32), IDTVEC(int0x80_syscall_pti), IDTVEC(int0x80_syscall); void __noinline trap(struct trapframe *frame); void trap_check(struct trapframe *frame); void dblfault_handler(struct trapframe *frame); static int trap_pfault(struct trapframe *, int); static void trap_fatal(struct trapframe *, vm_offset_t); #define MAX_TRAP_MSG 32 static char *trap_msg[] = { "", /* 0 unused */ "privileged instruction fault", /* 1 T_PRIVINFLT */ "", /* 2 unused */ "breakpoint instruction fault", /* 3 T_BPTFLT */ "", /* 4 unused */ "", /* 5 unused */ "arithmetic trap", /* 6 T_ARITHTRAP */ "", /* 7 unused */ "", /* 8 unused */ "general protection fault", /* 9 T_PROTFLT */ "debug exception", /* 10 T_TRCTRAP */ "", /* 11 unused */ "page fault", /* 12 T_PAGEFLT */ "", /* 13 unused */ "alignment fault", /* 14 T_ALIGNFLT */ "", /* 15 unused */ "", /* 16 unused */ "", /* 17 unused */ "integer divide fault", /* 18 T_DIVIDE */ "non-maskable interrupt trap", /* 19 T_NMI */ "overflow trap", /* 20 T_OFLOW */ "FPU bounds check fault", /* 21 T_BOUND */ "FPU device not available", /* 22 T_DNA */ "double fault", /* 23 T_DOUBLEFLT */ "FPU operand fetch fault", /* 24 T_FPOPFLT */ "invalid TSS fault", /* 25 T_TSSFLT */ "segment not present fault", /* 26 T_SEGNPFLT */ "stack fault", /* 27 T_STKFLT */ "machine check trap", /* 28 T_MCHK */ "SIMD floating-point exception", /* 29 T_XMMFLT */ "reserved (unknown) fault", /* 30 T_RESERVED */ "", /* 31 unused (reserved) */ "DTrace pid return trap", /* 32 T_DTRACE_RET */ }; static int prot_fault_translation; SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RWTUN, &prot_fault_translation, 0, "Select signal to deliver on protection fault"); static int uprintf_signal; SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RWTUN, &uprintf_signal, 0, "Print debugging information on trap signal to ctty"); /* * Control L1D flush on return from NMI. * * Tunable can be set to the following values: * 0 - only enable flush on return from NMI if required by vmm.ko (default) * >1 - always flush on return from NMI. * * Post-boot, the sysctl indicates if flushing is currently enabled. */ int nmi_flush_l1d_sw; SYSCTL_INT(_machdep, OID_AUTO, nmi_flush_l1d_sw, CTLFLAG_RWTUN, &nmi_flush_l1d_sw, 0, "Flush L1 Data Cache on NMI exit, software bhyve L1TF mitigation assist"); /* * Exception, fault, and trap interface to the FreeBSD kernel. * This common code is called from assembly language IDT gate entry * routines that prepare a suitable stack frame, and restore this * frame after the exception has been processed. */ void trap(struct trapframe *frame) { ksiginfo_t ksi; struct thread *td; struct proc *p; register_t addr, dr6; int signo, ucode; u_int type; td = curthread; p = td->td_proc; signo = 0; ucode = 0; addr = 0; dr6 = 0; VM_CNT_INC(v_trap); type = frame->tf_trapno; #ifdef SMP /* Handler for NMI IPIs used for stopping CPUs. */ if (type == T_NMI && ipi_nmi_handler() == 0) return; #endif #ifdef KDB if (kdb_active) { kdb_reenter(); return; } #endif if (type == T_RESERVED) { trap_fatal(frame, 0); return; } if (type == T_NMI) { #ifdef HWPMC_HOOKS /* * CPU PMCs interrupt using an NMI. If the PMC module is * active, pass the 'rip' value to the PMC module's interrupt * handler. A non-zero return value from the handler means that * the NMI was consumed by it and we can return immediately. */ if (pmc_intr != NULL && (*pmc_intr)(frame) != 0) return; #endif #ifdef STACK if (stack_nmi_handler(frame) != 0) return; #endif } if ((frame->tf_rflags & PSL_I) == 0) { /* * Buggy application or kernel code has disabled * interrupts and then trapped. Enabling interrupts * now is wrong, but it is better than running with * interrupts disabled until they are accidentally * enabled later. */ if (TRAPF_USERMODE(frame)) uprintf( "pid %ld (%s): trap %d with interrupts disabled\n", (long)curproc->p_pid, curthread->td_name, type); else if (type != T_NMI && type != T_BPTFLT && type != T_TRCTRAP) { /* * XXX not quite right, since this may be for a * multiple fault in user mode. */ printf("kernel trap %d with interrupts disabled\n", type); /* * We shouldn't enable interrupts while holding a * spin lock. */ if (td->td_md.md_spinlock_count == 0) enable_intr(); } } if (TRAPF_USERMODE(frame)) { /* user trap */ td->td_pticks = 0; td->td_frame = frame; addr = frame->tf_rip; if (td->td_cowgen != p->p_cowgen) thread_cow_update(td); switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ signo = SIGILL; ucode = ILL_PRVOPC; break; case T_BPTFLT: /* bpt instruction fault */ enable_intr(); #ifdef KDTRACE_HOOKS if (dtrace_pid_probe_ptr != NULL && dtrace_pid_probe_ptr(frame) == 0) return; #endif signo = SIGTRAP; ucode = TRAP_BRKPT; break; case T_TRCTRAP: /* debug exception */ enable_intr(); signo = SIGTRAP; ucode = TRAP_TRACE; dr6 = rdr6(); if ((dr6 & DBREG_DR6_BS) != 0) { PROC_LOCK(td->td_proc); if ((td->td_dbgflags & TDB_STEP) != 0) { td->td_frame->tf_rflags &= ~PSL_T; td->td_dbgflags &= ~TDB_STEP; } PROC_UNLOCK(td->td_proc); } break; case T_ARITHTRAP: /* arithmetic trap */ ucode = fputrap_x87(); if (ucode == -1) return; signo = SIGFPE; break; case T_PROTFLT: /* general protection fault */ signo = SIGBUS; ucode = BUS_OBJERR; break; case T_STKFLT: /* stack fault */ case T_SEGNPFLT: /* segment not present fault */ signo = SIGBUS; ucode = BUS_ADRERR; break; case T_TSSFLT: /* invalid TSS fault */ signo = SIGBUS; ucode = BUS_OBJERR; break; case T_ALIGNFLT: signo = SIGBUS; ucode = BUS_ADRALN; break; case T_DOUBLEFLT: /* double fault */ default: signo = SIGBUS; ucode = BUS_OBJERR; break; case T_PAGEFLT: /* page fault */ /* * Emulator can take care about this trap? */ if (*p->p_sysent->sv_trap != NULL && (*p->p_sysent->sv_trap)(td) == 0) return; addr = frame->tf_addr; signo = trap_pfault(frame, TRUE); if (signo == -1) return; if (signo == 0) goto userret; if (signo == SIGSEGV) { ucode = SEGV_MAPERR; } else if (prot_fault_translation == 0) { /* * Autodetect. This check also covers * the images without the ABI-tag ELF * note. */ if (SV_CURPROC_ABI() == SV_ABI_FREEBSD && p->p_osrel >= P_OSREL_SIGSEGV) { signo = SIGSEGV; ucode = SEGV_ACCERR; } else { signo = SIGBUS; ucode = T_PAGEFLT; } } else if (prot_fault_translation == 1) { /* * Always compat mode. */ signo = SIGBUS; ucode = T_PAGEFLT; } else { /* * Always SIGSEGV mode. */ signo = SIGSEGV; ucode = SEGV_ACCERR; } break; case T_DIVIDE: /* integer divide fault */ ucode = FPE_INTDIV; signo = SIGFPE; break; #ifdef DEV_ISA case T_NMI: nmi_handle_intr(type, frame); return; #endif case T_OFLOW: /* integer overflow fault */ ucode = FPE_INTOVF; signo = SIGFPE; break; case T_BOUND: /* bounds check fault */ ucode = FPE_FLTSUB; signo = SIGFPE; break; case T_DNA: /* transparent fault (due to context switch "late") */ KASSERT(PCB_USER_FPU(td->td_pcb), ("kernel FPU ctx has leaked")); fpudna(); return; case T_FPOPFLT: /* FPU operand fetch fault */ ucode = ILL_COPROC; signo = SIGILL; break; case T_XMMFLT: /* SIMD floating-point exception */ ucode = fputrap_sse(); if (ucode == -1) return; signo = SIGFPE; break; #ifdef KDTRACE_HOOKS case T_DTRACE_RET: enable_intr(); if (dtrace_return_probe_ptr != NULL) dtrace_return_probe_ptr(frame); return; #endif } } else { /* kernel trap */ KASSERT(cold || td->td_ucred != NULL, ("kernel trap doesn't have ucred")); switch (type) { case T_PAGEFLT: /* page fault */ (void) trap_pfault(frame, FALSE); return; case T_DNA: if (PCB_USER_FPU(td->td_pcb)) panic("Unregistered use of FPU in kernel"); fpudna(); return; case T_ARITHTRAP: /* arithmetic trap */ case T_XMMFLT: /* SIMD floating-point exception */ case T_FPOPFLT: /* FPU operand fetch fault */ /* * For now, supporting kernel handler * registration for FPU traps is overkill. */ trap_fatal(frame, 0); return; case T_STKFLT: /* stack fault */ case T_PROTFLT: /* general protection fault */ case T_SEGNPFLT: /* segment not present fault */ if (td->td_intr_nesting_level != 0) break; /* * Invalid segment selectors and out of bounds * %rip's and %rsp's can be set up in user mode. * This causes a fault in kernel mode when the * kernel tries to return to user mode. We want * to get this fault so that we can fix the * problem here and not have to check all the * selectors and pointers when the user changes * them. * * In case of PTI, the IRETQ faulted while the * kernel used the pti stack, and exception * frame records %rsp value pointing to that * stack. If we return normally to * doreti_iret_fault, the trapframe is * reconstructed on pti stack, and calltrap() * called on it as well. Due to the very * limited pti stack size, kernel does not * survive for too long. Switch to the normal * thread stack for the trap handling. * * Magic '5' is the number of qwords occupied by * the hardware trap frame. */ if (frame->tf_rip == (long)doreti_iret) { frame->tf_rip = (long)doreti_iret_fault; if ((PCPU_GET(curpmap)->pm_ucr3 != PMAP_NO_CR3) && (frame->tf_rsp == (uintptr_t)PCPU_GET( pti_rsp0) - 5 * sizeof(register_t))) { frame->tf_rsp = PCPU_GET(rsp0) - 5 * sizeof(register_t); } return; } if (frame->tf_rip == (long)ld_ds) { frame->tf_rip = (long)ds_load_fault; return; } if (frame->tf_rip == (long)ld_es) { frame->tf_rip = (long)es_load_fault; return; } if (frame->tf_rip == (long)ld_fs) { frame->tf_rip = (long)fs_load_fault; return; } if (frame->tf_rip == (long)ld_gs) { frame->tf_rip = (long)gs_load_fault; return; } if (frame->tf_rip == (long)ld_gsbase) { frame->tf_rip = (long)gsbase_load_fault; return; } if (frame->tf_rip == (long)ld_fsbase) { frame->tf_rip = (long)fsbase_load_fault; return; } if (curpcb->pcb_onfault != NULL) { frame->tf_rip = (long)curpcb->pcb_onfault; return; } break; case T_TSSFLT: /* * PSL_NT can be set in user mode and isn't cleared * automatically when the kernel is entered. This * causes a TSS fault when the kernel attempts to * `iret' because the TSS link is uninitialized. We * want to get this fault so that we can fix the * problem here and not every time the kernel is * entered. */ if (frame->tf_rflags & PSL_NT) { frame->tf_rflags &= ~PSL_NT; return; } break; case T_TRCTRAP: /* debug exception */ /* Clear any pending debug events. */ dr6 = rdr6(); load_dr6(0); /* * Ignore debug register exceptions due to * accesses in the user's address space, which * can happen under several conditions such as * if a user sets a watchpoint on a buffer and * then passes that buffer to a system call. * We still want to get TRCTRAPS for addresses * in kernel space because that is useful when * debugging the kernel. */ if (user_dbreg_trap(dr6)) return; /* * Malicious user code can configure a debug * register watchpoint to trap on data access * to the top of stack and then execute 'pop * %ss; int 3'. Due to exception deferral for * 'pop %ss', the CPU will not interrupt 'int * 3' to raise the DB# exception for the debug * register but will postpone the DB# until * execution of the first instruction of the * BP# handler (in kernel mode). Normally the * previous check would ignore DB# exceptions * for watchpoints on user addresses raised in * kernel mode. However, some CPU errata * include cases where DB# exceptions do not * properly set bits in %dr6, e.g. Haswell * HSD23 and Skylake-X SKZ24. * * A deferred DB# can also be raised on the * first instructions of system call entry * points or single-step traps via similar use * of 'pop %ss' or 'mov xxx, %ss'. */ if (pti) { if (frame->tf_rip == (uintptr_t)IDTVEC(fast_syscall_pti) || #ifdef COMPAT_FREEBSD32 frame->tf_rip == (uintptr_t)IDTVEC(int0x80_syscall_pti) || #endif frame->tf_rip == (uintptr_t)IDTVEC(bpt_pti)) return; } else { if (frame->tf_rip == (uintptr_t)IDTVEC(fast_syscall) || #ifdef COMPAT_FREEBSD32 frame->tf_rip == (uintptr_t)IDTVEC(int0x80_syscall) || #endif frame->tf_rip == (uintptr_t)IDTVEC(bpt)) return; } if (frame->tf_rip == (uintptr_t)IDTVEC(dbg) || /* Needed for AMD. */ frame->tf_rip == (uintptr_t)IDTVEC(fast_syscall32)) return; /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) */ case T_BPTFLT: /* * If KDB is enabled, let it handle the debugger trap. * Otherwise, debugger traps "can't happen". */ #ifdef KDB if (kdb_trap(type, dr6, frame)) return; #endif break; #ifdef DEV_ISA case T_NMI: nmi_handle_intr(type, frame); return; #endif } trap_fatal(frame, 0); return; } /* Translate fault for emulators (e.g. Linux) */ if (*p->p_sysent->sv_transtrap != NULL) signo = (*p->p_sysent->sv_transtrap)(signo, type); ksiginfo_init_trap(&ksi); ksi.ksi_signo = signo; ksi.ksi_code = ucode; ksi.ksi_trapno = type; ksi.ksi_addr = (void *)addr; if (uprintf_signal) { uprintf("pid %d comm %s: signal %d err %lx code %d type %d " "addr 0x%lx rsp 0x%lx rip 0x%lx " "<%02x %02x %02x %02x %02x %02x %02x %02x>\n", p->p_pid, p->p_comm, signo, frame->tf_err, ucode, type, addr, frame->tf_rsp, frame->tf_rip, fubyte((void *)(frame->tf_rip + 0)), fubyte((void *)(frame->tf_rip + 1)), fubyte((void *)(frame->tf_rip + 2)), fubyte((void *)(frame->tf_rip + 3)), fubyte((void *)(frame->tf_rip + 4)), fubyte((void *)(frame->tf_rip + 5)), fubyte((void *)(frame->tf_rip + 6)), fubyte((void *)(frame->tf_rip + 7))); } KASSERT((read_rflags() & PSL_I) != 0, ("interrupts disabled")); trapsignal(td, &ksi); userret: userret(td, frame); KASSERT(PCB_USER_FPU(td->td_pcb), ("Return from trap with kernel FPU ctx leaked")); } /* * Ensure that we ignore any DTrace-induced faults. This function cannot * be instrumented, so it cannot generate such faults itself. */ void trap_check(struct trapframe *frame) { #ifdef KDTRACE_HOOKS if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, frame->tf_trapno) != 0) return; #endif trap(frame); } static bool trap_is_smap(struct trapframe *frame) { /* * A page fault on a userspace address is classified as * SMAP-induced if: * - SMAP is supported; * - kernel mode accessed present data page; * - rflags.AC was cleared. * Kernel must never access user space with rflags.AC cleared * if SMAP is enabled. */ return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 && (frame->tf_err & (PGEX_P | PGEX_U | PGEX_I | PGEX_RSV)) == PGEX_P && (frame->tf_rflags & PSL_AC) == 0); } static bool trap_is_pti(struct trapframe *frame) { return (PCPU_GET(curpmap)->pm_ucr3 != PMAP_NO_CR3 && pg_nx != 0 && (frame->tf_err & (PGEX_P | PGEX_W | PGEX_U | PGEX_I)) == (PGEX_P | PGEX_U | PGEX_I) && (curpcb->pcb_saved_ucr3 & ~CR3_PCID_MASK) == (PCPU_GET(curpmap)->pm_cr3 & ~CR3_PCID_MASK)); } static int trap_pfault(struct trapframe *frame, int usermode) { struct thread *td; struct proc *p; vm_map_t map; vm_offset_t va; int rv; vm_prot_t ftype; vm_offset_t eva; td = curthread; p = td->td_proc; eva = frame->tf_addr; if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) { /* * Due to both processor errata and lazy TLB invalidation when * access restrictions are removed from virtual pages, memory * accesses that are allowed by the physical mapping layer may * nonetheless cause one spurious page fault per virtual page. * When the thread is executing a "no faulting" section that * is bracketed by vm_fault_{disable,enable}_pagefaults(), * every page fault is treated as a spurious page fault, * unless it accesses the same virtual address as the most * recent page fault within the same "no faulting" section. */ if (td->td_md.md_spurflt_addr != eva || (td->td_pflags & TDP_RESETSPUR) != 0) { /* * Do nothing to the TLB. A stale TLB entry is * flushed automatically by a page fault. */ td->td_md.md_spurflt_addr = eva; td->td_pflags &= ~TDP_RESETSPUR; return (0); } } else { /* * If we get a page fault while in a critical section, then * it is most likely a fatal kernel page fault. The kernel * is already going to panic trying to get a sleep lock to * do the VM lookup, so just consider it a fatal trap so the * kernel can print out a useful trap message and even get * to the debugger. * * If we get a page fault while holding a non-sleepable * lock, then it is most likely a fatal kernel page fault. * If WITNESS is enabled, then it's going to whine about * bogus LORs with various VM locks, so just skip to the * fatal trap handling directly. */ if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) { trap_fatal(frame, eva); return (-1); } } va = trunc_page(eva); if (va >= VM_MIN_KERNEL_ADDRESS) { /* * Don't allow user-mode faults in kernel address space. */ if (usermode) return (SIGSEGV); map = kernel_map; } else { map = &p->p_vmspace->vm_map; /* * When accessing a usermode address, kernel must be * ready to accept the page fault, and provide a * handling routine. Since accessing the address * without the handler is a bug, do not try to handle * it normally, and panic immediately. * * If SMAP is enabled, filter SMAP faults also, * because illegal access might occur to the mapped * user address, causing infinite loop. */ if (!usermode && (td->td_intr_nesting_level != 0 || trap_is_smap(frame) || curpcb->pcb_onfault == NULL)) { trap_fatal(frame, eva); return (-1); } } /* * If the trap was caused by errant bits in the PTE then panic. */ if (frame->tf_err & PGEX_RSV) { trap_fatal(frame, eva); return (-1); } /* * If nx protection of the usermode portion of kernel page * tables caused trap, panic. */ if (usermode && trap_is_pti(frame)) panic("PTI: pid %d comm %s tf_err %#lx", p->p_pid, p->p_comm, frame->tf_err); /* * PGEX_I is defined only if the execute disable bit capability is * supported and enabled. */ if (frame->tf_err & PGEX_W) ftype = VM_PROT_WRITE; else if ((frame->tf_err & PGEX_I) && pg_nx != 0) ftype = VM_PROT_EXECUTE; else ftype = VM_PROT_READ; /* Fault in the page. */ rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); if (rv == KERN_SUCCESS) { #ifdef HWPMC_HOOKS if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { PMC_SOFT_CALL_TF( , , page_fault, all, frame); if (ftype == VM_PROT_READ) PMC_SOFT_CALL_TF( , , page_fault, read, frame); else PMC_SOFT_CALL_TF( , , page_fault, write, frame); } #endif return (0); } if (!usermode) { if (td->td_intr_nesting_level == 0 && curpcb->pcb_onfault != NULL) { frame->tf_rip = (long)curpcb->pcb_onfault; return (0); } trap_fatal(frame, eva); return (-1); } return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); } static void trap_fatal(frame, eva) struct trapframe *frame; vm_offset_t eva; { int code, ss; u_int type; struct soft_segment_descriptor softseg; char *msg; #ifdef KDB bool handled; #endif code = frame->tf_err; type = frame->tf_trapno; sdtossd(&gdt[NGDT * PCPU_GET(cpuid) + IDXSEL(frame->tf_cs & 0xffff)], &softseg); if (type <= MAX_TRAP_MSG) msg = trap_msg[type]; else msg = "UNKNOWN"; printf("\n\nFatal trap %d: %s while in %s mode\n", type, msg, TRAPF_USERMODE(frame) ? "user" : "kernel"); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif if (type == T_PAGEFLT) { printf("fault virtual address = 0x%lx\n", eva); printf("fault code = %s %s %s, %s\n", code & PGEX_U ? "user" : "supervisor", code & PGEX_W ? "write" : "read", code & PGEX_I ? "instruction" : "data", code & PGEX_RSV ? "reserved bits in PTE" : code & PGEX_P ? "protection violation" : "page not present"); } printf("instruction pointer = 0x%lx:0x%lx\n", frame->tf_cs & 0xffff, frame->tf_rip); ss = frame->tf_ss & 0xffff; printf("stack pointer = 0x%x:0x%lx\n", ss, frame->tf_rsp); printf("frame pointer = 0x%x:0x%lx\n", ss, frame->tf_rbp); printf("code segment = base 0x%lx, limit 0x%lx, type 0x%x\n", softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); printf(" = DPL %d, pres %d, long %d, def32 %d, gran %d\n", softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_long, softseg.ssd_def32, softseg.ssd_gran); printf("processor eflags = "); if (frame->tf_rflags & PSL_T) printf("trace trap, "); if (frame->tf_rflags & PSL_I) printf("interrupt enabled, "); if (frame->tf_rflags & PSL_NT) printf("nested task, "); if (frame->tf_rflags & PSL_RF) printf("resume, "); printf("IOPL = %ld\n", (frame->tf_rflags & PSL_IOPL) >> 12); printf("current process = %d (%s)\n", curproc->p_pid, curthread->td_name); #ifdef KDB if (debugger_on_trap) { kdb_why = KDB_WHY_TRAP; handled = kdb_trap(type, 0, frame); kdb_why = KDB_WHY_UNSET; if (handled) return; } #endif printf("trap number = %d\n", type); if (type <= MAX_TRAP_MSG) panic("%s", trap_msg[type]); else panic("unknown/reserved trap"); } /* * Double fault handler. Called when a fault occurs while writing * a frame for a trap/exception onto the stack. This usually occurs * when the stack overflows (such is the case with infinite recursion, * for example). */ void dblfault_handler(struct trapframe *frame) { #ifdef KDTRACE_HOOKS if (dtrace_doubletrap_func != NULL) (*dtrace_doubletrap_func)(); #endif printf("\nFatal double fault\n" "rip %#lx rsp %#lx rbp %#lx\n" "rax %#lx rdx %#lx rbx %#lx\n" "rcx %#lx rsi %#lx rdi %#lx\n" "r8 %#lx r9 %#lx r10 %#lx\n" "r11 %#lx r12 %#lx r13 %#lx\n" "r14 %#lx r15 %#lx rflags %#lx\n" "cs %#lx ss %#lx ds %#hx es %#hx fs %#hx gs %#hx\n" "fsbase %#lx gsbase %#lx kgsbase %#lx\n", frame->tf_rip, frame->tf_rsp, frame->tf_rbp, frame->tf_rax, frame->tf_rdx, frame->tf_rbx, frame->tf_rcx, frame->tf_rdi, frame->tf_rsi, frame->tf_r8, frame->tf_r9, frame->tf_r10, frame->tf_r11, frame->tf_r12, frame->tf_r13, frame->tf_r14, frame->tf_r15, frame->tf_rflags, frame->tf_cs, frame->tf_ss, frame->tf_ds, frame->tf_es, frame->tf_fs, frame->tf_gs, rdmsr(MSR_FSBASE), rdmsr(MSR_GSBASE), rdmsr(MSR_KGSBASE)); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif panic("double fault"); } static int __noinline cpu_fetch_syscall_args_fallback(struct thread *td, struct syscall_args *sa) { struct proc *p; struct trapframe *frame; register_t *argp; caddr_t params; int reg, regcnt, error; p = td->td_proc; frame = td->td_frame; reg = 0; regcnt = NARGREGS; sa->code = frame->tf_rax; if (sa->code == SYS_syscall || sa->code == SYS___syscall) { sa->code = frame->tf_rdi; reg++; regcnt--; } - if (p->p_sysent->sv_mask) - sa->code &= p->p_sysent->sv_mask; if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; KASSERT(sa->narg <= nitems(sa->args), ("Too many syscall arguments!")); argp = &frame->tf_rdi; argp += reg; memcpy(sa->args, argp, sizeof(sa->args[0]) * NARGREGS); if (sa->narg > regcnt) { params = (caddr_t)frame->tf_rsp + sizeof(register_t); error = copyin(params, &sa->args[regcnt], (sa->narg - regcnt) * sizeof(sa->args[0])); if (__predict_false(error != 0)) return (error); } td->td_retval[0] = 0; td->td_retval[1] = frame->tf_rdx; return (0); } int cpu_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; struct syscall_args *sa; p = td->td_proc; frame = td->td_frame; sa = &td->td_sa; sa->code = frame->tf_rax; if (__predict_false(sa->code == SYS_syscall || sa->code == SYS___syscall || sa->code >= p->p_sysent->sv_size)) return (cpu_fetch_syscall_args_fallback(td, sa)); sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; KASSERT(sa->narg <= nitems(sa->args), ("Too many syscall arguments!")); - - if (p->p_sysent->sv_mask) - sa->code &= p->p_sysent->sv_mask; if (__predict_false(sa->narg > NARGREGS)) return (cpu_fetch_syscall_args_fallback(td, sa)); memcpy(sa->args, &frame->tf_rdi, sizeof(sa->args[0]) * NARGREGS); td->td_retval[0] = 0; td->td_retval[1] = frame->tf_rdx; return (0); } #include "../../kern/subr_syscall.c" static void (*syscall_ret_l1d_flush)(void); int syscall_ret_l1d_flush_mode; static void flush_l1d_hw(void) { wrmsr(MSR_IA32_FLUSH_CMD, IA32_FLUSH_CMD_L1D); } static void __inline amd64_syscall_ret_flush_l1d_inline(int error) { void (*p)(void); if (error != 0 && error != EEXIST && error != EAGAIN && error != EXDEV && error != ENOENT && error != ENOTCONN && error != EINPROGRESS) { p = syscall_ret_l1d_flush; if (p != NULL) p(); } } void amd64_syscall_ret_flush_l1d(int error) { amd64_syscall_ret_flush_l1d_inline(error); } void amd64_syscall_ret_flush_l1d_recalc(void) { bool l1d_hw; l1d_hw = (cpu_stdext_feature3 & CPUID_STDEXT3_L1D_FLUSH) != 0; again: switch (syscall_ret_l1d_flush_mode) { case 0: syscall_ret_l1d_flush = NULL; break; case 1: syscall_ret_l1d_flush = l1d_hw ? flush_l1d_hw : flush_l1d_sw_abi; break; case 2: syscall_ret_l1d_flush = l1d_hw ? flush_l1d_hw : NULL; break; case 3: syscall_ret_l1d_flush = flush_l1d_sw_abi; break; default: syscall_ret_l1d_flush_mode = 1; goto again; } } static int machdep_syscall_ret_flush_l1d(SYSCTL_HANDLER_ARGS) { int error, val; val = syscall_ret_l1d_flush_mode; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); syscall_ret_l1d_flush_mode = val; amd64_syscall_ret_flush_l1d_recalc(); return (0); } SYSCTL_PROC(_machdep, OID_AUTO, syscall_ret_flush_l1d, CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, NULL, 0, machdep_syscall_ret_flush_l1d, "I", "Flush L1D on syscall return with error (0 - off, 1 - on, " "2 - use hw only, 3 - use sw only"); /* * System call handler for native binaries. The trap frame is already * set up by the assembler trampoline and a pointer to it is saved in * td_frame. */ void amd64_syscall(struct thread *td, int traced) { int error; ksiginfo_t ksi; #ifdef DIAGNOSTIC if (!TRAPF_USERMODE(td->td_frame)) { panic("syscall"); /* NOT REACHED */ } #endif error = syscallenter(td); /* * Traced syscall. */ if (__predict_false(traced)) { td->td_frame->tf_rflags &= ~PSL_T; ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGTRAP; ksi.ksi_code = TRAP_TRACE; ksi.ksi_addr = (void *)td->td_frame->tf_rip; trapsignal(td, &ksi); } KASSERT(PCB_USER_FPU(td->td_pcb), ("System call %s returning with kernel FPU ctx leaked", syscallname(td->td_proc, td->td_sa.code))); KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td), ("System call %s returning with mangled pcb_save", syscallname(td->td_proc, td->td_sa.code))); KASSERT(td->td_md.md_invl_gen.gen == 0, ("System call %s returning with leaked invl_gen %lu", syscallname(td->td_proc, td->td_sa.code), td->td_md.md_invl_gen.gen)); syscallret(td, error); /* * If the user-supplied value of %rip is not a canonical * address, then some CPUs will trigger a ring 0 #GP during * the sysret instruction. However, the fault handler would * execute in ring 0 with the user's %gs and %rsp which would * not be safe. Instead, use the full return path which * catches the problem safely. */ if (__predict_false(td->td_frame->tf_rip >= VM_MAXUSER_ADDRESS)) set_pcb_flags(td->td_pcb, PCB_FULL_IRET); amd64_syscall_ret_flush_l1d_inline(error); } Index: head/sys/amd64/conf/NOTES =================================================================== --- head/sys/amd64/conf/NOTES (revision 342242) +++ head/sys/amd64/conf/NOTES (revision 342243) @@ -1,683 +1,680 @@ # # NOTES -- Lines that can be cut/pasted into kernel and hints configs. # # This file contains machine dependent kernel configuration notes. For # machine independent notes, look in /sys/conf/NOTES. # # $FreeBSD$ # # # We want LINT to cover profiling as well. profile 2 # # Enable the kernel DTrace hooks which are required to load the DTrace # kernel modules. # options KDTRACE_HOOKS # DTrace core # NOTE: introduces CDDL-licensed components into the kernel #device dtrace # DTrace modules #device dtrace_profile #device dtrace_sdt #device dtrace_fbt #device dtrace_systrace #device dtrace_prototype #device dtnfscl #device dtmalloc # Alternatively include all the DTrace modules #device dtraceall ##################################################################### # SMP OPTIONS: # # Notes: # # IPI_PREEMPTION instructs the kernel to preempt threads running on other # CPUS if needed. Relies on the PREEMPTION option # Optional: options IPI_PREEMPTION device atpic # Optional legacy pic support device mptable # Optional MPSPEC mptable support # # Watchdog routines. # options MP_WATCHDOG # Debugging options. # options COUNT_XINVLTLB_HITS # Counters for TLB events options COUNT_IPIS # Per-CPU IPI interrupt counters ##################################################################### # CPU OPTIONS # # You must specify at least one CPU (the one you intend to run on); # deleting the specification for CPUs you don't need to use may make # parts of the system run faster. # cpu HAMMER # aka K8, aka Opteron & Athlon64 # # Options for CPU features. # ##################################################################### # NETWORKING OPTIONS # # DEVICE_POLLING adds support for mixed interrupt-polling handling # of network device drivers, which has significant benefits in terms # of robustness to overloads and responsivity, as well as permitting # accurate scheduling of the CPU time between kernel network processing # and other activities. The drawback is a moderate (up to 1/HZ seconds) # potential increase in response times. # It is strongly recommended to use HZ=1000 or 2000 with DEVICE_POLLING # to achieve smoother behaviour. # Additionally, you can enable/disable polling at runtime with help of # the ifconfig(8) utility, and select the CPU fraction reserved to # userland with the sysctl variable kern.polling.user_frac # (default 50, range 0..100). # # Not all device drivers support this mode of operation at the time of # this writing. See polling(4) for more details. options DEVICE_POLLING # BPF_JITTER adds support for BPF just-in-time compiler. options BPF_JITTER # OpenFabrics Enterprise Distribution (Infiniband). options OFED options OFED_DEBUG_INIT # Sockets Direct Protocol options SDP options SDP_DEBUG # IP over Infiniband options IPOIB options IPOIB_DEBUG options IPOIB_CM ##################################################################### # CLOCK OPTIONS # Provide read/write access to the memory in the clock chip. device nvram # Access to rtc cmos via /dev/nvram ##################################################################### # MISCELLANEOUS DEVICES AND OPTIONS device speaker #Play IBM BASIC-style noises out your speaker hint.speaker.0.at="isa" hint.speaker.0.port="0x61" device gzip #Exec gzipped a.out's. REQUIRES COMPAT_AOUT! ##################################################################### # HARDWARE BUS CONFIGURATION # # ISA bus # device isa # # Options for `isa': # # AUTO_EOI_1 enables the `automatic EOI' feature for the master 8259A # interrupt controller. This saves about 0.7-1.25 usec for each interrupt. # This option breaks suspend/resume on some portables. # # AUTO_EOI_2 enables the `automatic EOI' feature for the slave 8259A # interrupt controller. This saves about 0.7-1.25 usec for each interrupt. # Automatic EOI is documented not to work for for the slave with the # original i8259A, but it works for some clones and some integrated # versions. # # MAXMEM specifies the amount of RAM on the machine; if this is not # specified, FreeBSD will first read the amount of memory from the CMOS # RAM, so the amount of memory will initially be limited to 64MB or 16MB # depending on the BIOS. If the BIOS reports 64MB, a memory probe will # then attempt to detect the installed amount of RAM. If this probe # fails to detect >64MB RAM you will have to use the MAXMEM option. # The amount is in kilobytes, so for a machine with 128MB of RAM, it would # be 131072 (128 * 1024). # # BROKEN_KEYBOARD_RESET disables the use of the keyboard controller to # reset the CPU for reboot. This is needed on some systems with broken # keyboard controllers. options AUTO_EOI_1 #options AUTO_EOI_2 options MAXMEM=(128*1024) #options BROKEN_KEYBOARD_RESET # # AGP GART support device agp # # AGP debugging. # options AGP_DEBUG ##################################################################### # HARDWARE DEVICE CONFIGURATION # To include support for VGA VESA video modes options VESA # Turn on extra debugging checks and output for VESA support. options VESA_DEBUG device dpms # DPMS suspend & resume via VESA BIOS # x86 real mode BIOS emulator, required by atkbdc/dpms/vesa options X86BIOS # # Optional devices: # # PS/2 mouse device psm hint.psm.0.at="atkbdc" hint.psm.0.irq="12" # Options for psm: options PSM_HOOKRESUME #hook the system resume event, useful #for some laptops options PSM_RESETAFTERSUSPEND #reset the device at the resume event # The keyboard controller; it controls the keyboard and the PS/2 mouse. device atkbdc hint.atkbdc.0.at="isa" hint.atkbdc.0.port="0x060" # The AT keyboard device atkbd hint.atkbd.0.at="atkbdc" hint.atkbd.0.irq="1" # Options for atkbd: options ATKBD_DFLT_KEYMAP # specify the built-in keymap makeoptions ATKBD_DFLT_KEYMAP=fr.dvorak # `flags' for atkbd: # 0x01 Force detection of keyboard, else we always assume a keyboard # 0x02 Don't reset keyboard, useful for some newer ThinkPads # 0x03 Force detection and avoid reset, might help with certain # dockingstations # 0x04 Old-style (XT) keyboard support, useful for older ThinkPads # Video card driver for VGA adapters. device vga hint.vga.0.at="isa" # Options for vga: # Try the following option if the mouse pointer is not drawn correctly # or font does not seem to be loaded properly. May cause flicker on # some systems. options VGA_ALT_SEQACCESS # If you can dispense with some vga driver features, you may want to # use the following options to save some memory. #options VGA_NO_FONT_LOADING # don't save/load font #options VGA_NO_MODE_CHANGE # don't change video modes # Older video cards may require this option for proper operation. options VGA_SLOW_IOACCESS # do byte-wide i/o's to TS and GDC regs # The following option probably won't work with the LCD displays. options VGA_WIDTH90 # support 90 column modes # Debugging. options VGA_DEBUG # vt(4) drivers. device vt_vga # VGA device vt_efifb # EFI framebuffer # Linear framebuffer driver for S3 VESA 1.2 cards. Works on top of VESA. device s3pci # 3Dfx Voodoo Graphics, Voodoo II /dev/3dfx CDEV support. This will create # the /dev/3dfx0 device to work with glide implementations. This should get # linked to /dev/3dfx and /dev/voodoo. Note that this is not the same as # the tdfx DRI module from XFree86 and is completely unrelated. # # To enable Linuxulator support, one must also include COMPAT_LINUX in the # config as well. The other option is to load both as modules. device tdfx # Enable 3Dfx Voodoo support #XXX#device tdfx_linux # Enable Linuxulator support # # ACPI support using the Intel ACPI Component Architecture reference # implementation. # # ACPI_DEBUG enables the use of the debug.acpi.level and debug.acpi.layer # kernel environment variables to select initial debugging levels for the # Intel ACPICA code. (Note that the Intel code must also have USE_DEBUGGER # defined when it is built). device acpi options ACPI_DEBUG # The cpufreq(4) driver provides support for non-ACPI CPU frequency control device cpufreq # Direct Rendering modules for 3D acceleration. device drm # DRM core module required by DRM drivers device mach64drm # ATI Rage Pro, Rage Mobility P/M, Rage XL device mgadrm # AGP Matrox G200, G400, G450, G550 device r128drm # ATI Rage 128 device savagedrm # S3 Savage3D, Savage4 device sisdrm # SiS 300/305, 540, 630 device tdfxdrm # 3dfx Voodoo 3/4/5 and Banshee device viadrm # VIA options DRM_DEBUG # Include debug printfs (slow) # # Network interfaces: # # bxe: Broadcom NetXtreme II (BCM5771X/BCM578XX) PCIe 10Gb Ethernet # adapters. # ed: Western Digital and SMC 80xx; Novell NE1000 and NE2000; 3Com 3C503 # HP PC Lan+, various PC Card devices # (requires miibus) # ipw: Intel PRO/Wireless 2100 IEEE 802.11 adapter # Requires the ipw firmware module # iwi: Intel PRO/Wireless 2200BG/2225BG/2915ABG IEEE 802.11 adapters # Requires the iwi firmware module # iwn: Intel Wireless WiFi Link 1000/105/135/2000/4965/5000/6000/6050 abgn # 802.11 network adapters # Requires the iwn firmware module # mthca: Mellanox HCA InfiniBand # mlx4ib: Mellanox ConnectX HCA InfiniBand # mlx4en: Mellanox ConnectX HCA Ethernet # nfe: nVidia nForce MCP on-board Ethernet Networking (BSD open source) # sfxge: Solarflare SFC9000 family 10Gb Ethernet adapters # vmx: VMware VMXNET3 Ethernet (BSD open source) # wpi: Intel 3945ABG Wireless LAN controller # Requires the wpi firmware module device bxe # Broadcom NetXtreme II BCM5771X/BCM578XX 10GbE device ed # NE[12]000, SMC Ultra, 3c503, DS8390 cards options ED_3C503 options ED_HPP options ED_SIC device ipw # Intel 2100 wireless NICs. device iwi # Intel 2200BG/2225BG/2915ABG wireless NICs. device iwn # Intel 4965/1000/5000/6000 wireless NICs. device ixl # Intel 700 Series Physical Function device iavf # Intel Adaptive Virtual Function device mthca # Mellanox HCA InfiniBand device mlx4 # Shared code module between IB and Ethernet device mlx4ib # Mellanox ConnectX HCA InfiniBand device mlx4en # Mellanox ConnectX HCA Ethernet device nfe # nVidia nForce MCP on-board Ethernet device sfxge # Solarflare SFC9000 10Gb Ethernet device vmx # VMware VMXNET3 Ethernet device wpi # Intel 3945ABG wireless NICs. # IEEE 802.11 adapter firmware modules # Intel PRO/Wireless 2100 firmware: # ipwfw: BSS/IBSS/monitor mode firmware # ipwbssfw: BSS mode firmware # ipwibssfw: IBSS mode firmware # ipwmonitorfw: Monitor mode firmware # Intel PRO/Wireless 2200BG/2225BG/2915ABG firmware: # iwifw: BSS/IBSS/monitor mode firmware # iwibssfw: BSS mode firmware # iwiibssfw: IBSS mode firmware # iwimonitorfw: Monitor mode firmware # Intel Wireless WiFi Link 4965/1000/5000/6000 series firmware: # iwnfw: Single module to support all devices # iwn1000fw: Specific module for the 1000 only # iwn105fw: Specific module for the 105 only # iwn135fw: Specific module for the 135 only # iwn2000fw: Specific module for the 2000 only # iwn2030fw: Specific module for the 2030 only # iwn4965fw: Specific module for the 4965 only # iwn5000fw: Specific module for the 5000 only # iwn5150fw: Specific module for the 5150 only # iwn6000fw: Specific module for the 6000 only # iwn6000g2afw: Specific module for the 6000g2a only # iwn6000g2bfw: Specific module for the 6000g2b only # iwn6050fw: Specific module for the 6050 only # wpifw: Intel 3945ABG Wireless LAN Controller firmware device iwifw device iwibssfw device iwiibssfw device iwimonitorfw device ipwfw device ipwbssfw device ipwibssfw device ipwmonitorfw device iwnfw device iwn1000fw device iwn105fw device iwn135fw device iwn2000fw device iwn2030fw device iwn4965fw device iwn5000fw device iwn5150fw device iwn6000fw device iwn6000g2afw device iwn6000g2bfw device iwn6050fw device wpifw # # Non-Transparent Bridge (NTB) drivers # device if_ntb # Virtual NTB network interface device ntb_transport # NTB packet transport driver device ntb # NTB hardware interface device ntb_hw_intel # Intel NTB hardware driver device ntb_hw_plx # PLX NTB hardware driver # #XXX this stores pointers in a 32bit field that is defined by the hardware #device pst # # Areca 11xx and 12xx series of SATA II RAID controllers. # CAM is required. # device arcmsr # Areca SATA II RAID # # Microsemi smartpqi controllers. # These controllers have a SCSI-like interface, and require the # CAM infrastructure. # device smartpqi # # 3ware 9000 series PATA/SATA RAID controller driver and options. # The driver is implemented as a SIM, and so, needs the CAM infrastructure. # options TWA_DEBUG # 0-10; 10 prints the most messages. device twa # 3ware 9000 series PATA/SATA RAID # # Adaptec FSA RAID controllers, including integrated DELL controllers, # the Dell PERC 2/QC and the HP NetRAID-4M device aac device aacp # SCSI Passthrough interface (optional, CAM required) # # Adaptec by PMC RAID controllers, Series 6/7/8 and upcoming families device aacraid # Container interface, CAM required # # Highpoint RocketRAID 27xx. device hpt27xx # # Highpoint RocketRAID 182x. device hptmv # # Highpoint DC7280 and R750. device hptnr # # Highpoint RocketRAID. Supports RR172x, RR222x, RR2240, RR232x, RR2340, # RR2210, RR174x, RR2522, RR231x, RR230x. device hptrr # # Highpoint RocketRaid 3xxx series SATA RAID device hptiop # # IBM (now Adaptec) ServeRAID controllers device ips # # Intel integrated Memory Controller (iMC) SMBus controller # Sandybridge-Xeon, Ivybridge-Xeon, Haswell-Xeon, Broadwell-Xeon device imcsmb # # Intel C600 (Patsburg) integrated SAS controller device isci options ISCI_LOGGING # enable debugging in isci HAL # # NVM Express (NVMe) support device nvme # base NVMe driver device nvd # expose NVMe namespaces as disks, depends on nvme # # PMC-Sierra SAS/SATA controller device pmspcv # # SafeNet crypto driver: can be moved to the MI NOTES as soon as # it's tested on a big-endian machine # device safe # SafeNet 1141 options SAFE_DEBUG # enable debugging support: hw.safe.debug options SAFE_RNDTEST # enable rndtest support # # VirtIO support # # The virtio entry provides a generic bus for use by the device drivers. # It must be combined with an interface that communicates with the host. # Multiple such interfaces are defined by the VirtIO specification. FreeBSD # only has support for PCI. Therefore, virtio_pci must be statically # compiled in or loaded as a module for the device drivers to function. # device virtio # Generic VirtIO bus (required) device virtio_pci # VirtIO PCI Interface device vtnet # VirtIO Ethernet device device virtio_blk # VirtIO Block device device virtio_scsi # VirtIO SCSI device device virtio_balloon # VirtIO Memory Balloon device device virtio_random # VirtIO Entropy device device virtio_console # VirtIO Console device # Microsoft Hyper-V enhancement support device hyperv # HyperV drivers # Xen HVM Guest Optimizations options XENHVM # Xen HVM kernel infrastructure device xenpci # Xen HVM Hypervisor services driver ##################################################################### # # Miscellaneous hardware: # # ipmi: Intelligent Platform Management Interface # pbio: Parallel (8255 PPI) basic I/O (mode 0) port (e.g. Advantech PCL-724) # smbios: DMI/SMBIOS entry point # vpd: Vital Product Data kernel interface # asmc: Apple System Management Controller # si: Specialix International SI/XIO or SX intelligent serial card # tpm: Trusted Platform Module # Notes on the Specialix SI/XIO driver: # The host card is memory, not IO mapped. # The Rev 1 host cards use a 64K chunk, on a 32K boundary. # The Rev 2 host cards use a 32K chunk, on a 32K boundary. # The cards can use an IRQ of 11, 12 or 15. device ipmi device pbio hint.pbio.0.at="isa" hint.pbio.0.port="0x360" device smbios device vpd device asmc device tpm device padlock_rng # VIA Padlock RNG device rdrand_rng # Intel Bull Mountain RNG device aesni # AES-NI OpenCrypto module device ioat # Intel I/OAT DMA engine # # Laptop/Notebook options: # # # I2C Bus # # # Hardware watchdog timers: # # ichwd: Intel ICH watchdog timer # amdsbwd: AMD SB7xx watchdog timer # viawd: VIA south bridge watchdog timer # wbwd: Winbond watchdog timer # device ichwd device amdsbwd device viawd device wbwd # # Temperature sensors: # # coretemp: on-die sensor on Intel Core and newer CPUs # amdtemp: on-die sensor on AMD K8/K10/K11 CPUs # device coretemp device amdtemp # # CPU control pseudo-device. Provides access to MSRs, CPUID info and # microcode update feature. # device cpuctl # # System Management Bus (SMB) # options ENABLE_ALART # Control alarm on Intel intpm driver # # AMD System Management Network (SMN) # device amdsmn # # Number of initial kernel page table pages used for early bootstrap. # This number should include enough pages to map the kernel and any # modules or other data loaded with the kernel by the loader. Each # page table page maps 2MB. # options NKPT=31 # EFI Runtime Services support options EFIRT ##################################################################### # ABI Emulation #XXX keep these here for now and reactivate when support for emulating #XXX these 32 bit binaries is added. # Enable 32-bit runtime support for FreeBSD/i386 binaries. options COMPAT_FREEBSD32 -# Enable iBCS2 runtime support for SCO and ISC binaries -#XXX#options IBCS2 - # Emulate spx device for client side of SVR3 local X interface #XXX#options SPX_HACK # Enable (32-bit) a.out binary support options COMPAT_AOUT # Enable 32-bit runtime support for CloudABI binaries. options COMPAT_CLOUDABI32 # Enable 64-bit runtime support for CloudABI binaries. options COMPAT_CLOUDABI64 # Enable Linux ABI emulation #XXX#options COMPAT_LINUX # Enable 32-bit Linux ABI emulation (requires COMPAT_FREEBSD32). options COMPAT_LINUX32 # Enable the linux-like proc filesystem support (requires COMPAT_LINUX32 # and PSEUDOFS) options LINPROCFS #Enable the linux-like sys filesystem support (requires COMPAT_LINUX32 # and PSEUDOFS) options LINSYSFS ##################################################################### # ZFS support options ZFS ##################################################################### # VM OPTIONS # KSTACK_PAGES is the number of memory pages to assign to the kernel # stack of each thread. options KSTACK_PAGES=5 # Enable detailed accounting by the PV entry allocator. options PV_STATS ##################################################################### # More undocumented options for linting. # Note that documenting these are not considered an affront. options FB_INSTALL_CDEV # install a CDEV entry in /dev options KBDIO_DEBUG=2 options KBD_MAXRETRY=4 options KBD_MAXWAIT=6 options KBD_RESETDELAY=201 options PSM_DEBUG=1 options TIMER_FREQ=((14318182+6)/12) options VM_KMEM_SIZE options VM_KMEM_SIZE_MAX options VM_KMEM_SIZE_SCALE # Enable NDIS binary driver support options NDISAPI device ndis Index: head/sys/amd64/ia32/ia32_syscall.c =================================================================== --- head/sys/amd64/ia32/ia32_syscall.c (revision 342242) +++ head/sys/amd64/ia32/ia32_syscall.c (revision 342243) @@ -1,285 +1,283 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 1994, David Greenman * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * 386 Trap and System call handling */ #include "opt_clock.h" #include "opt_cpu.h" #include "opt_isa.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define IDTVEC(name) __CONCAT(X,name) extern inthand_t IDTVEC(int0x80_syscall), IDTVEC(int0x80_syscall_pti), IDTVEC(rsvd), IDTVEC(rsvd_pti); void ia32_syscall(struct trapframe *frame); /* Called from asm code */ void ia32_set_syscall_retval(struct thread *td, int error) { cpu_set_syscall_retval(td, error); } int ia32_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; struct syscall_args *sa; caddr_t params; u_int32_t args[8], tmp; int error, i; #ifdef COMPAT_43 u_int32_t eip; int cs; #endif p = td->td_proc; frame = td->td_frame; sa = &td->td_sa; #ifdef COMPAT_43 if (__predict_false(frame->tf_cs == 7 && frame->tf_rip == 2)) { /* * In lcall $7,$0 after int $0x80. Convert the user * frame to what it would be for a direct int 0x80 instead * of lcall $7,$0, by popping the lcall return address. */ error = fueword32((void *)frame->tf_rsp, &eip); if (error == -1) return (EFAULT); cs = fuword16((void *)(frame->tf_rsp + sizeof(u_int32_t))); if (cs == -1) return (EFAULT); /* * Unwind in-kernel frame after all stack frame pieces * were successfully read. */ frame->tf_rip = eip; frame->tf_cs = cs; frame->tf_rsp += 2 * sizeof(u_int32_t); frame->tf_err = 7; /* size of lcall $7,$0 */ } #endif params = (caddr_t)frame->tf_rsp + sizeof(u_int32_t); sa->code = frame->tf_rax; /* * Need to check if this is a 32 bit or 64 bit syscall. */ if (sa->code == SYS_syscall) { /* * Code is first argument, followed by actual args. */ error = fueword32(params, &tmp); if (error == -1) return (EFAULT); sa->code = tmp; params += sizeof(int); } else if (sa->code == SYS___syscall) { /* * Like syscall, but code is a quad, so as to maintain * quad alignment for the rest of the arguments. * We use a 32-bit fetch in case params is not * aligned. */ error = fueword32(params, &tmp); if (error == -1) return (EFAULT); sa->code = tmp; params += sizeof(quad_t); } - if (p->p_sysent->sv_mask) - sa->code &= p->p_sysent->sv_mask; if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; if (params != NULL && sa->narg != 0) error = copyin(params, (caddr_t)args, (u_int)(sa->narg * sizeof(int))); else error = 0; for (i = 0; i < sa->narg; i++) sa->args[i] = args[i]; if (error == 0) { td->td_retval[0] = 0; td->td_retval[1] = frame->tf_rdx; } return (error); } #include "../../kern/subr_syscall.c" void ia32_syscall(struct trapframe *frame) { struct thread *td; register_t orig_tf_rflags; int error; ksiginfo_t ksi; orig_tf_rflags = frame->tf_rflags; td = curthread; td->td_frame = frame; error = syscallenter(td); /* * Traced syscall. */ if (orig_tf_rflags & PSL_T) { frame->tf_rflags &= ~PSL_T; ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGTRAP; ksi.ksi_code = TRAP_TRACE; ksi.ksi_addr = (void *)frame->tf_rip; trapsignal(td, &ksi); } syscallret(td, error); amd64_syscall_ret_flush_l1d(error); } static void ia32_syscall_enable(void *dummy) { setidt(IDT_SYSCALL, pti ? &IDTVEC(int0x80_syscall_pti) : &IDTVEC(int0x80_syscall), SDT_SYSIGT, SEL_UPL, 0); } static void ia32_syscall_disable(void *dummy) { setidt(IDT_SYSCALL, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); } SYSINIT(ia32_syscall, SI_SUB_EXEC, SI_ORDER_ANY, ia32_syscall_enable, NULL); SYSUNINIT(ia32_syscall, SI_SUB_EXEC, SI_ORDER_ANY, ia32_syscall_disable, NULL); #ifdef COMPAT_43 int setup_lcall_gate(void) { struct i386_ldt_args uap; struct user_segment_descriptor desc; uint32_t lcall_addr; int error; bzero(&uap, sizeof(uap)); uap.start = 0; uap.num = 1; lcall_addr = curproc->p_sysent->sv_psstrings - sz_lcall_tramp; bzero(&desc, sizeof(desc)); desc.sd_type = SDT_MEMERA; desc.sd_dpl = SEL_UPL; desc.sd_p = 1; desc.sd_def32 = 1; desc.sd_gran = 1; desc.sd_lolimit = 0xffff; desc.sd_hilimit = 0xf; desc.sd_lobase = lcall_addr; desc.sd_hibase = lcall_addr >> 24; error = amd64_set_ldt(curthread, &uap, &desc); if (error != 0) return (error); return (0); } #endif Index: head/sys/amd64/linux/linux_sysvec.c =================================================================== --- head/sys/amd64/linux/linux_sysvec.c (revision 342242) +++ head/sys/amd64/linux/linux_sysvec.c (revision 342243) @@ -1,923 +1,922 @@ /*- * Copyright (c) 2013 Dmitry Chagin * Copyright (c) 2004 Tim J. Robbins * Copyright (c) 2003 Peter Wemm * Copyright (c) 2002 Doug Rabson * Copyright (c) 1998-1999 Andrew Gallatin * Copyright (c) 1994-1996 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #define __ELF_WORD_SIZE 64 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_VERSION(linux64, 1); #if defined(DEBUG) SYSCTL_PROC(_compat_linux, OID_AUTO, debug, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, linux_sysctl_debug, "A", "Linux 64 debugging control"); #endif /* * Allow the sendsig functions to use the ldebug() facility even though they * are not syscalls themselves. Map them to syscall 0. This is slightly less * bogus than using ldebug(sigreturn). */ #define LINUX_SYS_linux_rt_sendsig 0 const char *linux_kplatform; static int linux_szsigcode; static vm_object_t linux_shared_page_obj; static char *linux_shared_page_mapping; extern char _binary_linux_locore_o_start; extern char _binary_linux_locore_o_end; extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); static register_t * linux_copyout_strings(struct image_params *imgp); static int linux_fixup_elf(register_t **stack_base, struct image_params *iparams); static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel); static void linux_vdso_install(void *param); static void linux_vdso_deinstall(void *param); static void linux_set_syscall_retval(struct thread *td, int error); static int linux_fetch_syscall_args(struct thread *td); static void linux_exec_setregs(struct thread *td, struct image_params *imgp, u_long stack); static int linux_vsyscall(struct thread *td); #define LINUX_T_UNKNOWN 255 static int _bsd_to_linux_trapcode[] = { LINUX_T_UNKNOWN, /* 0 */ 6, /* 1 T_PRIVINFLT */ LINUX_T_UNKNOWN, /* 2 */ 3, /* 3 T_BPTFLT */ LINUX_T_UNKNOWN, /* 4 */ LINUX_T_UNKNOWN, /* 5 */ 16, /* 6 T_ARITHTRAP */ 254, /* 7 T_ASTFLT */ LINUX_T_UNKNOWN, /* 8 */ 13, /* 9 T_PROTFLT */ 1, /* 10 T_TRCTRAP */ LINUX_T_UNKNOWN, /* 11 */ 14, /* 12 T_PAGEFLT */ LINUX_T_UNKNOWN, /* 13 */ 17, /* 14 T_ALIGNFLT */ LINUX_T_UNKNOWN, /* 15 */ LINUX_T_UNKNOWN, /* 16 */ LINUX_T_UNKNOWN, /* 17 */ 0, /* 18 T_DIVIDE */ 2, /* 19 T_NMI */ 4, /* 20 T_OFLOW */ 5, /* 21 T_BOUND */ 7, /* 22 T_DNA */ 8, /* 23 T_DOUBLEFLT */ 9, /* 24 T_FPOPFLT */ 10, /* 25 T_TSSFLT */ 11, /* 26 T_SEGNPFLT */ 12, /* 27 T_STKFLT */ 18, /* 28 T_MCHK */ 19, /* 29 T_XMMFLT */ 15 /* 30 T_RESERVED */ }; #define bsd_to_linux_trapcode(code) \ ((code)td_proc; frame = td->td_frame; sa = &td->td_sa; sa->args[0] = frame->tf_rdi; sa->args[1] = frame->tf_rsi; sa->args[2] = frame->tf_rdx; sa->args[3] = frame->tf_rcx; sa->args[4] = frame->tf_r8; sa->args[5] = frame->tf_r9; sa->code = frame->tf_rax; if (sa->code >= p->p_sysent->sv_size) /* nosys */ sa->callp = &p->p_sysent->sv_table[p->p_sysent->sv_size - 1]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; td->td_retval[0] = 0; return (0); } static void linux_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame = td->td_frame; /* * On Linux only %rcx and %r11 values are not preserved across * the syscall. So, do not clobber %rdx and %r10. */ td->td_retval[1] = frame->tf_rdx; if (error != EJUSTRETURN) frame->tf_r10 = frame->tf_rcx; cpu_set_syscall_retval(td, error); /* Restore all registers. */ set_pcb_flags(td->td_pcb, PCB_FULL_IRET); } static int linux_fixup_elf(register_t **stack_base, struct image_params *imgp) { Elf_Auxargs *args; Elf_Auxinfo *argarray, *pos; Elf_Addr *auxbase, *base; struct ps_strings *arginfo; struct proc *p; int error, issetugid; p = imgp->proc; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; KASSERT(curthread->td_proc == imgp->proc, ("unsafe linux_fixup_elf(), should be curproc")); base = (Elf64_Addr *)*stack_base; args = (Elf64_Auxargs *)imgp->auxargs; auxbase = base + imgp->args->argc + 1 + imgp->args->envc + 1; argarray = pos = malloc(LINUX_AT_COUNT * sizeof(*pos), M_TEMP, M_WAITOK | M_ZERO); issetugid = p->p_flag & P_SUGID ? 1 : 0; AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, imgp->proc->p_sysent->sv_shared_page_base); AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature); AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz); AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); AUXARGS_ENTRY(pos, AT_PHENT, args->phent); AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); AUXARGS_ENTRY(pos, AT_BASE, args->base); AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); AUXARGS_ENTRY(pos, LINUX_AT_SECURE, issetugid); AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform)); AUXARGS_ENTRY(pos, LINUX_AT_RANDOM, imgp->canary); if (imgp->execpathp != 0) AUXARGS_ENTRY(pos, LINUX_AT_EXECFN, imgp->execpathp); if (args->execfd != -1) AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); AUXARGS_ENTRY(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); imgp->auxargs = NULL; KASSERT(pos - argarray <= LINUX_AT_COUNT, ("Too many auxargs")); error = copyout(argarray, auxbase, sizeof(*argarray) * LINUX_AT_COUNT); free(argarray, M_TEMP); if (error != 0) return (error); base--; if (suword(base, (uint64_t)imgp->args->argc) == -1) return (EFAULT); *stack_base = (register_t *)base; return (0); } /* * Copy strings out to the new process address space, constructing new arg * and env vector tables. Return a pointer to the base so that it can be used * as the initial stack pointer. */ static register_t * linux_copyout_strings(struct image_params *imgp) { int argc, envc; char **vectp; char *stringp, *destp; register_t *stack_base; struct ps_strings *arginfo; char canary[LINUX_AT_RANDOM_LEN]; size_t execpath_len; struct proc *p; /* Calculate string base and vector table pointers. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; p = imgp->proc; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; destp = (caddr_t)arginfo - SPARE_USRSPACE - roundup(sizeof(canary), sizeof(char *)) - roundup(execpath_len, sizeof(char *)) - roundup(ARG_MAX - imgp->args->stringspace, sizeof(char *)); if (execpath_len != 0) { imgp->execpathp = (uintptr_t)arginfo - execpath_len; copyout(imgp->execpath, (void *)imgp->execpathp, execpath_len); } /* Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); imgp->canary = (uintptr_t)arginfo - roundup(execpath_len, sizeof(char *)) - roundup(sizeof(canary), sizeof(char *)); copyout(canary, (void *)imgp->canary, sizeof(canary)); vectp = (char **)destp; if (imgp->auxargs) { /* * Allocate room on the stack for the ELF auxargs * array. It has LINUX_AT_COUNT entries. */ vectp -= howmany(LINUX_AT_COUNT * sizeof(Elf64_Auxinfo), sizeof(*vectp)); } /* * Allocate room for the argv[] and env vectors including the * terminating NULL pointers. */ vectp -= imgp->args->argc + 1 + imgp->args->envc + 1; /* vectp also becomes our initial stack base. */ stack_base = (register_t *)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* Copy out strings - arguments and environment. */ copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); /* Fill in "ps_strings" struct for ps, w, etc. */ suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp); suword(&arginfo->ps_nargvstr, argc); /* Fill in argument portion of vector table. */ for (; argc > 0; --argc) { suword(vectp++, (long)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* A null vector table pointer separates the argp's from the envp's. */ suword(vectp++, 0); suword(&arginfo->ps_envstr, (long)(intptr_t)vectp); suword(&arginfo->ps_nenvstr, envc); /* Fill in environment portion of vector table. */ for (; envc > 0; --envc) { suword(vectp++, (long)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* The end of the vector table is a null pointer. */ suword(vectp, 0); return (stack_base); } /* * Reset registers to default values on exec. */ static void linux_exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *regs; struct pcb *pcb; register_t saved_rflags; regs = td->td_frame; pcb = td->td_pcb; if (td->td_proc->p_md.md_ldt != NULL) user_ldt_free(td); pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; clear_pcb_flags(pcb, PCB_32BIT); pcb->pcb_initial_fpucw = __LINUX_NPXCW__; set_pcb_flags(pcb, PCB_FULL_IRET); saved_rflags = regs->tf_rflags & PSL_T; bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = imgp->entry_addr; regs->tf_rsp = stack; regs->tf_rflags = PSL_USER | saved_rflags; regs->tf_ss = _udatasel; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == curpcb) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } clear_pcb_flags(pcb, PCB_DBREGS); } /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); } /* * Copied from amd64/amd64/machdep.c * * XXX fpu state need? don't think so */ int linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) { struct proc *p; struct l_ucontext uc; struct l_sigcontext *context; struct trapframe *regs; unsigned long rflags; int error; ksiginfo_t ksi; regs = td->td_frame; error = copyin((void *)regs->tf_rbx, &uc, sizeof(uc)); if (error != 0) return (error); p = td->td_proc; context = &uc.uc_mcontext; rflags = context->sc_rflags; /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_rflags for faults. Debuggers * should sometimes set it there too. tf_rflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ #define RFLAG_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) if (!RFLAG_SECURE(rflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) { printf("linux_rt_sigreturn: rflags = 0x%lx\n", rflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(context->sc_cs)) { printf("linux_rt_sigreturn: cs = 0x%x\n", context->sc_cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_rip; trapsignal(td, &ksi); return (EINVAL); } PROC_LOCK(p); linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask); SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); regs->tf_rdi = context->sc_rdi; regs->tf_rsi = context->sc_rsi; regs->tf_rdx = context->sc_rdx; regs->tf_rbp = context->sc_rbp; regs->tf_rbx = context->sc_rbx; regs->tf_rcx = context->sc_rcx; regs->tf_rax = context->sc_rax; regs->tf_rip = context->sc_rip; regs->tf_rsp = context->sc_rsp; regs->tf_r8 = context->sc_r8; regs->tf_r9 = context->sc_r9; regs->tf_r10 = context->sc_r10; regs->tf_r11 = context->sc_r11; regs->tf_r12 = context->sc_r12; regs->tf_r13 = context->sc_r13; regs->tf_r14 = context->sc_r14; regs->tf_r15 = context->sc_r15; regs->tf_cs = context->sc_cs; regs->tf_err = context->sc_err; regs->tf_rflags = rflags; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); return (EJUSTRETURN); } /* * copied from amd64/amd64/machdep.c * * Send an interrupt to process. */ static void linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct l_rt_sigframe sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; caddr_t sp; struct trapframe *regs; int sig, code; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; code = ksi->ksi_code; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); LINUX_CTR4(rt_sendsig, "%p, %d, %p, %u", catcher, sig, mask, code); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = (caddr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe); } else sp = (caddr_t)regs->tf_rsp - sizeof(struct l_rt_sigframe) - 128; /* Align to 16 bytes. */ sfp = (struct l_rt_sigframe *)((unsigned long)sp & ~0xFul); mtx_unlock(&psp->ps_mtx); /* Translate the signal. */ sig = bsd_to_linux_signal(sig); /* Save user context. */ bzero(&sf, sizeof(sf)); bsd_to_linux_sigset(mask, &sf.sf_sc.uc_sigmask); bsd_to_linux_sigset(mask, &sf.sf_sc.uc_mcontext.sc_mask); sf.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); sf.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; sf.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; PROC_UNLOCK(p); sf.sf_sc.uc_mcontext.sc_rdi = regs->tf_rdi; sf.sf_sc.uc_mcontext.sc_rsi = regs->tf_rsi; sf.sf_sc.uc_mcontext.sc_rdx = regs->tf_rdx; sf.sf_sc.uc_mcontext.sc_rbp = regs->tf_rbp; sf.sf_sc.uc_mcontext.sc_rbx = regs->tf_rbx; sf.sf_sc.uc_mcontext.sc_rcx = regs->tf_rcx; sf.sf_sc.uc_mcontext.sc_rax = regs->tf_rax; sf.sf_sc.uc_mcontext.sc_rip = regs->tf_rip; sf.sf_sc.uc_mcontext.sc_rsp = regs->tf_rsp; sf.sf_sc.uc_mcontext.sc_r8 = regs->tf_r8; sf.sf_sc.uc_mcontext.sc_r9 = regs->tf_r9; sf.sf_sc.uc_mcontext.sc_r10 = regs->tf_r10; sf.sf_sc.uc_mcontext.sc_r11 = regs->tf_r11; sf.sf_sc.uc_mcontext.sc_r12 = regs->tf_r12; sf.sf_sc.uc_mcontext.sc_r13 = regs->tf_r13; sf.sf_sc.uc_mcontext.sc_r14 = regs->tf_r14; sf.sf_sc.uc_mcontext.sc_r15 = regs->tf_r15; sf.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; sf.sf_sc.uc_mcontext.sc_rflags = regs->tf_rflags; sf.sf_sc.uc_mcontext.sc_err = regs->tf_err; sf.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); sf.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr; /* Build the argument list for the signal handler. */ regs->tf_rdi = sig; /* arg 1 in %rdi */ regs->tf_rax = 0; regs->tf_rsi = (register_t)&sfp->sf_si; /* arg 2 in %rsi */ regs->tf_rdx = (register_t)&sfp->sf_sc; /* arg 3 in %rdx */ sf.sf_handler = catcher; /* Fill in POSIX parts. */ ksiginfo_to_lsiginfo(ksi, &sf.sf_si, sig); /* Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_rsp = (long)sfp; regs->tf_rip = linux_rt_sigcode; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #define LINUX_VSYSCALL_START (-10UL << 20) #define LINUX_VSYSCALL_SZ 1024 const unsigned long linux_vsyscall_vector[] = { LINUX_SYS_gettimeofday, LINUX_SYS_linux_time, /* getcpu not implemented */ }; static int linux_vsyscall(struct thread *td) { struct trapframe *frame; uint64_t retqaddr; int code, traced; int error; frame = td->td_frame; /* Check %rip for vsyscall area. */ if (__predict_true(frame->tf_rip < LINUX_VSYSCALL_START)) return (EINVAL); if ((frame->tf_rip & (LINUX_VSYSCALL_SZ - 1)) != 0) return (EINVAL); code = (frame->tf_rip - LINUX_VSYSCALL_START) / LINUX_VSYSCALL_SZ; if (code >= nitems(linux_vsyscall_vector)) return (EINVAL); /* * vsyscall called as callq *(%rax), so we must * use return address from %rsp and also fixup %rsp. */ error = copyin((void *)frame->tf_rsp, &retqaddr, sizeof(retqaddr)); if (error) return (error); frame->tf_rip = retqaddr; frame->tf_rax = linux_vsyscall_vector[code]; frame->tf_rsp += 8; traced = (frame->tf_flags & PSL_T); amd64_syscall(td, traced); return (0); } struct sysentvec elf_linux_sysvec = { .sv_size = LINUX_SYS_MAXSYSCALL, .sv_table = linux_sysent, - .sv_mask = 0, .sv_errsize = ELAST + 1, .sv_errtbl = linux_errtbl, .sv_transtrap = linux_translate_traps, .sv_fixup = linux_fixup_elf, .sv_sendsig = linux_rt_sendsig, .sv_sigcode = &_binary_linux_locore_o_start, .sv_szsigcode = &linux_szsigcode, .sv_name = "Linux ELF64", .sv_coredump = elf64_coredump, .sv_imgact_try = linux_exec_imgact_try, .sv_minsigstksz = LINUX_MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = linux_copyout_strings, .sv_setregs = linux_exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_LINUX | SV_LP64 | SV_SHP, .sv_set_syscall_retval = linux_set_syscall_retval, .sv_fetch_syscall_args = linux_fetch_syscall_args, .sv_syscallnames = NULL, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, .sv_thread_detach = linux_thread_detach, .sv_trap = linux_vsyscall, }; static void linux_vdso_install(void *param) { amd64_lower_shared_page(&elf_linux_sysvec); linux_szsigcode = (&_binary_linux_locore_o_end - &_binary_linux_locore_o_start); if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) panic("Linux invalid vdso size\n"); __elfN(linux_vdso_fixup)(&elf_linux_sysvec); linux_shared_page_obj = __elfN(linux_shared_page_init) (&linux_shared_page_mapping); __elfN(linux_vdso_reloc)(&elf_linux_sysvec); bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, linux_szsigcode); elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; linux_kplatform = linux_shared_page_mapping + (linux_platform - (caddr_t)elf_linux_sysvec.sv_shared_page_base); } SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, linux_vdso_install, NULL); static void linux_vdso_deinstall(void *param) { __elfN(linux_shared_page_fini)(linux_shared_page_obj); } SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, linux_vdso_deinstall, NULL); static char GNULINUX_ABI_VENDOR[] = "GNU"; static int GNULINUX_ABI_DESC = 0; static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel) { const Elf32_Word *desc; uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); desc = (const Elf32_Word *)p; if (desc[0] != GNULINUX_ABI_DESC) return (false); /* * For Linux we encode osrel using the Linux convention of * (version << 16) | (major << 8) | (minor) * See macro in linux_mib.h */ *osrel = LINUX_KERNVER(desc[1], desc[2], desc[3]); return (true); } static Elf_Brandnote linux64_brandnote = { .hdr.n_namesz = sizeof(GNULINUX_ABI_VENDOR), .hdr.n_descsz = 16, .hdr.n_type = 1, .vendor = GNULINUX_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = linux_trans_osrel }; static Elf64_Brandinfo linux_glibc2brand = { .brand = ELFOSABI_LINUX, .machine = EM_X86_64, .compat_3_brand = "Linux", .emul_path = "/compat/linux", .interp_path = "/lib64/ld-linux-x86-64.so.2", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux64_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; static Elf64_Brandinfo linux_glibc2brandshort = { .brand = ELFOSABI_LINUX, .machine = EM_X86_64, .compat_3_brand = "Linux", .emul_path = "/compat/linux", .interp_path = "/lib64/ld-linux.so.2", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux64_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; static Elf64_Brandinfo linux_muslbrand = { .brand = ELFOSABI_LINUX, .machine = EM_X86_64, .compat_3_brand = "Linux", .emul_path = "/compat/linux", .interp_path = "/lib/ld-musl-x86_64.so.1", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux64_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; Elf64_Brandinfo *linux_brandlist[] = { &linux_glibc2brand, &linux_glibc2brandshort, &linux_muslbrand, NULL }; static int linux64_elf_modevent(module_t mod, int type, void *data) { Elf64_Brandinfo **brandinfo; int error; struct linux_ioctl_handler **lihp; error = 0; switch(type) { case MOD_LOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf64_insert_brand_entry(*brandinfo) < 0) error = EINVAL; if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_register_handler(*lihp); LIST_INIT(&futex_list); mtx_init(&futex_mtx, "ftllk64", NULL, MTX_DEF); stclohz = (stathz ? stathz : hz); if (bootverbose) printf("Linux x86-64 ELF exec handler installed\n"); } else printf("cannot insert Linux x86-64 ELF brand handler\n"); break; case MOD_UNLOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf64_brand_inuse(*brandinfo)) error = EBUSY; if (error == 0) { for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf64_remove_brand_entry(*brandinfo) < 0) error = EINVAL; } if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_unregister_handler(*lihp); mtx_destroy(&futex_mtx); if (bootverbose) printf("Linux ELF exec handler removed\n"); } else printf("Could not deinstall ELF interpreter entry\n"); break; default: return (EOPNOTSUPP); } return (error); } static moduledata_t linux64_elf_mod = { "linux64elf", linux64_elf_modevent, 0 }; DECLARE_MODULE_TIED(linux64elf, linux64_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); MODULE_DEPEND(linux64elf, linux_common, 1, 1, 1); FEATURE(linux64, "Linux 64bit support"); Index: head/sys/amd64/linux32/linux32_sysvec.c =================================================================== --- head/sys/amd64/linux32/linux32_sysvec.c (revision 342242) +++ head/sys/amd64/linux32/linux32_sysvec.c (revision 342243) @@ -1,1119 +1,1118 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2004 Tim J. Robbins * Copyright (c) 2003 Peter Wemm * Copyright (c) 2002 Doug Rabson * Copyright (c) 1998-1999 Andrew Gallatin * Copyright (c) 1994-1996 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "opt_compat.h" #include __FBSDID("$FreeBSD$"); #ifndef COMPAT_FREEBSD32 #error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!" #endif #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_VERSION(linux, 1); /* * Allow the sendsig functions to use the ldebug() facility even though they * are not syscalls themselves. Map them to syscall 0. This is slightly less * bogus than using ldebug(sigreturn). */ #define LINUX32_SYS_linux_rt_sendsig 0 #define LINUX32_SYS_linux_sendsig 0 const char *linux_kplatform; static int linux_szsigcode; static vm_object_t linux_shared_page_obj; static char *linux_shared_page_mapping; extern char _binary_linux32_locore_o_start; extern char _binary_linux32_locore_o_end; extern struct sysent linux32_sysent[LINUX32_SYS_MAXSYSCALL]; SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); static int linux_fixup_elf(register_t **stack_base, struct image_params *iparams); static register_t *linux_copyout_strings(struct image_params *imgp); static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); static void linux_exec_setregs(struct thread *td, struct image_params *imgp, u_long stack); static void linux32_fixlimit(struct rlimit *rl, int which); static bool linux32_trans_osrel(const Elf_Note *note, int32_t *osrel); static void linux_vdso_install(void *param); static void linux_vdso_deinstall(void *param); #define LINUX_T_UNKNOWN 255 static int _bsd_to_linux_trapcode[] = { LINUX_T_UNKNOWN, /* 0 */ 6, /* 1 T_PRIVINFLT */ LINUX_T_UNKNOWN, /* 2 */ 3, /* 3 T_BPTFLT */ LINUX_T_UNKNOWN, /* 4 */ LINUX_T_UNKNOWN, /* 5 */ 16, /* 6 T_ARITHTRAP */ 254, /* 7 T_ASTFLT */ LINUX_T_UNKNOWN, /* 8 */ 13, /* 9 T_PROTFLT */ 1, /* 10 T_TRCTRAP */ LINUX_T_UNKNOWN, /* 11 */ 14, /* 12 T_PAGEFLT */ LINUX_T_UNKNOWN, /* 13 */ 17, /* 14 T_ALIGNFLT */ LINUX_T_UNKNOWN, /* 15 */ LINUX_T_UNKNOWN, /* 16 */ LINUX_T_UNKNOWN, /* 17 */ 0, /* 18 T_DIVIDE */ 2, /* 19 T_NMI */ 4, /* 20 T_OFLOW */ 5, /* 21 T_BOUND */ 7, /* 22 T_DNA */ 8, /* 23 T_DOUBLEFLT */ 9, /* 24 T_FPOPFLT */ 10, /* 25 T_TSSFLT */ 11, /* 26 T_SEGNPFLT */ 12, /* 27 T_STKFLT */ 18, /* 28 T_MCHK */ 19, /* 29 T_XMMFLT */ 15 /* 30 T_RESERVED */ }; #define bsd_to_linux_trapcode(code) \ ((code)td_proc == imgp->proc, ("unsafe linux_fixup_elf(), should be curproc")); base = (Elf32_Addr *)*stack_base; args = (Elf32_Auxargs *)imgp->auxargs; auxbase = base + (imgp->args->argc + 1 + imgp->args->envc + 1); argarray = pos = malloc(LINUX_AT_COUNT * sizeof(*pos), M_TEMP, M_WAITOK | M_ZERO); issetugid = imgp->proc->p_flag & P_SUGID ? 1 : 0; AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, imgp->proc->p_sysent->sv_shared_page_base); AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, linux32_vsyscall); AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature); /* * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0, * as it has appeared in the 2.4.0-rc7 first time. * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK), * glibc falls back to the hard-coded CLK_TCK value when aux entry * is not present. * Also see linux_times() implementation. */ if (linux_kernver(curthread) >= LINUX_KERNVER_2004000) AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz); AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); AUXARGS_ENTRY(pos, AT_PHENT, args->phent); AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); AUXARGS_ENTRY(pos, AT_BASE, args->base); AUXARGS_ENTRY(pos, LINUX_AT_SECURE, issetugid); AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform)); AUXARGS_ENTRY(pos, LINUX_AT_RANDOM, PTROUT(imgp->canary)); if (imgp->execpathp != 0) AUXARGS_ENTRY(pos, LINUX_AT_EXECFN, PTROUT(imgp->execpathp)); if (args->execfd != -1) AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); AUXARGS_ENTRY(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); imgp->auxargs = NULL; KASSERT(pos - argarray <= LINUX_AT_COUNT, ("Too many auxargs")); error = copyout(&argarray[0], auxbase, sizeof(*argarray) * LINUX_AT_COUNT); free(argarray, M_TEMP); if (error != 0) return (error); base--; if (suword32(base, (uint32_t)imgp->args->argc) == -1) return (EFAULT); *stack_base = (register_t *)base; return (0); } static void linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td = curthread; struct proc *p = td->td_proc; struct sigacts *psp; struct trapframe *regs; struct l_rt_sigframe *fp, frame; int oonstack; int sig; int code; sig = ksi->ksi_signo; code = ksi->ksi_code; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); #ifdef DEBUG if (ldebug(rt_sendsig)) printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), catcher, sig, (void*)mask, code); #endif /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct l_rt_sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); } else fp = (struct l_rt_sigframe *)regs->tf_rsp - 1; mtx_unlock(&psp->ps_mtx); /* Build the argument list for the signal handler. */ sig = bsd_to_linux_signal(sig); bzero(&frame, sizeof(frame)); frame.sf_handler = PTROUT(catcher); frame.sf_sig = sig; frame.sf_siginfo = PTROUT(&fp->sf_si); frame.sf_ucontext = PTROUT(&fp->sf_sc); /* Fill in POSIX parts. */ ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); /* * Build the signal context to be used by sigreturn and libgcc unwind. */ frame.sf_sc.uc_flags = 0; /* XXX ??? */ frame.sf_sc.uc_link = 0; /* XXX ??? */ frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; PROC_UNLOCK(p); bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__mask; frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi; frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi; frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp; frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx; frame.sf_sc.uc_mcontext.sc_esp = regs->tf_rsp; frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx; frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx; frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax; frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip; frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs; frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags; frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp; frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); #ifdef DEBUG if (ldebug(rt_sendsig)) printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); #endif if (copyout(&frame, fp, sizeof(frame)) != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ #ifdef DEBUG if (ldebug(rt_sendsig)) printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), fp, oonstack); #endif PROC_LOCK(p); sigexit(td, SIGILL); } /* Build context to run handler in. */ regs->tf_rsp = PTROUT(fp); regs->tf_rip = linux32_rt_sigcode; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucode32sel; regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * in u. to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td = curthread; struct proc *p = td->td_proc; struct sigacts *psp; struct trapframe *regs; struct l_sigframe *fp, frame; l_sigset_t lmask; int oonstack; int sig, code; sig = ksi->ksi_signo; code = ksi->ksi_code; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ linux_rt_sendsig(catcher, ksi, mask); return; } regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); #ifdef DEBUG if (ldebug(sendsig)) printf(ARGS(sendsig, "%p, %d, %p, %u"), catcher, sig, (void*)mask, code); #endif /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct l_sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_sigframe)); } else fp = (struct l_sigframe *)regs->tf_rsp - 1; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* Build the argument list for the signal handler. */ sig = bsd_to_linux_signal(sig); bzero(&frame, sizeof(frame)); frame.sf_handler = PTROUT(catcher); frame.sf_sig = sig; bsd_to_linux_sigset(mask, &lmask); /* Build the signal context to be used by sigreturn. */ frame.sf_sc.sc_mask = lmask.__mask; frame.sf_sc.sc_gs = regs->tf_gs; frame.sf_sc.sc_fs = regs->tf_fs; frame.sf_sc.sc_es = regs->tf_es; frame.sf_sc.sc_ds = regs->tf_ds; frame.sf_sc.sc_edi = regs->tf_rdi; frame.sf_sc.sc_esi = regs->tf_rsi; frame.sf_sc.sc_ebp = regs->tf_rbp; frame.sf_sc.sc_ebx = regs->tf_rbx; frame.sf_sc.sc_esp = regs->tf_rsp; frame.sf_sc.sc_edx = regs->tf_rdx; frame.sf_sc.sc_ecx = regs->tf_rcx; frame.sf_sc.sc_eax = regs->tf_rax; frame.sf_sc.sc_eip = regs->tf_rip; frame.sf_sc.sc_cs = regs->tf_cs; frame.sf_sc.sc_eflags = regs->tf_rflags; frame.sf_sc.sc_esp_at_signal = regs->tf_rsp; frame.sf_sc.sc_ss = regs->tf_ss; frame.sf_sc.sc_err = regs->tf_err; frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); frame.sf_extramask[0] = lmask.__mask; if (copyout(&frame, fp, sizeof(frame)) != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ PROC_LOCK(p); sigexit(td, SIGILL); } /* Build context to run handler in. */ regs->tf_rsp = PTROUT(fp); regs->tf_rip = linux32_sigcode; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucode32sel; regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * psl to gain improper privileges or to cause * a machine fault. */ int linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) { struct l_sigframe frame; struct trapframe *regs; sigset_t bmask; l_sigset_t lmask; int eflags; ksiginfo_t ksi; regs = td->td_frame; #ifdef DEBUG if (ldebug(sigreturn)) printf(ARGS(sigreturn, "%p"), (void *)args->sfp); #endif /* * The trampoline code hands us the sigframe. * It is unsafe to keep track of it ourselves, in the event that a * program jumps out of a signal handler. */ if (copyin(args->sfp, &frame, sizeof(frame)) != 0) return (EFAULT); /* Check for security violations. */ #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) eflags = frame.sf_sc.sc_eflags; if (!EFLAGS_SECURE(eflags, regs->tf_rflags)) return(EINVAL); /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(frame.sf_sc.sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_rip; trapsignal(td, &ksi); return(EINVAL); } lmask.__mask = frame.sf_sc.sc_mask; lmask.__mask = frame.sf_extramask[0]; linux_to_bsd_sigset(&lmask, &bmask); kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); /* Restore signal context. */ regs->tf_rdi = frame.sf_sc.sc_edi; regs->tf_rsi = frame.sf_sc.sc_esi; regs->tf_rbp = frame.sf_sc.sc_ebp; regs->tf_rbx = frame.sf_sc.sc_ebx; regs->tf_rdx = frame.sf_sc.sc_edx; regs->tf_rcx = frame.sf_sc.sc_ecx; regs->tf_rax = frame.sf_sc.sc_eax; regs->tf_rip = frame.sf_sc.sc_eip; regs->tf_cs = frame.sf_sc.sc_cs; regs->tf_ds = frame.sf_sc.sc_ds; regs->tf_es = frame.sf_sc.sc_es; regs->tf_fs = frame.sf_sc.sc_fs; regs->tf_gs = frame.sf_sc.sc_gs; regs->tf_rflags = eflags; regs->tf_rsp = frame.sf_sc.sc_esp_at_signal; regs->tf_ss = frame.sf_sc.sc_ss; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); return (EJUSTRETURN); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by rt_sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * psl to gain improper privileges or to cause * a machine fault. */ int linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) { struct l_ucontext uc; struct l_sigcontext *context; sigset_t bmask; l_stack_t *lss; stack_t ss; struct trapframe *regs; int eflags; ksiginfo_t ksi; regs = td->td_frame; #ifdef DEBUG if (ldebug(rt_sigreturn)) printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); #endif /* * The trampoline code hands us the ucontext. * It is unsafe to keep track of it ourselves, in the event that a * program jumps out of a signal handler. */ if (copyin(args->ucp, &uc, sizeof(uc)) != 0) return (EFAULT); context = &uc.uc_mcontext; /* Check for security violations. */ #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) eflags = context->sc_eflags; if (!EFLAGS_SECURE(eflags, regs->tf_rflags)) return(EINVAL); /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(context->sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_rip; trapsignal(td, &ksi); return(EINVAL); } linux_to_bsd_sigset(&uc.uc_sigmask, &bmask); kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); /* * Restore signal context */ regs->tf_gs = context->sc_gs; regs->tf_fs = context->sc_fs; regs->tf_es = context->sc_es; regs->tf_ds = context->sc_ds; regs->tf_rdi = context->sc_edi; regs->tf_rsi = context->sc_esi; regs->tf_rbp = context->sc_ebp; regs->tf_rbx = context->sc_ebx; regs->tf_rdx = context->sc_edx; regs->tf_rcx = context->sc_ecx; regs->tf_rax = context->sc_eax; regs->tf_rip = context->sc_eip; regs->tf_cs = context->sc_cs; regs->tf_rflags = eflags; regs->tf_rsp = context->sc_esp_at_signal; regs->tf_ss = context->sc_ss; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); /* * call sigaltstack & ignore results.. */ lss = &uc.uc_stack; ss.ss_sp = PTRIN(lss->ss_sp); ss.ss_size = lss->ss_size; ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); #ifdef DEBUG if (ldebug(rt_sigreturn)) printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); #endif (void)kern_sigaltstack(td, &ss, NULL); return (EJUSTRETURN); } static int linux32_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; struct syscall_args *sa; p = td->td_proc; frame = td->td_frame; sa = &td->td_sa; sa->args[0] = frame->tf_rbx; sa->args[1] = frame->tf_rcx; sa->args[2] = frame->tf_rdx; sa->args[3] = frame->tf_rsi; sa->args[4] = frame->tf_rdi; sa->args[5] = frame->tf_rbp; /* Unconfirmed */ sa->code = frame->tf_rax; if (sa->code >= p->p_sysent->sv_size) /* nosys */ sa->callp = &p->p_sysent->sv_table[p->p_sysent->sv_size - 1]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; td->td_retval[0] = 0; td->td_retval[1] = frame->tf_rdx; return (0); } /* * Clear registers on exec * XXX copied from ia32_signal.c. */ static void linux_exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; register_t saved_rflags; regs = td->td_frame; pcb = td->td_pcb; if (td->td_proc->p_md.md_ldt != NULL) user_ldt_free(td); critical_enter(); wrmsr(MSR_FSBASE, 0); wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; critical_exit(); pcb->pcb_initial_fpucw = __LINUX_NPXCW__; saved_rflags = regs->tf_rflags & PSL_T; bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = imgp->entry_addr; regs->tf_rsp = stack; regs->tf_rflags = PSL_USER | saved_rflags; regs->tf_gs = _ugssel; regs->tf_fs = _ufssel; regs->tf_es = _udatasel; regs->tf_ds = _udatasel; regs->tf_ss = _udatasel; regs->tf_flags = TF_HASSEGS; regs->tf_cs = _ucode32sel; regs->tf_rbx = imgp->ps_strings; fpstate_drop(td); /* Do full restore on return so that we can change to a different %cs */ set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET); } /* * XXX copied from ia32_sysvec.c. */ static register_t * linux_copyout_strings(struct image_params *imgp) { int argc, envc; u_int32_t *vectp; char *stringp, *destp; u_int32_t *stack_base; struct linux32_ps_strings *arginfo; char canary[LINUX_AT_RANDOM_LEN]; size_t execpath_len; /* Calculate string base and vector table pointers. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; destp = (caddr_t)arginfo - SPARE_USRSPACE - roundup(sizeof(canary), sizeof(char *)) - roundup(execpath_len, sizeof(char *)) - roundup(ARG_MAX - imgp->args->stringspace, sizeof(char *)); if (execpath_len != 0) { imgp->execpathp = (uintptr_t)arginfo - execpath_len; copyout(imgp->execpath, (void *)imgp->execpathp, execpath_len); } /* Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); imgp->canary = (uintptr_t)arginfo - roundup(execpath_len, sizeof(char *)) - roundup(sizeof(canary), sizeof(char *)); copyout(canary, (void *)imgp->canary, sizeof(canary)); vectp = (uint32_t *)destp; if (imgp->auxargs) { /* * Allocate room on the stack for the ELF auxargs * array. It has LINUX_AT_COUNT entries. */ vectp -= howmany(LINUX_AT_COUNT * sizeof(Elf32_Auxinfo), sizeof(*vectp)); } /* * Allocate room for the argv[] and env vectors including the * terminating NULL pointers. */ vectp -= imgp->args->argc + 1 + imgp->args->envc + 1; /* vectp also becomes our initial stack base. */ stack_base = vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* Copy out strings - arguments and environment. */ copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); /* Fill in "ps_strings" struct for ps, w, etc. */ suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp); suword32(&arginfo->ps_nargvstr, argc); /* Fill in argument portion of vector table. */ for (; argc > 0; --argc) { suword32(vectp++, (uint32_t)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* A null vector table pointer separates the argp's from the envp's. */ suword32(vectp++, 0); suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp); suword32(&arginfo->ps_nenvstr, envc); /* Fill in environment portion of vector table. */ for (; envc > 0; --envc) { suword32(vectp++, (uint32_t)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* The end of the vector table is a null pointer. */ suword32(vectp, 0); return ((register_t *)stack_base); } static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0, "32-bit Linux emulation"); static u_long linux32_maxdsiz = LINUX32_MAXDSIZ; SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW, &linux32_maxdsiz, 0, ""); static u_long linux32_maxssiz = LINUX32_MAXSSIZ; SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW, &linux32_maxssiz, 0, ""); static u_long linux32_maxvmem = LINUX32_MAXVMEM; SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW, &linux32_maxvmem, 0, ""); #if defined(DEBUG) SYSCTL_PROC(_compat_linux32, OID_AUTO, debug, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, linux_sysctl_debug, "A", "Linux debugging control"); #endif static void linux32_fixlimit(struct rlimit *rl, int which) { switch (which) { case RLIMIT_DATA: if (linux32_maxdsiz != 0) { if (rl->rlim_cur > linux32_maxdsiz) rl->rlim_cur = linux32_maxdsiz; if (rl->rlim_max > linux32_maxdsiz) rl->rlim_max = linux32_maxdsiz; } break; case RLIMIT_STACK: if (linux32_maxssiz != 0) { if (rl->rlim_cur > linux32_maxssiz) rl->rlim_cur = linux32_maxssiz; if (rl->rlim_max > linux32_maxssiz) rl->rlim_max = linux32_maxssiz; } break; case RLIMIT_VMEM: if (linux32_maxvmem != 0) { if (rl->rlim_cur > linux32_maxvmem) rl->rlim_cur = linux32_maxvmem; if (rl->rlim_max > linux32_maxvmem) rl->rlim_max = linux32_maxvmem; } break; } } struct sysentvec elf_linux_sysvec = { .sv_size = LINUX32_SYS_MAXSYSCALL, .sv_table = linux32_sysent, - .sv_mask = 0, .sv_errsize = ELAST + 1, .sv_errtbl = linux_errtbl, .sv_transtrap = linux_translate_traps, .sv_fixup = linux_fixup_elf, .sv_sendsig = linux_sendsig, .sv_sigcode = &_binary_linux32_locore_o_start, .sv_szsigcode = &linux_szsigcode, .sv_name = "Linux ELF32", .sv_coredump = elf32_coredump, .sv_imgact_try = linux_exec_imgact_try, .sv_minsigstksz = LINUX_MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = LINUX32_MAXUSER, .sv_usrstack = LINUX32_USRSTACK, .sv_psstrings = LINUX32_PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = linux_copyout_strings, .sv_setregs = linux_exec_setregs, .sv_fixlimit = linux32_fixlimit, .sv_maxssiz = &linux32_maxssiz, .sv_flags = SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = linux32_fetch_syscall_args, .sv_syscallnames = NULL, .sv_shared_page_base = LINUX32_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, .sv_thread_detach = linux_thread_detach, .sv_trap = NULL, }; static void linux_vdso_install(void *param) { linux_szsigcode = (&_binary_linux32_locore_o_end - &_binary_linux32_locore_o_start); if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) panic("Linux invalid vdso size\n"); __elfN(linux_vdso_fixup)(&elf_linux_sysvec); linux_shared_page_obj = __elfN(linux_shared_page_init) (&linux_shared_page_mapping); __elfN(linux_vdso_reloc)(&elf_linux_sysvec); bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, linux_szsigcode); elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; linux_kplatform = linux_shared_page_mapping + (linux_platform - (caddr_t)elf_linux_sysvec.sv_shared_page_base); } SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, linux_vdso_install, NULL); static void linux_vdso_deinstall(void *param) { __elfN(linux_shared_page_fini)(linux_shared_page_obj); } SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, linux_vdso_deinstall, NULL); static char GNU_ABI_VENDOR[] = "GNU"; static int GNULINUX_ABI_DESC = 0; static bool linux32_trans_osrel(const Elf_Note *note, int32_t *osrel) { const Elf32_Word *desc; uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); desc = (const Elf32_Word *)p; if (desc[0] != GNULINUX_ABI_DESC) return (false); /* * For Linux we encode osrel using the Linux convention of * (version << 16) | (major << 8) | (minor) * See macro in linux_mib.h */ *osrel = LINUX_KERNVER(desc[1], desc[2], desc[3]); return (true); } static Elf_Brandnote linux32_brandnote = { .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), .hdr.n_descsz = 16, /* XXX at least 16 */ .hdr.n_type = 1, .vendor = GNU_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = linux32_trans_osrel }; static Elf32_Brandinfo linux_brand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", .emul_path = "/compat/linux", .interp_path = "/lib/ld-linux.so.1", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux32_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; static Elf32_Brandinfo linux_glibc2brand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", .emul_path = "/compat/linux", .interp_path = "/lib/ld-linux.so.2", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux32_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; static Elf32_Brandinfo linux_muslbrand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", .emul_path = "/compat/linux", .interp_path = "/lib/ld-musl-i386.so.1", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux32_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; Elf32_Brandinfo *linux_brandlist[] = { &linux_brand, &linux_glibc2brand, &linux_muslbrand, NULL }; static int linux_elf_modevent(module_t mod, int type, void *data) { Elf32_Brandinfo **brandinfo; int error; struct linux_ioctl_handler **lihp; error = 0; switch(type) { case MOD_LOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_insert_brand_entry(*brandinfo) < 0) error = EINVAL; if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux32_ioctl_register_handler(*lihp); LIST_INIT(&futex_list); mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF); stclohz = (stathz ? stathz : hz); if (bootverbose) printf("Linux ELF exec handler installed\n"); } else printf("cannot insert Linux ELF brand handler\n"); break; case MOD_UNLOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_brand_inuse(*brandinfo)) error = EBUSY; if (error == 0) { for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_remove_brand_entry(*brandinfo) < 0) error = EINVAL; } if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux32_ioctl_unregister_handler(*lihp); mtx_destroy(&futex_mtx); if (bootverbose) printf("Linux ELF exec handler removed\n"); } else printf("Could not deinstall ELF interpreter entry\n"); break; default: return (EOPNOTSUPP); } return (error); } static moduledata_t linux_elf_mod = { "linuxelf", linux_elf_modevent, 0 }; DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); MODULE_DEPEND(linuxelf, linux_common, 1, 1, 1); FEATURE(linux, "Linux 32bit support"); Index: head/sys/arm/arm/elf_machdep.c =================================================================== --- head/sys/arm/arm/elf_machdep.c (revision 342242) +++ head/sys/arm/arm/elf_machdep.c (revision 342243) @@ -1,321 +1,320 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VFP #include #endif static boolean_t elf32_arm_abi_supported(struct image_params *); u_long elf_hwcap; u_long elf_hwcap2; struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, - .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD ELF32", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = #if __ARM_ARCH >= 6 SV_SHP | SV_TIMEKEEP | #endif SV_ABI_FREEBSD | SV_ILP32, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_hwcap = &elf_hwcap, .sv_hwcap2 = &elf_hwcap2, }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); static Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_ARM, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, .header_supported= elf32_arm_abi_supported, }; SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); static boolean_t elf32_arm_abi_supported(struct image_params *imgp) { const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header; /* * When configured for EABI, FreeBSD supports EABI vesions 4 and 5. */ if (EF_ARM_EABI_VERSION(hdr->e_flags) < EF_ARM_EABI_FREEBSD_MIN) { if (bootverbose) uprintf("Attempting to execute non EABI binary (rev %d) image %s", EF_ARM_EABI_VERSION(hdr->e_flags), imgp->args->fname); return (FALSE); } return (TRUE); } void elf32_dump_thread(struct thread *td, void *dst, size_t *off) { #ifdef VFP mcontext_vfp_t vfp; if (dst != NULL) { get_vfpcontext(td, &vfp); *off = elf32_populate_note(NT_ARM_VFP, &vfp, dst, sizeof(vfp), NULL); } else *off = elf32_populate_note(NT_ARM_VFP, NULL, NULL, sizeof(vfp), NULL); #endif } bool elf_is_ifunc_reloc(Elf_Size r_info __unused) { return (false); } /* * It is possible for the compiler to emit relocations for unaligned data. * We handle this situation with these inlines. */ #define RELOC_ALIGNED_P(x) \ (((uintptr_t)(x) & (sizeof(void *) - 1)) == 0) static __inline Elf_Addr load_ptr(Elf_Addr *where) { Elf_Addr res; if (RELOC_ALIGNED_P(where)) return *where; memcpy(&res, where, sizeof(res)); return (res); } static __inline void store_ptr(Elf_Addr *where, Elf_Addr val) { if (RELOC_ALIGNED_P(where)) *where = val; else memcpy(where, &val, sizeof(val)); } #undef RELOC_ALIGNED_P /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, int local, elf_lookup_fn lookup) { Elf_Addr *where; Elf_Addr addr; Elf_Addr addend; Elf_Word rtype, symidx; const Elf_Rel *rel; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: rel = (const Elf_Rel *)data; where = (Elf_Addr *) (relocbase + rel->r_offset); addend = load_ptr(where); rtype = ELF_R_TYPE(rel->r_info); symidx = ELF_R_SYM(rel->r_info); break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("unknown reloc type %d\n", type); } if (local) { if (rtype == R_ARM_RELATIVE) { /* A + B */ addr = elf_relocaddr(lf, relocbase + addend); if (load_ptr(where) != addr) store_ptr(where, addr); } return (0); } switch (rtype) { case R_ARM_NONE: /* none */ break; case R_ARM_ABS32: error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; store_ptr(where, addr + load_ptr(where)); break; case R_ARM_COPY: /* none */ /* * There shouldn't be copy relocations in kernel * objects. */ printf("kldload: unexpected R_COPY relocation\n"); return -1; break; case R_ARM_JUMP_SLOT: error = lookup(lf, symidx, 1, &addr); if (error == 0) { store_ptr(where, addr); return (0); } return (-1); case R_ARM_RELATIVE: break; default: printf("kldload: unexpected relocation type %d\n", rtype); return -1; } return(0); } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup)); } int elf_cpu_load_file(linker_file_t lf) { /* * The pmap code does not do an icache sync upon establishing executable * mappings in the kernel pmap. It's an optimization based on the fact * that kernel memory allocations always have EXECUTABLE protection even * when the memory isn't going to hold executable code. The only time * kernel memory holding instructions does need a sync is after loading * a kernel module, and that's when this function gets called. * * This syncs data and instruction caches after loading a module. We * don't worry about the kernel itself (lf->id is 1) as locore.S did * that on entry. Even if data cache maintenance was done by IO code, * the relocation fixup process creates dirty cache entries that we must * write back before doing icache sync. The instruction cache sync also * invalidates the branch predictor cache on platforms that have one. */ if (lf->id == 1) return (0); #if __ARM_ARCH >= 6 dcache_wb_pou((vm_offset_t)lf->address, (vm_size_t)lf->size); icache_inv_all(); #else cpu_dcache_wb_range((vm_offset_t)lf->address, (vm_size_t)lf->size); cpu_l2cache_wb_range((vm_offset_t)lf->address, (vm_size_t)lf->size); cpu_icache_sync_range((vm_offset_t)lf->address, (vm_size_t)lf->size); #endif return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } Index: head/sys/arm/arm/syscall.c =================================================================== --- head/sys/arm/arm/syscall.c (revision 342242) +++ head/sys/arm/arm/syscall.c (revision 342243) @@ -1,178 +1,176 @@ /* $NetBSD: fault.c,v 1.45 2003/11/20 14:44:36 scw Exp $ */ /*- * Copyright 2004 Olivier Houchard * Copyright 2003 Wasabi Systems, Inc. * All rights reserved. * * Written by Steve C. Woodford for Wasabi Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project by * Wasabi Systems, Inc. * 4. The name of Wasabi Systems, Inc. may not be used to endorse * or promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 1994-1997 Mark Brinicombe. * Copyright (c) 1994 Brini. * All rights reserved. * * This code is derived from software written for Brini by Mark Brinicombe * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Brini. * 4. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * RiscBSD kernel project * * fault.c * * Fault handlers * * Created : 28/11/94 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include void swi_handler(struct trapframe *); int cpu_fetch_syscall_args(struct thread *td) { struct proc *p; register_t *ap; struct syscall_args *sa; int error; sa = &td->td_sa; sa->code = td->td_frame->tf_r7; ap = &td->td_frame->tf_r0; if (sa->code == SYS_syscall) { sa->code = *ap++; sa->nap--; } else if (sa->code == SYS___syscall) { sa->code = ap[_QUAD_LOWWORD]; sa->nap -= 2; ap += 2; } p = td->td_proc; - if (p->p_sysent->sv_mask) - sa->code &= p->p_sysent->sv_mask; if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; error = 0; memcpy(sa->args, ap, sa->nap * sizeof(register_t)); if (sa->narg > sa->nap) { error = copyin((void *)td->td_frame->tf_usr_sp, sa->args + sa->nap, (sa->narg - sa->nap) * sizeof(register_t)); } if (error == 0) { td->td_retval[0] = 0; td->td_retval[1] = 0; } return (error); } #include "../../kern/subr_syscall.c" static void syscall(struct thread *td, struct trapframe *frame) { int error; td->td_sa.nap = 4; error = syscallenter(td); KASSERT(error != 0 || td->td_ar == NULL, ("returning from syscall with td_ar set!")); syscallret(td, error); } void swi_handler(struct trapframe *frame) { struct thread *td = curthread; td->td_frame = frame; td->td_pticks = 0; /* * Enable interrupts if they were enabled before the exception. * Since all syscalls *should* come from user mode it will always * be safe to enable them, but check anyway. */ if (td->td_md.md_spinlock_count == 0) { if (__predict_true(frame->tf_spsr & PSR_I) == 0) enable_interrupts(PSR_I); if (__predict_true(frame->tf_spsr & PSR_F) == 0) enable_interrupts(PSR_F); } syscall(td, frame); } Index: head/sys/arm/arm/vm_machdep.c =================================================================== --- head/sys/arm/arm/vm_machdep.c (revision 342242) +++ head/sys/arm/arm/vm_machdep.c (revision 342243) @@ -1,349 +1,347 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1982, 1986 The Regents of the University of California. * Copyright (c) 1989, 1990 William Jolitz * Copyright (c) 1994 John Dyson * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department, and William Jolitz. * * Redistribution and use in source and binary :forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * struct switchframe and trapframe must both be a multiple of 8 * for correct stack alignment. */ _Static_assert((sizeof(struct switchframe) % 8) == 0, "Bad alignment"); _Static_assert((sizeof(struct trapframe) % 8) == 0, "Bad alignment"); uint32_t initial_fpscr = VFPSCR_DN | VFPSCR_FZ; /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child * ready to run and return to user mode. */ void cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) { struct pcb *pcb2; struct trapframe *tf; struct mdproc *mdp2; if ((flags & RFPROC) == 0) return; /* Point the pcb to the top of the stack */ pcb2 = (struct pcb *) (td2->td_kstack + td2->td_kstack_pages * PAGE_SIZE) - 1; #ifdef VFP /* Store actual state of VFP */ if (curthread == td1) { critical_enter(); vfp_store(&td1->td_pcb->pcb_vfpstate, false); critical_exit(); } #endif td2->td_pcb = pcb2; /* Clone td1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); /* Point to mdproc and then copy over td1's contents */ mdp2 = &p2->p_md; bcopy(&td1->td_proc->p_md, mdp2, sizeof(*mdp2)); /* Point the frame to the stack in front of pcb and copy td1's frame */ td2->td_frame = (struct trapframe *)pcb2 - 1; *td2->td_frame = *td1->td_frame; /* * Create a new fresh stack for the new process. * Copy the trap frame for the return to user mode as if from a * syscall. This copies most of the user mode register values. */ pmap_set_pcb_pagedir(vmspace_pmap(p2->p_vmspace), pcb2); pcb2->pcb_regs.sf_r4 = (register_t)fork_return; pcb2->pcb_regs.sf_r5 = (register_t)td2; pcb2->pcb_regs.sf_lr = (register_t)fork_trampoline; pcb2->pcb_regs.sf_sp = STACKALIGN(td2->td_frame); #if __ARM_ARCH >= 6 pcb2->pcb_regs.sf_tpidrurw = (register_t)get_tls(); #endif pcb2->pcb_vfpcpu = -1; pcb2->pcb_vfpstate.fpscr = initial_fpscr; tf = td2->td_frame; tf->tf_spsr &= ~PSR_C; tf->tf_r0 = 0; tf->tf_r1 = 0; /* Setup to release spin count in fork_exit(). */ td2->td_md.md_spinlock_count = 1; td2->td_md.md_saved_cspr = PSR_SVC32_MODE; #if __ARM_ARCH < 6 td2->td_md.md_tp = *(register_t *)ARM_TP_ADDRESS; #endif } void cpu_thread_swapin(struct thread *td) { } void cpu_thread_swapout(struct thread *td) { } void cpu_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame; int fixup; #ifdef __ARMEB__ u_int call; #endif frame = td->td_frame; fixup = 0; #ifdef __ARMEB__ /* * __syscall returns an off_t while most other syscalls return an * int. As an off_t is 64-bits and an int is 32-bits we need to * place the returned data into r1. As the lseek and freebsd6_lseek * syscalls also return an off_t they do not need this fixup. */ call = frame->tf_r7; if (call == SYS___syscall) { register_t *ap = &frame->tf_r0; register_t code = ap[_QUAD_LOWWORD]; - if (td->td_proc->p_sysent->sv_mask) - code &= td->td_proc->p_sysent->sv_mask; fixup = (code != SYS_lseek); } #endif switch (error) { case 0: if (fixup) { frame->tf_r0 = 0; frame->tf_r1 = td->td_retval[0]; } else { frame->tf_r0 = td->td_retval[0]; frame->tf_r1 = td->td_retval[1]; } frame->tf_spsr &= ~PSR_C; /* carry bit */ break; case ERESTART: /* * Reconstruct the pc to point at the swi. */ #if __ARM_ARCH >= 7 if ((frame->tf_spsr & PSR_T) != 0) frame->tf_pc -= THUMB_INSN_SIZE; else #endif frame->tf_pc -= INSN_SIZE; break; case EJUSTRETURN: /* nothing to do */ break; default: frame->tf_r0 = SV_ABI_ERRNO(td->td_proc, error); frame->tf_spsr |= PSR_C; /* carry bit */ break; } } /* * Initialize machine state, mostly pcb and trap frame for a new * thread, about to return to userspace. Put enough state in the new * thread's PCB to get it to go back to the fork_return(), which * finalizes the thread state and handles peculiarities of the first * return to userspace for the new thread. */ void cpu_copy_thread(struct thread *td, struct thread *td0) { bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe)); bcopy(td0->td_pcb, td->td_pcb, sizeof(struct pcb)); td->td_pcb->pcb_regs.sf_r4 = (register_t)fork_return; td->td_pcb->pcb_regs.sf_r5 = (register_t)td; td->td_pcb->pcb_regs.sf_lr = (register_t)fork_trampoline; td->td_pcb->pcb_regs.sf_sp = STACKALIGN(td->td_frame); td->td_frame->tf_spsr &= ~PSR_C; td->td_frame->tf_r0 = 0; /* Setup to release spin count in fork_exit(). */ td->td_md.md_spinlock_count = 1; td->td_md.md_saved_cspr = PSR_SVC32_MODE; } /* * Set that machine state for performing an upcall that starts * the entry function with the given argument. */ void cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg, stack_t *stack) { struct trapframe *tf = td->td_frame; tf->tf_usr_sp = STACKALIGN((int)stack->ss_sp + stack->ss_size); tf->tf_pc = (int)entry; tf->tf_r0 = (int)arg; tf->tf_spsr = PSR_USR32_MODE; } int cpu_set_user_tls(struct thread *td, void *tls_base) { #if __ARM_ARCH >= 6 td->td_pcb->pcb_regs.sf_tpidrurw = (register_t)tls_base; if (td == curthread) set_tls(tls_base); #else td->td_md.md_tp = (register_t)tls_base; if (td == curthread) { critical_enter(); *(register_t *)ARM_TP_ADDRESS = (register_t)tls_base; critical_exit(); } #endif return (0); } void cpu_thread_exit(struct thread *td) { } void cpu_thread_alloc(struct thread *td) { td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_pages * PAGE_SIZE) - 1; /* * Ensure td_frame is aligned to an 8 byte boundary as it will be * placed into the stack pointer which must be 8 byte aligned in * the ARM EABI. */ td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb) - 1; } void cpu_thread_free(struct thread *td) { } void cpu_thread_clean(struct thread *td) { } /* * Intercept the return address from a freshly forked process that has NOT * been scheduled yet. * * This is needed to make kernel threads stay in kernel mode. */ void cpu_fork_kthread_handler(struct thread *td, void (*func)(void *), void *arg) { td->td_pcb->pcb_regs.sf_r4 = (register_t)func; /* function */ td->td_pcb->pcb_regs.sf_r5 = (register_t)arg; /* first arg */ } /* * Software interrupt handler for queued VM system processing. */ void swi_vm(void *dummy) { if (busdma_swi_pending) busdma_swi(); } void cpu_exit(struct thread *td) { } Index: head/sys/arm64/arm64/elf_machdep.c =================================================================== --- head/sys/arm64/arm64/elf_machdep.c (revision 342242) +++ head/sys/arm64/arm64/elf_machdep.c (revision 342243) @@ -1,215 +1,214 @@ /*- * Copyright (c) 2014, 2015 The FreeBSD Foundation. * Copyright (c) 2014 Andrew Turner. * All rights reserved. * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "linker_if.h" static struct sysentvec elf64_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, - .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD ELF64", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_SHP | SV_TIMEKEEP | SV_ABI_FREEBSD | SV_LP64, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, }; INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec); static Elf64_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_AARCH64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_info); void elf64_dump_thread(struct thread *td __unused, void *dst __unused, size_t *off __unused) { } bool elf_is_ifunc_reloc(Elf_Size r_info __unused) { return (ELF_R_TYPE(r_info) == R_AARCH64_IRELATIVE); } static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, int local, elf_lookup_fn lookup) { Elf_Addr *where, addr, addend, val; Elf_Word rtype, symidx; const Elf_Rel *rel; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: rel = (const Elf_Rel *)data; where = (Elf_Addr *) (relocbase + rel->r_offset); addend = *where; rtype = ELF_R_TYPE(rel->r_info); symidx = ELF_R_SYM(rel->r_info); break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("unknown reloc type %d\n", type); } if (local) { if (rtype == R_AARCH64_RELATIVE) *where = elf_relocaddr(lf, relocbase + addend); return (0); } switch (rtype) { case R_AARCH64_NONE: case R_AARCH64_RELATIVE: break; case R_AARCH64_ABS64: case R_AARCH64_GLOB_DAT: case R_AARCH64_JUMP_SLOT: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); *where = addr + addend; break; case R_AARCH64_IRELATIVE: addr = relocbase + addend; val = ((Elf64_Addr (*)(void))addr)(); if (*where != val) *where = val; break; default: printf("kldload: unexpected relocation type %d\n", rtype); return (-1); } return (0); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup)); } /* Process one elf relocation with addend. */ int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); } int elf_cpu_load_file(linker_file_t lf) { if (lf->id != 1) cpu_icache_sync_range((vm_offset_t)lf->address, lf->size); return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } Index: head/sys/arm64/arm64/trap.c =================================================================== --- head/sys/arm64/arm64/trap.c (revision 342242) +++ head/sys/arm64/arm64/trap.c (revision 342243) @@ -1,512 +1,510 @@ /*- * Copyright (c) 2014 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #ifdef KDB #include #endif #include #include #include #include #include #include #include #include #include #include #ifdef KDTRACE_HOOKS #include #endif #ifdef VFP #include #endif #ifdef KDB #include #endif #ifdef DDB #include #endif extern register_t fsu_intr_fault; /* Called from exception.S */ void do_el1h_sync(struct thread *, struct trapframe *); void do_el0_sync(struct thread *, struct trapframe *); void do_el0_error(struct trapframe *); void do_serror(struct trapframe *); void unhandled_exception(struct trapframe *); static void print_registers(struct trapframe *frame); int (*dtrace_invop_jump_addr)(struct trapframe *); static __inline void call_trapsignal(struct thread *td, int sig, int code, void *addr) { ksiginfo_t ksi; ksiginfo_init_trap(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = code; ksi.ksi_addr = addr; trapsignal(td, &ksi); } int cpu_fetch_syscall_args(struct thread *td) { struct proc *p; register_t *ap; struct syscall_args *sa; int nap; nap = 8; p = td->td_proc; ap = td->td_frame->tf_x; sa = &td->td_sa; sa->code = td->td_frame->tf_x[8]; if (sa->code == SYS_syscall || sa->code == SYS___syscall) { sa->code = *ap++; nap--; } - if (p->p_sysent->sv_mask) - sa->code &= p->p_sysent->sv_mask; if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; memcpy(sa->args, ap, nap * sizeof(register_t)); if (sa->narg > nap) panic("ARM64TODO: Could we have more than 8 args?"); td->td_retval[0] = 0; td->td_retval[1] = 0; return (0); } #include "../../kern/subr_syscall.c" static void svc_handler(struct thread *td, struct trapframe *frame) { int error; if ((frame->tf_esr & ESR_ELx_ISS_MASK) == 0) { error = syscallenter(td); syscallret(td, error); } else { call_trapsignal(td, SIGILL, ILL_ILLOPN, (void *)frame->tf_elr); userret(td, frame); } } static void data_abort(struct thread *td, struct trapframe *frame, uint64_t esr, uint64_t far, int lower) { struct vm_map *map; struct proc *p; struct pcb *pcb; vm_prot_t ftype; vm_offset_t va; int error, sig, ucode; #ifdef KDB bool handled; #endif /* * According to the ARMv8-A rev. A.g, B2.10.5 "Load-Exclusive * and Store-Exclusive instruction usage restrictions", state * of the exclusive monitors after data abort exception is unknown. */ clrex(); #ifdef KDB if (kdb_active) { kdb_reenter(); return; } #endif pcb = td->td_pcb; p = td->td_proc; if (lower) map = &p->p_vmspace->vm_map; else { /* The top bit tells us which range to use */ if (far >= VM_MAXUSER_ADDRESS) { map = kernel_map; } else { map = &p->p_vmspace->vm_map; if (map == NULL) map = kernel_map; } } /* * The call to pmap_fault can be dangerous when coming from the * kernel as it may be not be able to lock the pmap to check if * the address is now valid. Because of this we filter the cases * when we are not going to see superpage activity. */ if (!lower) { /* * We may fault in a DMAP region due to a superpage being * unmapped when the access took place. */ if (map == kernel_map && !VIRT_IN_DMAP(far)) goto no_pmap_fault; /* * We can also fault in the userspace handling functions, * e.g. copyin. In these cases we will have set a fault * handler so we can check if this is set before calling * pmap_fault. */ if (map != kernel_map && pcb->pcb_onfault == 0) goto no_pmap_fault; } if (pmap_fault(map->pmap, esr, far) == KERN_SUCCESS) return; no_pmap_fault: KASSERT(td->td_md.md_spinlock_count == 0, ("data abort with spinlock held")); if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) { print_registers(frame); printf(" far: %16lx\n", far); printf(" esr: %.8lx\n", esr); panic("data abort in critical section or under mutex"); } va = trunc_page(far); ftype = ((esr >> 6) & 1) ? VM_PROT_READ | VM_PROT_WRITE : VM_PROT_READ; /* Fault in the page. */ error = vm_fault(map, va, ftype, VM_FAULT_NORMAL); if (error != KERN_SUCCESS) { if (lower) { sig = SIGSEGV; if (error == KERN_PROTECTION_FAILURE) ucode = SEGV_ACCERR; else ucode = SEGV_MAPERR; call_trapsignal(td, sig, ucode, (void *)far); } else { if (td->td_intr_nesting_level == 0 && pcb->pcb_onfault != 0) { frame->tf_x[0] = error; frame->tf_elr = pcb->pcb_onfault; return; } printf("Fatal data abort:\n"); print_registers(frame); printf(" far: %16lx\n", far); printf(" esr: %.8lx\n", esr); #ifdef KDB if (debugger_on_trap) { kdb_why = KDB_WHY_TRAP; handled = kdb_trap(ESR_ELx_EXCEPTION(esr), 0, frame); kdb_why = KDB_WHY_UNSET; if (handled) return; } #endif panic("vm_fault failed: %lx", frame->tf_elr); } } if (lower) userret(td, frame); } static void print_registers(struct trapframe *frame) { u_int reg; for (reg = 0; reg < nitems(frame->tf_x); reg++) { printf(" %sx%d: %16lx\n", (reg < 10) ? " " : "", reg, frame->tf_x[reg]); } printf(" sp: %16lx\n", frame->tf_sp); printf(" lr: %16lx\n", frame->tf_lr); printf(" elr: %16lx\n", frame->tf_elr); printf("spsr: %8x\n", frame->tf_spsr); } void do_el1h_sync(struct thread *td, struct trapframe *frame) { struct trapframe *oframe; uint32_t exception; uint64_t esr, far; /* Read the esr register to get the exception details */ esr = frame->tf_esr; exception = ESR_ELx_EXCEPTION(esr); #ifdef KDTRACE_HOOKS if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, exception)) return; #endif CTR4(KTR_TRAP, "do_el1_sync: curthread: %p, esr %lx, elr: %lx, frame: %p", td, esr, frame->tf_elr, frame); oframe = td->td_frame; switch (exception) { case EXCP_BRK: case EXCP_WATCHPT_EL1: case EXCP_SOFTSTP_EL1: break; default: td->td_frame = frame; break; } switch(exception) { case EXCP_FP_SIMD: case EXCP_TRAP_FP: #ifdef VFP if ((td->td_pcb->pcb_fpflags & PCB_FP_KERN) != 0) { vfp_restore_state(); } else #endif { print_registers(frame); printf(" esr: %.8lx\n", esr); panic("VFP exception in the kernel"); } break; case EXCP_INSN_ABORT: case EXCP_DATA_ABORT: far = READ_SPECIALREG(far_el1); intr_enable(); data_abort(td, frame, esr, far, 0); break; case EXCP_BRK: #ifdef KDTRACE_HOOKS if ((esr & ESR_ELx_ISS_MASK) == 0x40d && \ dtrace_invop_jump_addr != 0) { dtrace_invop_jump_addr(frame); break; } #endif #ifdef KDB kdb_trap(exception, 0, (td->td_frame != NULL) ? td->td_frame : frame); #else panic("No debugger in kernel.\n"); #endif frame->tf_elr += 4; break; case EXCP_WATCHPT_EL1: case EXCP_SOFTSTP_EL1: #ifdef KDB kdb_trap(exception, 0, (td->td_frame != NULL) ? td->td_frame : frame); #else panic("No debugger in kernel.\n"); #endif break; case EXCP_UNKNOWN: if (undef_insn(1, frame)) break; /* FALLTHROUGH */ default: print_registers(frame); panic("Unknown kernel exception %x esr_el1 %lx\n", exception, esr); } td->td_frame = oframe; } void do_el0_sync(struct thread *td, struct trapframe *frame) { pcpu_bp_harden bp_harden; uint32_t exception; uint64_t esr, far; /* Check we have a sane environment when entering from userland */ KASSERT((uintptr_t)get_pcpu() >= VM_MIN_KERNEL_ADDRESS, ("Invalid pcpu address from userland: %p (tpidr %lx)", get_pcpu(), READ_SPECIALREG(tpidr_el1))); esr = frame->tf_esr; exception = ESR_ELx_EXCEPTION(esr); switch (exception) { case EXCP_INSN_ABORT_L: far = READ_SPECIALREG(far_el1); /* * Userspace may be trying to train the branch predictor to * attack the kernel. If we are on a CPU affected by this * call the handler to clear the branch predictor state. */ if (far > VM_MAXUSER_ADDRESS) { bp_harden = PCPU_GET(bp_harden); if (bp_harden != NULL) bp_harden(); } break; case EXCP_UNKNOWN: case EXCP_DATA_ABORT_L: case EXCP_DATA_ABORT: far = READ_SPECIALREG(far_el1); break; } intr_enable(); CTR4(KTR_TRAP, "do_el0_sync: curthread: %p, esr %lx, elr: %lx, frame: %p", td, esr, frame->tf_elr, frame); switch(exception) { case EXCP_FP_SIMD: case EXCP_TRAP_FP: #ifdef VFP vfp_restore_state(); #else panic("VFP exception in userland"); #endif break; case EXCP_SVC32: case EXCP_SVC64: svc_handler(td, frame); break; case EXCP_INSN_ABORT_L: case EXCP_DATA_ABORT_L: case EXCP_DATA_ABORT: data_abort(td, frame, esr, far, 1); break; case EXCP_UNKNOWN: if (!undef_insn(0, frame)) call_trapsignal(td, SIGILL, ILL_ILLTRP, (void *)far); userret(td, frame); break; case EXCP_SP_ALIGN: call_trapsignal(td, SIGBUS, BUS_ADRALN, (void *)frame->tf_sp); userret(td, frame); break; case EXCP_PC_ALIGN: call_trapsignal(td, SIGBUS, BUS_ADRALN, (void *)frame->tf_elr); userret(td, frame); break; case EXCP_BRK: call_trapsignal(td, SIGTRAP, TRAP_BRKPT, (void *)frame->tf_elr); userret(td, frame); break; case EXCP_MSR: call_trapsignal(td, SIGILL, ILL_PRVOPC, (void *)frame->tf_elr); userret(td, frame); break; case EXCP_SOFTSTP_EL0: td->td_frame->tf_spsr &= ~PSR_SS; td->td_pcb->pcb_flags &= ~PCB_SINGLE_STEP; WRITE_SPECIALREG(MDSCR_EL1, READ_SPECIALREG(MDSCR_EL1) & ~DBG_MDSCR_SS); call_trapsignal(td, SIGTRAP, TRAP_TRACE, (void *)frame->tf_elr); userret(td, frame); break; default: call_trapsignal(td, SIGBUS, BUS_OBJERR, (void *)frame->tf_elr); userret(td, frame); break; } KASSERT((td->td_pcb->pcb_fpflags & ~PCB_FP_USERMASK) == 0, ("Kernel VFP flags set while entering userspace")); KASSERT( td->td_pcb->pcb_fpusaved == &td->td_pcb->pcb_fpustate, ("Kernel VFP state in use when entering userspace")); } /* * TODO: We will need to handle these later when we support ARMv8.2 RAS. */ void do_serror(struct trapframe *frame) { uint64_t esr, far; far = READ_SPECIALREG(far_el1); esr = frame->tf_esr; print_registers(frame); printf(" far: %16lx\n", far); printf(" esr: %.8lx\n", esr); panic("Unhandled System Error"); } void unhandled_exception(struct trapframe *frame) { uint64_t esr, far; far = READ_SPECIALREG(far_el1); esr = frame->tf_esr; print_registers(frame); printf(" far: %16lx\n", far); printf(" esr: %.8lx\n", esr); panic("Unhandled exception"); } Index: head/sys/arm64/linux/linux_sysvec.c =================================================================== --- head/sys/arm64/linux/linux_sysvec.c (revision 342242) +++ head/sys/arm64/linux/linux_sysvec.c (revision 342243) @@ -1,547 +1,544 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1994-1996 Søren Schmidt * Copyright (c) 2018 Turing Robotic Industries Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_VERSION(linux64elf, 1); #if defined(DEBUG) SYSCTL_PROC(_compat_linux, OID_AUTO, debug, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, linux_sysctl_debug, "A", "64-bit Linux debugging control"); #endif const char *linux_kplatform; static int linux_szsigcode; static vm_object_t linux_shared_page_obj; static char *linux_shared_page_mapping; extern char _binary_linux_locore_o_start; extern char _binary_linux_locore_o_end; extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); static register_t *linux_copyout_strings(struct image_params *imgp); static int linux_elf_fixup(register_t **stack_base, struct image_params *iparams); static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel); static void linux_vdso_install(const void *param); static void linux_vdso_deinstall(const void *param); static void linux_set_syscall_retval(struct thread *td, int error); static int linux_fetch_syscall_args(struct thread *td); static void linux_exec_setregs(struct thread *td, struct image_params *imgp, u_long stack); static int linux_vsyscall(struct thread *td); /* DTrace init */ LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); /* DTrace probes */ LIN_SDT_PROBE_DEFINE2(sysvec, linux_translate_traps, todo, "int", "int"); LIN_SDT_PROBE_DEFINE0(sysvec, linux_exec_setregs, todo); LIN_SDT_PROBE_DEFINE0(sysvec, linux_elf_fixup, todo); LIN_SDT_PROBE_DEFINE0(sysvec, linux_rt_sigreturn, todo); LIN_SDT_PROBE_DEFINE0(sysvec, linux_rt_sendsig, todo); LIN_SDT_PROBE_DEFINE0(sysvec, linux_vsyscall, todo); LIN_SDT_PROBE_DEFINE0(sysvec, linux_vdso_install, todo); LIN_SDT_PROBE_DEFINE0(sysvec, linux_vdso_deinstall, todo); /* LINUXTODO: do we have traps to translate? */ static int linux_translate_traps(int signal, int trap_code) { LIN_SDT_PROBE2(sysvec, linux_translate_traps, todo, signal, trap_code); return (signal); } LINUX_VDSO_SYM_CHAR(linux_platform); static int linux_fetch_syscall_args(struct thread *td) { struct proc *p; struct syscall_args *sa; register_t *ap; p = td->td_proc; ap = td->td_frame->tf_x; sa = &td->td_sa; sa->code = td->td_frame->tf_x[8]; /* LINUXTODO: generic syscall? */ - if (p->p_sysent->sv_mask) - sa->code &= p->p_sysent->sv_mask; if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; if (sa->narg > 8) panic("ARM64TODO: Could we have more than 8 args?"); memcpy(sa->args, ap, 8 * sizeof(register_t)); td->td_retval[0] = 0; return (0); } static void linux_set_syscall_retval(struct thread *td, int error) { td->td_retval[1] = td->td_frame->tf_x[1]; cpu_set_syscall_retval(td, error); } static int linux_elf_fixup(register_t **stack_base, struct image_params *imgp) { Elf_Auxargs *args; Elf_Auxinfo *argarray, *pos; Elf_Addr *auxbase, *base; struct ps_strings *arginfo; struct proc *p; int error, issetugid; LIN_SDT_PROBE0(sysvec, linux_elf_fixup, todo); p = imgp->proc; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; KASSERT(curthread->td_proc == imgp->proc, ("unsafe linux_elf_fixup(), should be curproc")); base = (Elf64_Addr *)*stack_base; args = (Elf64_Auxargs *)imgp->auxargs; /* Auxargs after argc, and NULL-terminated argv and envv lists. */ auxbase = base + 1 + imgp->args->argc + 1 + imgp->args->envc + 1; argarray = pos = malloc(LINUX_AT_COUNT * sizeof(*pos), M_TEMP, M_WAITOK | M_ZERO); issetugid = p->p_flag & P_SUGID ? 1 : 0; AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, imgp->proc->p_sysent->sv_shared_page_base); #if 0 /* LINUXTODO: implement arm64 LINUX_AT_HWCAP */ AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature); #endif AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz); AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); AUXARGS_ENTRY(pos, AT_PHENT, args->phent); AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); AUXARGS_ENTRY(pos, AT_BASE, args->base); AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); AUXARGS_ENTRY(pos, LINUX_AT_SECURE, issetugid); #if 0 /* LINUXTODO: implement arm64 LINUX_AT_PLATFORM */ AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform)); #endif AUXARGS_ENTRY(pos, LINUX_AT_RANDOM, imgp->canary); if (imgp->execpathp != 0) AUXARGS_ENTRY(pos, LINUX_AT_EXECFN, imgp->execpathp); if (args->execfd != -1) AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); AUXARGS_ENTRY(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); imgp->auxargs = NULL; KASSERT(pos - argarray <= LINUX_AT_COUNT, ("Too many auxargs")); error = copyout(argarray, auxbase, sizeof(*argarray) * LINUX_AT_COUNT); free(argarray, M_TEMP); if (error != 0) return (error); return (0); } /* * Copy strings out to the new process address space, constructing new arg * and env vector tables. Return a pointer to the base so that it can be used * as the initial stack pointer. * LINUXTODO: deduplicate against other linuxulator archs */ static register_t * linux_copyout_strings(struct image_params *imgp) { char **vectp; char *stringp, *destp; register_t *stack_base; struct ps_strings *arginfo; char canary[LINUX_AT_RANDOM_LEN]; size_t execpath_len; struct proc *p; int argc, envc; /* Calculate string base and vector table pointers. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; p = imgp->proc; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; destp = (caddr_t)arginfo - SPARE_USRSPACE - roundup(sizeof(canary), sizeof(char *)) - roundup(execpath_len, sizeof(char *)) - roundup(ARG_MAX - imgp->args->stringspace, sizeof(char *)); if (execpath_len != 0) { imgp->execpathp = (uintptr_t)arginfo - execpath_len; copyout(imgp->execpath, (void *)imgp->execpathp, execpath_len); } /* Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); imgp->canary = (uintptr_t)arginfo - roundup(execpath_len, sizeof(char *)) - roundup(sizeof(canary), sizeof(char *)); copyout(canary, (void *)imgp->canary, sizeof(canary)); vectp = (char **)destp; if (imgp->auxargs) { /* * Allocate room on the stack for the ELF auxargs * array. It has up to LINUX_AT_COUNT entries. */ vectp -= howmany(LINUX_AT_COUNT * sizeof(Elf64_Auxinfo), sizeof(*vectp)); } /* * Allocate room for argc and the argv[] and env vectors including the * terminating NULL pointers. */ vectp -= 1 + imgp->args->argc + 1 + imgp->args->envc + 1; vectp = (char **)STACKALIGN(vectp); /* vectp also becomes our initial stack base. */ stack_base = (register_t *)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* Copy out strings - arguments and environment. */ copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); /* Fill in "ps_strings" struct for ps, w, etc. */ suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp); suword(&arginfo->ps_nargvstr, argc); suword(vectp++, argc); /* Fill in argument portion of vector table. */ for (; argc > 0; --argc) { suword(vectp++, (long)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* A null vector table pointer separates the argp's from the envp's. */ suword(vectp++, 0); suword(&arginfo->ps_envstr, (long)(intptr_t)vectp); suword(&arginfo->ps_nenvstr, envc); /* Fill in environment portion of vector table. */ for (; envc > 0; --envc) { suword(vectp++, (long)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* The end of the vector table is a null pointer. */ suword(vectp, 0); return (stack_base); } /* * Reset registers to default values on exec. */ static void linux_exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *regs = td->td_frame; /* LINUXTODO: validate */ LIN_SDT_PROBE0(sysvec, linux_exec_setregs, todo); memset(regs, 0, sizeof(*regs)); /* glibc start.S registers function pointer in x0 with atexit. */ regs->tf_sp = stack; #if 0 /* LINUXTODO: See if this is used. */ regs->tf_lr = imgp->entry_addr; #else regs->tf_lr = 0xffffffffffffffff; #endif regs->tf_elr = imgp->entry_addr; } int linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) { /* LINUXTODO: implement */ LIN_SDT_PROBE0(sysvec, linux_rt_sigreturn, todo); return (EDOOFUS); } static void linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { /* LINUXTODO: implement */ LIN_SDT_PROBE0(sysvec, linux_rt_sendsig, todo); } static int linux_vsyscall(struct thread *td) { /* LINUXTODO: implement */ LIN_SDT_PROBE0(sysvec, linux_vsyscall, todo); return (EDOOFUS); } struct sysentvec elf_linux_sysvec = { .sv_size = LINUX_SYS_MAXSYSCALL, .sv_table = linux_sysent, - .sv_mask = 0, .sv_errsize = ELAST + 1, .sv_errtbl = linux_errtbl, .sv_transtrap = linux_translate_traps, .sv_fixup = linux_elf_fixup, .sv_sendsig = linux_rt_sendsig, .sv_sigcode = &_binary_linux_locore_o_start, .sv_szsigcode = &linux_szsigcode, .sv_name = "Linux ELF64", .sv_coredump = elf64_coredump, .sv_imgact_try = linux_exec_imgact_try, .sv_minsigstksz = LINUX_MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, /* XXX */ .sv_stackprot = VM_PROT_ALL, /* XXX */ .sv_copyout_strings = linux_copyout_strings, .sv_setregs = linux_exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_LINUX | SV_LP64 | SV_SHP, .sv_set_syscall_retval = linux_set_syscall_retval, .sv_fetch_syscall_args = linux_fetch_syscall_args, .sv_syscallnames = NULL, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, .sv_thread_detach = linux_thread_detach, .sv_trap = linux_vsyscall, }; static void linux_vdso_install(const void *param) { linux_szsigcode = (&_binary_linux_locore_o_end - &_binary_linux_locore_o_start); if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) panic("invalid Linux VDSO size\n"); __elfN(linux_vdso_fixup)(&elf_linux_sysvec); linux_shared_page_obj = __elfN(linux_shared_page_init) (&linux_shared_page_mapping); __elfN(linux_vdso_reloc)(&elf_linux_sysvec); memcpy(linux_shared_page_mapping, elf_linux_sysvec.sv_sigcode, linux_szsigcode); elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; printf("LINUXTODO: %s: fix linux_kplatform\n", __func__); #if 0 linux_kplatform = linux_shared_page_mapping + (linux_platform - (caddr_t)elf_linux_sysvec.sv_shared_page_base); #else linux_kplatform = "arm64"; #endif } SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, linux_vdso_install, NULL); static void linux_vdso_deinstall(const void *param) { LIN_SDT_PROBE0(sysvec, linux_vdso_deinstall, todo); __elfN(linux_shared_page_fini)(linux_shared_page_obj); } SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, linux_vdso_deinstall, NULL); static char GNU_ABI_VENDOR[] = "GNU"; static int GNU_ABI_LINUX = 0; /* LINUXTODO: deduplicate */ static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel) { const Elf32_Word *desc; uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); desc = (const Elf32_Word *)p; if (desc[0] != GNU_ABI_LINUX) return (false); *osrel = LINUX_KERNVER(desc[1], desc[2], desc[3]); return (true); } static Elf_Brandnote linux64_brandnote = { .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), .hdr.n_descsz = 16, .hdr.n_type = 1, .vendor = GNU_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = linux_trans_osrel }; static Elf64_Brandinfo linux_glibc2brand = { .brand = ELFOSABI_LINUX, .machine = EM_AARCH64, .compat_3_brand = "Linux", .emul_path = "/compat/linux", .interp_path = "/lib64/ld-linux-x86-64.so.2", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux64_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; Elf64_Brandinfo *linux_brandlist[] = { &linux_glibc2brand, NULL }; static int linux64_elf_modevent(module_t mod, int type, void *data) { Elf64_Brandinfo **brandinfo; struct linux_ioctl_handler**lihp; int error; error = 0; switch(type) { case MOD_LOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf64_insert_brand_entry(*brandinfo) < 0) error = EINVAL; if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_register_handler(*lihp); LIST_INIT(&futex_list); mtx_init(&futex_mtx, "ftllk64", NULL, MTX_DEF); stclohz = (stathz ? stathz : hz); if (bootverbose) printf("Linux arm64 ELF exec handler installed\n"); } break; case MOD_UNLOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf64_brand_inuse(*brandinfo)) error = EBUSY; if (error == 0) { for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf64_remove_brand_entry(*brandinfo) < 0) error = EINVAL; } if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_unregister_handler(*lihp); mtx_destroy(&futex_mtx); if (bootverbose) printf("Linux ELF exec handler removed\n"); } else printf("Could not deinstall ELF interpreter entry\n"); break; default: return (EOPNOTSUPP); } return (error); } static moduledata_t linux64_elf_mod = { "linux64elf", linux64_elf_modevent, 0 }; DECLARE_MODULE_TIED(linux64elf, linux64_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); MODULE_DEPEND(linux64elf, linux_common, 1, 1, 1); FEATURE(linux64, "AArch64 Linux 64bit support"); Index: head/sys/compat/ia32/ia32_sysvec.c =================================================================== --- head/sys/compat/ia32/ia32_sysvec.c (revision 342242) +++ head/sys/compat/ia32/ia32_sysvec.c (revision 342243) @@ -1,235 +1,234 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2002 Doug Rabson * Copyright (c) 2003 Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include CTASSERT(sizeof(struct ia32_mcontext) == 640); CTASSERT(sizeof(struct ia32_ucontext) == 704); CTASSERT(sizeof(struct ia32_sigframe) == 800); CTASSERT(sizeof(struct siginfo32) == 64); #ifdef COMPAT_FREEBSD4 CTASSERT(sizeof(struct ia32_mcontext4) == 260); CTASSERT(sizeof(struct ia32_ucontext4) == 324); CTASSERT(sizeof(struct ia32_sigframe4) == 408); #endif extern const char *freebsd32_syscallnames[]; static SYSCTL_NODE(_compat, OID_AUTO, ia32, CTLFLAG_RW, 0, "ia32 mode"); static u_long ia32_maxdsiz = IA32_MAXDSIZ; SYSCTL_ULONG(_compat_ia32, OID_AUTO, maxdsiz, CTLFLAG_RWTUN, &ia32_maxdsiz, 0, ""); u_long ia32_maxssiz = IA32_MAXSSIZ; SYSCTL_ULONG(_compat_ia32, OID_AUTO, maxssiz, CTLFLAG_RWTUN, &ia32_maxssiz, 0, ""); static u_long ia32_maxvmem = IA32_MAXVMEM; SYSCTL_ULONG(_compat_ia32, OID_AUTO, maxvmem, CTLFLAG_RWTUN, &ia32_maxvmem, 0, ""); struct sysentvec ia32_freebsd_sysvec = { .sv_size = FREEBSD32_SYS_MAXSYSCALL, .sv_table = freebsd32_sysent, - .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = elf32_freebsd_fixup, .sv_sendsig = ia32_sendsig, .sv_sigcode = ia32_sigcode, .sv_szsigcode = &sz_ia32_sigcode, .sv_name = "FreeBSD ELF32", .sv_coredump = elf32_coredump, .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = IA32_PAGE_SIZE, .sv_minuser = FREEBSD32_MINUSER, .sv_maxuser = FREEBSD32_MAXUSER, .sv_usrstack = FREEBSD32_USRSTACK, .sv_psstrings = FREEBSD32_PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = freebsd32_copyout_strings, .sv_setregs = ia32_setregs, .sv_fixlimit = ia32_fixlimit, .sv_maxssiz = &ia32_maxssiz, .sv_flags = SV_ABI_FREEBSD | SV_IA32 | SV_ILP32 | SV_SHP | SV_TIMEKEEP, .sv_set_syscall_retval = ia32_set_syscall_retval, .sv_fetch_syscall_args = ia32_fetch_syscall_args, .sv_syscallnames = freebsd32_syscallnames, .sv_shared_page_base = FREEBSD32_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, }; INIT_SYSENTVEC(elf_ia32_sysvec, &ia32_freebsd_sysvec); static Elf32_Brandinfo ia32_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &ia32_freebsd_sysvec, .interp_newpath = "/libexec/ld-elf32.so.1", .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(ia32, SI_SUB_EXEC, SI_ORDER_MIDDLE, (sysinit_cfunc_t) elf32_insert_brand_entry, &ia32_brand_info); static Elf32_Brandinfo ia32_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &ia32_freebsd_sysvec, .interp_newpath = "/libexec/ld-elf32.so.1", .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(oia32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &ia32_brand_oinfo); static Elf32_Brandinfo kia32_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/lib/ld.so.1", .sysvec = &ia32_freebsd_sysvec, .brand_note = &elf32_kfreebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY }; SYSINIT(kia32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &kia32_brand_info); void elf32_dump_thread(struct thread *td, void *dst, size_t *off) { void *buf; size_t len; len = 0; if (use_xsave) { if (dst != NULL) { fpugetregs(td); len += elf32_populate_note(NT_X86_XSTATE, get_pcb_user_save_td(td), dst, cpu_max_ext_state_size, &buf); *(uint64_t *)((char *)buf + X86_XSTATE_XCR0_OFFSET) = xsave_mask; } else len += elf32_populate_note(NT_X86_XSTATE, NULL, NULL, cpu_max_ext_state_size, NULL); } *off = len; } void ia32_fixlimit(struct rlimit *rl, int which) { switch (which) { case RLIMIT_DATA: if (ia32_maxdsiz != 0) { if (rl->rlim_cur > ia32_maxdsiz) rl->rlim_cur = ia32_maxdsiz; if (rl->rlim_max > ia32_maxdsiz) rl->rlim_max = ia32_maxdsiz; } break; case RLIMIT_STACK: if (ia32_maxssiz != 0) { if (rl->rlim_cur > ia32_maxssiz) rl->rlim_cur = ia32_maxssiz; if (rl->rlim_max > ia32_maxssiz) rl->rlim_max = ia32_maxssiz; } break; case RLIMIT_VMEM: if (ia32_maxvmem != 0) { if (rl->rlim_cur > ia32_maxvmem) rl->rlim_cur = ia32_maxvmem; if (rl->rlim_max > ia32_maxvmem) rl->rlim_max = ia32_maxvmem; } break; } } Index: head/sys/conf/files.i386 =================================================================== --- head/sys/conf/files.i386 (revision 342242) +++ head/sys/conf/files.i386 (revision 342243) @@ -1,630 +1,611 @@ # This file tells config what files go into building a kernel, # files marked standard are always included. # # $FreeBSD$ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and # dependency lines other than the first are silently ignored. # cloudabi32_vdso.o optional compat_cloudabi32 \ dependency "$S/contrib/cloudabi/cloudabi_vdso_i686.S" \ compile-with "${CC} -x assembler-with-cpp -shared -nostdinc -nostdlib -Wl,-T$S/compat/cloudabi/cloudabi_vdso.lds $S/contrib/cloudabi/cloudabi_vdso_i686.S -o ${.TARGET}" \ no-obj no-implicit-rule \ clean "cloudabi32_vdso.o" # cloudabi32_vdso_blob.o optional compat_cloudabi32 \ dependency "cloudabi32_vdso.o" \ compile-with "${OBJCOPY} --input-target binary --output-target elf32-i386-freebsd --binary-architecture i386 cloudabi32_vdso.o ${.TARGET}" \ no-implicit-rule \ clean "cloudabi32_vdso_blob.o" # linux_genassym.o optional compat_linux \ dependency "$S/i386/linux/linux_genassym.c offset.inc" \ compile-with "${CC} ${CFLAGS:N-flto:N-fno-common} -c ${.IMPSRC}" \ no-obj no-implicit-rule \ clean "linux_genassym.o" # linux_assym.h optional compat_linux \ dependency "$S/kern/genassym.sh linux_genassym.o" \ compile-with "sh $S/kern/genassym.sh linux_genassym.o > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "linux_assym.h" # linux_locore.o optional compat_linux \ dependency "linux_assym.h $S/i386/linux/linux_locore.s" \ compile-with "${CC} -x assembler-with-cpp -DLOCORE -shared -s -pipe -I. -I$S -Werror -Wall -fPIC -fno-common -nostdinc -nostdlib -Wl,-T$S/i386/linux/linux_vdso.lds.s -Wl,-soname=linux_vdso.so,--eh-frame-hdr,-warn-common ${.IMPSRC} -o ${.TARGET}" \ no-obj no-implicit-rule \ clean "linux_locore.o" # linux_vdso.so optional compat_linux \ dependency "linux_locore.o" \ compile-with "${OBJCOPY} --input-target binary --output-target elf32-i386-freebsd --binary-architecture i386 linux_locore.o ${.TARGET}" \ no-implicit-rule \ clean "linux_vdso.so" # font.h optional sc_dflt_font \ compile-with "uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x16.fnt && file2c 'static u_char dflt_font_16[16*256] = {' '};' < ${SC_DFLT_FONT}-8x16 > font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x14.fnt && file2c 'static u_char dflt_font_14[14*256] = {' '};' < ${SC_DFLT_FONT}-8x14 >> font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x8.fnt && file2c 'static u_char dflt_font_8[8*256] = {' '};' < ${SC_DFLT_FONT}-8x8 >> font.h" \ no-obj no-implicit-rule before-depend \ clean "font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8" # atkbdmap.h optional atkbd_dflt_keymap \ compile-with "kbdcontrol -P ${S:S/sys$/share/}/vt/keymaps -P ${S:S/sys$/share/}/syscons/keymaps -L ${ATKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > atkbdmap.h" \ no-obj no-implicit-rule before-depend \ clean "atkbdmap.h" # ukbdmap.h optional ukbd_dflt_keymap \ compile-with "kbdcontrol -P ${S:S/sys$/share/}/vt/keymaps -P ${S:S/sys$/share/}/syscons/keymaps -L ${UKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > ukbdmap.h" \ no-obj no-implicit-rule before-depend \ clean "ukbdmap.h" # hpt27xx_lib.o optional hpt27xx \ dependency "$S/dev/hpt27xx/i386-elf.hpt27xx_lib.o.uu" \ compile-with "uudecode < $S/dev/hpt27xx/i386-elf.hpt27xx_lib.o.uu" \ no-implicit-rule # hptmvraid.o optional hptmv \ dependency "$S/dev/hptmv/i386-elf.raid.o.uu" \ compile-with "uudecode < $S/dev/hptmv/i386-elf.raid.o.uu" \ no-implicit-rule # hptnr_lib.o optional hptnr \ dependency "$S/dev/hptnr/i386-elf.hptnr_lib.o.uu" \ compile-with "uudecode < $S/dev/hptnr/i386-elf.hptnr_lib.o.uu" \ no-implicit-rule # hptrr_lib.o optional hptrr \ dependency "$S/dev/hptrr/i386-elf.hptrr_lib.o.uu" \ compile-with "uudecode < $S/dev/hptrr/i386-elf.hptrr_lib.o.uu" \ no-implicit-rule # cddl/contrib/opensolaris/common/atomic/i386/opensolaris_atomic.S optional zfs | dtrace compile-with "${ZFS_S}" cddl/dev/dtrace/i386/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/i386/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/fbt/x86/fbt_isa.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" cddl/dev/dtrace/x86/dis_tables.c optional dtrace_fbt | dtraceall compile-with "${DTRACE_C}" cddl/dev/dtrace/x86/instr_size.c optional dtrace_fbt | dtraceall compile-with "${DTRACE_C}" compat/linprocfs/linprocfs.c optional linprocfs compat/linsysfs/linsysfs.c optional linsysfs compat/linux/linux_event.c optional compat_linux compat/linux/linux_emul.c optional compat_linux compat/linux/linux_errno.c optional compat_linux compat/linux/linux_file.c optional compat_linux compat/linux/linux_fork.c optional compat_linux compat/linux/linux_futex.c optional compat_linux compat/linux/linux_getcwd.c optional compat_linux compat/linux/linux_ioctl.c optional compat_linux compat/linux/linux_ipc.c optional compat_linux compat/linux/linux_mib.c optional compat_linux compat/linux/linux_misc.c optional compat_linux compat/linux/linux_mmap.c optional compat_linux compat/linux/linux_signal.c optional compat_linux compat/linux/linux_socket.c optional compat_linux compat/linux/linux_stats.c optional compat_linux compat/linux/linux_sysctl.c optional compat_linux compat/linux/linux_time.c optional compat_linux compat/linux/linux_timer.c optional compat_linux compat/linux/linux_uid16.c optional compat_linux compat/linux/linux_util.c optional compat_linux compat/linux/linux_vdso.c optional compat_linux compat/linux/linux.c optional compat_linux compat/ndis/kern_ndis.c optional ndisapi pci compat/ndis/kern_windrv.c optional ndisapi pci compat/ndis/subr_hal.c optional ndisapi pci compat/ndis/subr_ndis.c optional ndisapi pci compat/ndis/subr_ntoskrnl.c optional ndisapi pci compat/ndis/subr_pe.c optional ndisapi pci compat/ndis/subr_usbd.c optional ndisapi pci compat/ndis/winx32_wrap.S optional ndisapi pci bf_enc.o optional crypto | ipsec | ipsec_support \ dependency "$S/crypto/blowfish/arch/i386/bf_enc.S $S/crypto/blowfish/arch/i386/bf_enc_586.S $S/crypto/blowfish/arch/i386/bf_enc_686.S" \ compile-with "${CC} -c -I$S/crypto/blowfish/arch/i386 ${ASM_CFLAGS} ${WERROR} ${.IMPSRC}" \ no-implicit-rule crypto/aesni/aeskeys_i386.S optional aesni crypto/aesni/aesni.c optional aesni aesni_ghash.o optional aesni \ dependency "$S/crypto/aesni/aesni_ghash.c" \ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${NO_WCAST_QUAL} ${PROF} -mmmx -msse -msse4 -maes -mpclmul ${.IMPSRC}" \ no-implicit-rule \ clean "aesni_ghash.o" aesni_wrap.o optional aesni \ dependency "$S/crypto/aesni/aesni_wrap.c" \ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${NO_WCAST_QUAL} ${PROF} -mmmx -msse -msse4 -maes ${.IMPSRC}" \ no-implicit-rule \ clean "aesni_wrap.o" crypto/des/arch/i386/des_enc.S optional crypto | ipsec | ipsec_support | netsmb intel_sha1.o optional aesni \ dependency "$S/crypto/aesni/intel_sha1.c" \ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${PROF} -mmmx -msse -msse4 -msha ${.IMPSRC}" \ no-implicit-rule \ clean "intel_sha1.o" intel_sha256.o optional aesni \ dependency "$S/crypto/aesni/intel_sha256.c" \ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${PROF} -mmmx -msse -msse4 -msha ${.IMPSRC}" \ no-implicit-rule \ clean "intel_sha256.o" crypto/via/padlock.c optional padlock crypto/via/padlock_cipher.c optional padlock crypto/via/padlock_hash.c optional padlock dev/acpica/acpi_pci.c optional acpi pci dev/acpica/acpi_pci_link.c optional acpi pci dev/acpica/acpi_pcib.c optional acpi pci dev/acpica/acpi_pcib_acpi.c optional acpi pci dev/acpica/acpi_pcib_pci.c optional acpi pci dev/agp/agp_ali.c optional agp dev/agp/agp_amd.c optional agp dev/agp/agp_amd64.c optional agp dev/agp/agp_ati.c optional agp dev/agp/agp_i810.c optional agp dev/agp/agp_intel.c optional agp dev/agp/agp_nvidia.c optional agp dev/agp/agp_sis.c optional agp dev/agp/agp_via.c optional agp dev/amdsbwd/amdsbwd.c optional amdsbwd dev/amdsmn/amdsmn.c optional amdsmn | amdtemp dev/amdtemp/amdtemp.c optional amdtemp dev/arcmsr/arcmsr.c optional arcmsr pci dev/asmc/asmc.c optional asmc isa dev/atkbdc/atkbd.c optional atkbd atkbdc dev/atkbdc/atkbd_atkbdc.c optional atkbd atkbdc dev/atkbdc/atkbdc.c optional atkbdc dev/atkbdc/atkbdc_isa.c optional atkbdc isa dev/atkbdc/atkbdc_subr.c optional atkbdc dev/atkbdc/psm.c optional psm atkbdc dev/bxe/bxe.c optional bxe pci dev/bxe/bxe_stats.c optional bxe pci dev/bxe/bxe_debug.c optional bxe pci dev/bxe/ecore_sp.c optional bxe pci dev/bxe/bxe_elink.c optional bxe pci dev/bxe/57710_init_values.c optional bxe pci dev/bxe/57711_init_values.c optional bxe pci dev/bxe/57712_init_values.c optional bxe pci dev/ce/ceddk.c optional ce dev/ce/if_ce.c optional ce dev/ce/tau32-ddk.c optional ce \ compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}" dev/coretemp/coretemp.c optional coretemp dev/cp/cpddk.c optional cp dev/cp/if_cp.c optional cp dev/cpuctl/cpuctl.c optional cpuctl dev/ctau/ctau.c optional ctau dev/ctau/ctddk.c optional ctau dev/ctau/if_ct.c optional ctau dev/cx/csigma.c optional cx dev/cx/cxddk.c optional cx dev/cx/if_cx.c optional cx dev/dpms/dpms.c optional dpms dev/ed/if_ed_3c503.c optional ed isa ed_3c503 dev/ed/if_ed_isa.c optional ed isa dev/ed/if_ed_wd80x3.c optional ed isa dev/ed/if_ed_hpp.c optional ed isa ed_hpp dev/ed/if_ed_sic.c optional ed isa ed_sic dev/ep/elink.c optional ep dev/fb/fb.c optional fb | vga dev/fb/s3_pci.c optional s3pci dev/fb/vesa.c optional vga vesa dev/fb/vga.c optional vga dev/fdc/fdc.c optional fdc dev/fdc/fdc_acpi.c optional fdc dev/fdc/fdc_isa.c optional fdc isa dev/fdc/fdc_pccard.c optional fdc pccard dev/fe/if_fe_isa.c optional fe isa dev/glxiic/glxiic.c optional glxiic dev/glxsb/glxsb.c optional glxsb dev/glxsb/glxsb_hash.c optional glxsb dev/gpio/bytgpio.c optional bytgpio dev/gpio/chvgpio.c optional chvgpio dev/hpt27xx/hpt27xx_os_bsd.c optional hpt27xx dev/hpt27xx/hpt27xx_osm_bsd.c optional hpt27xx dev/hpt27xx/hpt27xx_config.c optional hpt27xx dev/hptmv/entry.c optional hptmv dev/hptmv/mv.c optional hptmv dev/hptmv/gui_lib.c optional hptmv dev/hptmv/hptproc.c optional hptmv dev/hptmv/ioctl.c optional hptmv dev/hptnr/hptnr_os_bsd.c optional hptnr dev/hptnr/hptnr_osm_bsd.c optional hptnr dev/hptnr/hptnr_config.c optional hptnr dev/hptrr/hptrr_os_bsd.c optional hptrr dev/hptrr/hptrr_osm_bsd.c optional hptrr dev/hptrr/hptrr_config.c optional hptrr dev/hwpmc/hwpmc_amd.c optional hwpmc dev/hwpmc/hwpmc_intel.c optional hwpmc dev/hwpmc/hwpmc_core.c optional hwpmc dev/hwpmc/hwpmc_uncore.c optional hwpmc dev/hwpmc/hwpmc_tsc.c optional hwpmc dev/hwpmc/hwpmc_x86.c optional hwpmc dev/hyperv/pcib/vmbus_pcib.c optional hyperv pci dev/hyperv/netvsc/hn_nvs.c optional hyperv dev/hyperv/netvsc/hn_rndis.c optional hyperv dev/hyperv/netvsc/if_hn.c optional hyperv dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c optional hyperv dev/hyperv/utilities/hv_kvp.c optional hyperv dev/hyperv/utilities/hv_snapshot.c optional hyperv dev/hyperv/utilities/vmbus_heartbeat.c optional hyperv dev/hyperv/utilities/vmbus_ic.c optional hyperv dev/hyperv/utilities/vmbus_shutdown.c optional hyperv dev/hyperv/utilities/vmbus_timesync.c optional hyperv dev/hyperv/vmbus/hyperv.c optional hyperv dev/hyperv/vmbus/hyperv_busdma.c optional hyperv dev/hyperv/vmbus/vmbus.c optional hyperv pci dev/hyperv/vmbus/vmbus_br.c optional hyperv dev/hyperv/vmbus/vmbus_chan.c optional hyperv dev/hyperv/vmbus/vmbus_et.c optional hyperv dev/hyperv/vmbus/vmbus_if.m optional hyperv dev/hyperv/vmbus/vmbus_res.c optional hyperv dev/hyperv/vmbus/vmbus_xact.c optional hyperv dev/hyperv/vmbus/i386/hyperv_machdep.c optional hyperv dev/hyperv/vmbus/i386/vmbus_vector.S optional hyperv dev/ichwd/ichwd.c optional ichwd dev/if_ndis/if_ndis.c optional ndis dev/if_ndis/if_ndis_pccard.c optional ndis pccard dev/if_ndis/if_ndis_pci.c optional ndis cardbus | ndis pci dev/if_ndis/if_ndis_usb.c optional ndis usb dev/imcsmb/imcsmb.c optional imcsmb dev/imcsmb/imcsmb_pci.c optional imcsmb pci dev/intel/spi.c optional intelspi dev/io/iodev.c optional io dev/ipmi/ipmi.c optional ipmi dev/ipmi/ipmi_acpi.c optional ipmi acpi dev/ipmi/ipmi_isa.c optional ipmi isa dev/ipmi/ipmi_kcs.c optional ipmi dev/ipmi/ipmi_smic.c optional ipmi dev/ipmi/ipmi_smbus.c optional ipmi smbus dev/ipmi/ipmi_smbios.c optional ipmi dev/ipmi/ipmi_ssif.c optional ipmi smbus dev/ipmi/ipmi_pci.c optional ipmi pci dev/ipmi/ipmi_linux.c optional ipmi compat_linux dev/le/if_le_isa.c optional le isa dev/nctgpio/nctgpio.c optional nctgpio dev/nfe/if_nfe.c optional nfe pci dev/ntb/if_ntb/if_ntb.c optional if_ntb dev/ntb/ntb_transport.c optional ntb_transport | if_ntb dev/ntb/ntb.c optional ntb | ntb_transport | if_ntb | ntb_hw_intel | ntb_hw_plx | ntb_hw dev/ntb/ntb_if.m optional ntb | ntb_transport | if_ntb | ntb_hw_intel | ntb_hw_plx | ntb_hw dev/ntb/ntb_hw/ntb_hw_intel.c optional ntb_hw_intel | ntb_hw dev/ntb/ntb_hw/ntb_hw_plx.c optional ntb_hw_plx | ntb_hw dev/nvd/nvd.c optional nvd nvme dev/nvme/nvme.c optional nvme dev/nvme/nvme_ctrlr.c optional nvme dev/nvme/nvme_ctrlr_cmd.c optional nvme dev/nvme/nvme_ns.c optional nvme dev/nvme/nvme_ns_cmd.c optional nvme dev/nvme/nvme_qpair.c optional nvme dev/nvme/nvme_sysctl.c optional nvme dev/nvme/nvme_test.c optional nvme dev/nvme/nvme_util.c optional nvme dev/nvram/nvram.c optional nvram isa dev/ofw/ofwpci.c optional fdt pci dev/pcf/pcf_isa.c optional pcf dev/random/ivy.c optional rdrand_rng dev/random/nehemiah.c optional padlock_rng dev/sbni/if_sbni.c optional sbni dev/sbni/if_sbni_isa.c optional sbni isa dev/sbni/if_sbni_pci.c optional sbni pci dev/sio/sio.c optional sio dev/sio/sio_isa.c optional sio isa dev/sio/sio_pccard.c optional sio pccard dev/sio/sio_pci.c optional sio pci dev/sio/sio_puc.c optional sio puc dev/speaker/spkr.c optional speaker dev/syscons/apm/apm_saver.c optional apm_saver apm dev/syscons/scterm-teken.c optional sc dev/syscons/scvesactl.c optional sc vga vesa dev/syscons/scvgarndr.c optional sc vga dev/syscons/scvtb.c optional sc dev/tpm/tpm.c optional tpm dev/tpm/tpm_acpi.c optional tpm acpi dev/tpm/tpm_isa.c optional tpm isa dev/uart/uart_cpu_x86.c optional uart dev/viawd/viawd.c optional viawd dev/vmware/vmxnet3/if_vmx.c optional vmx dev/vmware/vmci/vmci.c optional vmci dev/vmware/vmci/vmci_datagram.c optional vmci dev/vmware/vmci/vmci_doorbell.c optional vmci dev/vmware/vmci/vmci_driver.c optional vmci dev/vmware/vmci/vmci_event.c optional vmci dev/vmware/vmci/vmci_hashtable.c optional vmci dev/vmware/vmci/vmci_kernel_if.c optional vmci dev/vmware/vmci/vmci_qpair.c optional vmci dev/vmware/vmci/vmci_queue_pair.c optional vmci dev/vmware/vmci/vmci_resource.c optional vmci dev/acpica/acpi_if.m standard dev/acpica/acpi_hpet.c optional acpi dev/acpica/acpi_timer.c optional acpi dev/acpica/acpi_pxm.c optional acpi dev/acpi_support/acpi_wmi_if.m standard dev/wbwd/wbwd.c optional wbwd dev/isci/isci.c optional isci dev/isci/isci_controller.c optional isci dev/isci/isci_domain.c optional isci dev/isci/isci_interrupt.c optional isci dev/isci/isci_io_request.c optional isci dev/isci/isci_logger.c optional isci dev/isci/isci_oem_parameters.c optional isci dev/isci/isci_remote_device.c optional isci dev/isci/isci_sysctl.c optional isci dev/isci/isci_task_request.c optional isci dev/isci/isci_timer.c optional isci dev/isci/scil/sati.c optional isci dev/isci/scil/sati_abort_task_set.c optional isci dev/isci/scil/sati_atapi.c optional isci dev/isci/scil/sati_device.c optional isci dev/isci/scil/sati_inquiry.c optional isci dev/isci/scil/sati_log_sense.c optional isci dev/isci/scil/sati_lun_reset.c optional isci dev/isci/scil/sati_mode_pages.c optional isci dev/isci/scil/sati_mode_select.c optional isci dev/isci/scil/sati_mode_sense.c optional isci dev/isci/scil/sati_mode_sense_10.c optional isci dev/isci/scil/sati_mode_sense_6.c optional isci dev/isci/scil/sati_move.c optional isci dev/isci/scil/sati_passthrough.c optional isci dev/isci/scil/sati_read.c optional isci dev/isci/scil/sati_read_buffer.c optional isci dev/isci/scil/sati_read_capacity.c optional isci dev/isci/scil/sati_reassign_blocks.c optional isci dev/isci/scil/sati_report_luns.c optional isci dev/isci/scil/sati_request_sense.c optional isci dev/isci/scil/sati_start_stop_unit.c optional isci dev/isci/scil/sati_synchronize_cache.c optional isci dev/isci/scil/sati_test_unit_ready.c optional isci dev/isci/scil/sati_unmap.c optional isci dev/isci/scil/sati_util.c optional isci dev/isci/scil/sati_verify.c optional isci dev/isci/scil/sati_write.c optional isci dev/isci/scil/sati_write_and_verify.c optional isci dev/isci/scil/sati_write_buffer.c optional isci dev/isci/scil/sati_write_long.c optional isci dev/isci/scil/sci_abstract_list.c optional isci dev/isci/scil/sci_base_controller.c optional isci dev/isci/scil/sci_base_domain.c optional isci dev/isci/scil/sci_base_iterator.c optional isci dev/isci/scil/sci_base_library.c optional isci dev/isci/scil/sci_base_logger.c optional isci dev/isci/scil/sci_base_memory_descriptor_list.c optional isci dev/isci/scil/sci_base_memory_descriptor_list_decorator.c optional isci dev/isci/scil/sci_base_object.c optional isci dev/isci/scil/sci_base_observer.c optional isci dev/isci/scil/sci_base_phy.c optional isci dev/isci/scil/sci_base_port.c optional isci dev/isci/scil/sci_base_remote_device.c optional isci dev/isci/scil/sci_base_request.c optional isci dev/isci/scil/sci_base_state_machine.c optional isci dev/isci/scil/sci_base_state_machine_logger.c optional isci dev/isci/scil/sci_base_state_machine_observer.c optional isci dev/isci/scil/sci_base_subject.c optional isci dev/isci/scil/sci_util.c optional isci dev/isci/scil/scic_sds_controller.c optional isci dev/isci/scil/scic_sds_library.c optional isci dev/isci/scil/scic_sds_pci.c optional isci dev/isci/scil/scic_sds_phy.c optional isci dev/isci/scil/scic_sds_port.c optional isci dev/isci/scil/scic_sds_port_configuration_agent.c optional isci dev/isci/scil/scic_sds_remote_device.c optional isci dev/isci/scil/scic_sds_remote_node_context.c optional isci dev/isci/scil/scic_sds_remote_node_table.c optional isci dev/isci/scil/scic_sds_request.c optional isci dev/isci/scil/scic_sds_sgpio.c optional isci dev/isci/scil/scic_sds_smp_remote_device.c optional isci dev/isci/scil/scic_sds_smp_request.c optional isci dev/isci/scil/scic_sds_ssp_request.c optional isci dev/isci/scil/scic_sds_stp_packet_request.c optional isci dev/isci/scil/scic_sds_stp_remote_device.c optional isci dev/isci/scil/scic_sds_stp_request.c optional isci dev/isci/scil/scic_sds_unsolicited_frame_control.c optional isci dev/isci/scil/scif_sas_controller.c optional isci dev/isci/scil/scif_sas_controller_state_handlers.c optional isci dev/isci/scil/scif_sas_controller_states.c optional isci dev/isci/scil/scif_sas_domain.c optional isci dev/isci/scil/scif_sas_domain_state_handlers.c optional isci dev/isci/scil/scif_sas_domain_states.c optional isci dev/isci/scil/scif_sas_high_priority_request_queue.c optional isci dev/isci/scil/scif_sas_internal_io_request.c optional isci dev/isci/scil/scif_sas_io_request.c optional isci dev/isci/scil/scif_sas_io_request_state_handlers.c optional isci dev/isci/scil/scif_sas_io_request_states.c optional isci dev/isci/scil/scif_sas_library.c optional isci dev/isci/scil/scif_sas_remote_device.c optional isci dev/isci/scil/scif_sas_remote_device_ready_substate_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_ready_substates.c optional isci dev/isci/scil/scif_sas_remote_device_starting_substate_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_starting_substates.c optional isci dev/isci/scil/scif_sas_remote_device_state_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_states.c optional isci dev/isci/scil/scif_sas_request.c optional isci dev/isci/scil/scif_sas_smp_activity_clear_affiliation.c optional isci dev/isci/scil/scif_sas_smp_io_request.c optional isci dev/isci/scil/scif_sas_smp_phy.c optional isci dev/isci/scil/scif_sas_smp_remote_device.c optional isci dev/isci/scil/scif_sas_stp_io_request.c optional isci dev/isci/scil/scif_sas_stp_remote_device.c optional isci dev/isci/scil/scif_sas_stp_task_request.c optional isci dev/isci/scil/scif_sas_task_request.c optional isci dev/isci/scil/scif_sas_task_request_state_handlers.c optional isci dev/isci/scil/scif_sas_task_request_states.c optional isci dev/isci/scil/scif_sas_timer.c optional isci i386/acpica/acpi_machdep.c optional acpi acpi_wakecode.o optional acpi \ dependency "$S/i386/acpica/acpi_wakecode.S assym.inc" \ compile-with "${NORMAL_S}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.o" acpi_wakecode.bin optional acpi \ dependency "acpi_wakecode.o" \ compile-with "${OBJCOPY} -S -O binary acpi_wakecode.o ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.bin" acpi_wakecode.h optional acpi \ dependency "acpi_wakecode.bin" \ compile-with "file2c -sx 'static char wakecode[] = {' '};' < acpi_wakecode.bin > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.h" acpi_wakedata.h optional acpi \ dependency "acpi_wakecode.o" \ compile-with '${NM} -n --defined-only acpi_wakecode.o | while read offset dummy what; do echo "#define $${what} 0x$${offset}"; done > ${.TARGET}' \ no-obj no-implicit-rule before-depend \ clean "acpi_wakedata.h" # i386/bios/apm.c optional apm i386/bios/smapi.c optional smapi i386/bios/smapi_bios.S optional smapi i386/cloudabi32/cloudabi32_sysvec.c optional compat_cloudabi32 #i386/i386/apic_vector.s optional apic i386/i386/bios.c standard i386/i386/bioscall.s standard i386/i386/bpf_jit_machdep.c optional bpf_jitter i386/i386/copyout.c standard i386/i386/db_disasm.c optional ddb i386/i386/db_interface.c optional ddb i386/i386/db_trace.c optional ddb i386/i386/elan-mmcr.c optional cpu_elan | cpu_soekris i386/i386/elf_machdep.c standard i386/i386/exception.s standard i386/i386/gdb_machdep.c optional gdb i386/i386/geode.c optional cpu_geode i386/i386/in_cksum.c optional inet | inet6 i386/i386/initcpu.c standard i386/i386/io.c optional io i386/i386/k6_mem.c optional mem i386/i386/locore.s standard no-obj i386/i386/longrun.c optional cpu_enable_longrun i386/i386/machdep.c standard i386/i386/mem.c optional mem i386/i386/minidump_machdep.c standard i386/i386/mp_clock.c optional smp i386/i386/mp_machdep.c optional smp i386/i386/mpboot.s optional smp i386/i386/npx.c standard i386/i386/perfmon.c optional perfmon i386/i386/pmap.c standard i386/i386/prof_machdep.c optional profiling-routine i386/i386/ptrace_machdep.c standard i386/i386/sigtramp.s standard i386/i386/support.s standard i386/i386/swtch.s standard i386/i386/sys_machdep.c standard i386/i386/trap.c standard i386/i386/uio_machdep.c standard i386/i386/vm86.c standard i386/i386/vm_machdep.c standard -i386/ibcs2/ibcs2_errno.c optional ibcs2 -i386/ibcs2/ibcs2_fcntl.c optional ibcs2 -i386/ibcs2/ibcs2_ioctl.c optional ibcs2 -i386/ibcs2/ibcs2_ipc.c optional ibcs2 -i386/ibcs2/ibcs2_isc.c optional ibcs2 -i386/ibcs2/ibcs2_isc_sysent.c optional ibcs2 -i386/ibcs2/ibcs2_misc.c optional ibcs2 -i386/ibcs2/ibcs2_msg.c optional ibcs2 -i386/ibcs2/ibcs2_other.c optional ibcs2 -i386/ibcs2/ibcs2_signal.c optional ibcs2 -i386/ibcs2/ibcs2_socksys.c optional ibcs2 -i386/ibcs2/ibcs2_stat.c optional ibcs2 -i386/ibcs2/ibcs2_sysent.c optional ibcs2 -i386/ibcs2/ibcs2_sysi86.c optional ibcs2 -i386/ibcs2/ibcs2_sysvec.c optional ibcs2 -i386/ibcs2/ibcs2_util.c optional ibcs2 -i386/ibcs2/ibcs2_xenix.c optional ibcs2 -i386/ibcs2/ibcs2_xenix_sysent.c optional ibcs2 -i386/ibcs2/imgact_coff.c optional ibcs2 i386/linux/imgact_linux.c optional compat_linux i386/linux/linux_copyout.c optional compat_linux i386/linux/linux_dummy.c optional compat_linux i386/linux/linux_machdep.c optional compat_linux i386/linux/linux_ptrace.c optional compat_linux i386/linux/linux_sysent.c optional compat_linux i386/linux/linux_sysvec.c optional compat_linux i386/pci/pci_cfgreg.c optional pci i386/pci/pci_pir.c optional pci isa/syscons_isa.c optional sc isa/vga_isa.c optional vga kern/kern_clocksource.c standard kern/imgact_aout.c optional compat_aout kern/imgact_gzip.c optional gzip kern/subr_sfbuf.c standard libkern/divdi3.c standard libkern/ffsll.c standard libkern/flsll.c standard libkern/memcmp.c standard libkern/memset.c standard libkern/moddi3.c standard libkern/qdivrem.c standard libkern/ucmpdi2.c standard libkern/udivdi3.c standard libkern/umoddi3.c standard libkern/x86/crc32_sse42.c standard # # x86 real mode BIOS support, required by dpms/pci/vesa # compat/x86bios/x86bios.c optional x86bios | dpms | pci | vesa # # bvm console # dev/bvm/bvm_console.c optional bvmconsole dev/bvm/bvm_dbg.c optional bvmdebug # # x86 shared code between IA32 and AMD64 architectures # x86/acpica/OsdEnvironment.c optional acpi x86/acpica/acpi_apm.c optional acpi x86/acpica/acpi_wakeup.c optional acpi x86/acpica/madt.c optional acpi apic x86/acpica/srat.c optional acpi x86/bios/smbios.c optional smbios x86/bios/vpd.c optional vpd x86/cpufreq/est.c optional cpufreq x86/cpufreq/hwpstate.c optional cpufreq x86/cpufreq/p4tcc.c optional cpufreq x86/cpufreq/powernow.c optional cpufreq x86/cpufreq/smist.c optional cpufreq x86/iommu/busdma_dmar.c optional acpi acpi_dmar pci x86/iommu/intel_ctx.c optional acpi acpi_dmar pci x86/iommu/intel_drv.c optional acpi acpi_dmar pci x86/iommu/intel_fault.c optional acpi acpi_dmar pci x86/iommu/intel_gas.c optional acpi acpi_dmar pci x86/iommu/intel_idpgtbl.c optional acpi acpi_dmar pci x86/iommu/intel_intrmap.c optional acpi acpi_dmar pci x86/iommu/intel_qi.c optional acpi acpi_dmar pci x86/iommu/intel_quirks.c optional acpi acpi_dmar pci x86/iommu/intel_utils.c optional acpi acpi_dmar pci x86/isa/atpic.c optional atpic x86/isa/atrtc.c standard x86/isa/clock.c standard x86/isa/elcr.c optional atpic | apic x86/isa/isa.c optional isa x86/isa/isa_dma.c optional isa x86/isa/nmi.c standard x86/isa/orm.c optional isa x86/pci/pci_bus.c optional pci x86/pci/qpi.c optional pci x86/x86/autoconf.c standard x86/x86/bus_machdep.c standard x86/x86/busdma_bounce.c standard x86/x86/busdma_machdep.c standard x86/x86/cpu_machdep.c standard x86/x86/dump_machdep.c standard x86/x86/fdt_machdep.c optional fdt x86/x86/identcpu.c standard x86/x86/intr_machdep.c standard x86/x86/io_apic.c optional apic x86/x86/legacy.c standard x86/x86/local_apic.c optional apic x86/x86/mca.c standard x86/x86/x86_mem.c optional mem x86/x86/mptable.c optional apic x86/x86/mptable_pci.c optional apic pci x86/x86/mp_x86.c optional smp x86/x86/mp_watchdog.c optional mp_watchdog smp x86/x86/msi.c optional apic pci x86/x86/nexus.c standard x86/x86/stack_machdep.c optional ddb | stack x86/x86/tsc.c standard x86/x86/ucode.c standard x86/x86/pvclock.c standard x86/x86/delay.c standard x86/xen/hvm.c optional xenhvm x86/xen/xen_intr.c optional xenhvm x86/xen/xen_apic.c optional xenhvm x86/xen/xenpv.c optional xenhvm x86/xen/xen_nexus.c optional xenhvm x86/xen/xen_msi.c optional xenhvm Index: head/sys/conf/options.amd64 =================================================================== --- head/sys/conf/options.amd64 (revision 342242) +++ head/sys/conf/options.amd64 (revision 342243) @@ -1,71 +1,70 @@ # $FreeBSD$ # Options specific to AMD64 platform kernels AUTO_EOI_1 opt_auto_eoi.h AUTO_EOI_2 opt_auto_eoi.h COUNT_XINVLTLB_HITS opt_smp.h COUNT_IPIS opt_smp.h MAXMEM MPTABLE_FORCE_HTT MP_WATCHDOG NKPT opt_pmap.h PV_STATS opt_pmap.h # Options for emulators. These should only be used at config time, so # they are handled like options for static filesystems # (see src/sys/conf/options), except for broken debugging options. COMPAT_AOUT opt_dontuse.h COMPAT_FREEBSD32 opt_global.h -#IBCS2 opt_dontuse.h #COMPAT_LINUX opt_dontuse.h COMPAT_LINUX32 opt_compat.h LINPROCFS opt_dontuse.h LINSYSFS opt_dontuse.h NDISAPI opt_dontuse.h TIMER_FREQ opt_clock.h # options for serial support COM_ESP opt_sio.h COM_MULTIPORT opt_sio.h CONSPEED opt_sio.h GDBSPEED opt_sio.h COM_NO_ACPI opt_sio.h VGA_ALT_SEQACCESS opt_vga.h VGA_DEBUG opt_vga.h VGA_NO_FONT_LOADING opt_vga.h VGA_NO_MODE_CHANGE opt_vga.h VGA_SLOW_IOACCESS opt_vga.h VGA_WIDTH90 opt_vga.h VESA VESA_DEBUG opt_vesa.h # AGP debugging support AGP_DEBUG opt_agp.h ATKBD_DFLT_KEYMAP opt_atkbd.h # iWARP client interface support in ixl IXL_IW opt_ixl.h # ------------------------------- # EOF # ------------------------------- HAMMER opt_cpu.h PSM_HOOKRESUME opt_psm.h PSM_RESETAFTERSUSPEND opt_psm.h PSM_DEBUG opt_psm.h DEV_ATPIC opt_atpic.h # BPF just-in-time compiler BPF_JITTER opt_bpf.h XENHVM opt_global.h # options for the Intel C600 SAS driver (isci) ISCI_LOGGING opt_isci.h # EFI Runtime services support EFIRT opt_efirt.h Index: head/sys/conf/options.i386 =================================================================== --- head/sys/conf/options.i386 (revision 342242) +++ head/sys/conf/options.i386 (revision 342243) @@ -1,122 +1,121 @@ # $FreeBSD$ # Options specific to the i386 platform kernels AUTO_EOI_1 opt_auto_eoi.h AUTO_EOI_2 opt_auto_eoi.h BROKEN_KEYBOARD_RESET opt_reset.h COUNT_XINVLTLB_HITS opt_smp.h COUNT_IPIS opt_smp.h DISABLE_PG_G opt_pmap.h DISABLE_PSE opt_pmap.h I586_PMC_GUPROF opt_i586_guprof.h MAXMEM MPTABLE_FORCE_HTT MP_WATCHDOG NKPT opt_pmap.h PERFMON PMAP_SHPGPERPROC opt_pmap.h POWERFAIL_NMI opt_trap.h PV_STATS opt_pmap.h # Options for emulators. These should only be used at config time, so # they are handled like options for static filesystems # (see src/sys/conf/options), except for broken debugging options. COMPAT_AOUT opt_dontuse.h -IBCS2 opt_dontuse.h COMPAT_LINUX opt_dontuse.h LINPROCFS opt_dontuse.h LINSYSFS opt_dontuse.h NDISAPI opt_dontuse.h # Change KVM size. Changes things all over the kernel. KVA_PAGES opt_global.h # Physical address extensions and support for >4G ram. As above. PAE opt_global.h # Use PAE page tables, but limit memory support to 4GB. # This keeps the i386 non-PAE KBI, in particular, drivers see # 32bit vm_paddr_t. PAE_TABLES opt_global.h TIMER_FREQ opt_clock.h CPU_ATHLON_SSE_HACK opt_cpu.h CPU_BLUELIGHTNING_3X opt_cpu.h CPU_BLUELIGHTNING_FPU_OP_CACHE opt_cpu.h CPU_BTB_EN opt_cpu.h CPU_CYRIX_NO_LOCK opt_cpu.h CPU_DIRECT_MAPPED_CACHE opt_cpu.h CPU_DISABLE_5X86_LSSER opt_cpu.h CPU_ELAN opt_cpu.h CPU_ELAN_PPS opt_cpu.h CPU_ELAN_XTAL opt_cpu.h CPU_ENABLE_LONGRUN opt_cpu.h CPU_FASTER_5X86_FPU opt_cpu.h CPU_GEODE opt_cpu.h CPU_I486_ON_386 opt_cpu.h CPU_IORT opt_cpu.h CPU_L2_LATENCY opt_cpu.h CPU_LOOP_EN opt_cpu.h CPU_PPRO2CELERON opt_cpu.h CPU_RSTK_EN opt_cpu.h CPU_SOEKRIS opt_cpu.h CPU_SUSP_HLT opt_cpu.h CPU_UPGRADE_HW_CACHE opt_cpu.h CPU_WT_ALLOC opt_cpu.h CYRIX_CACHE_REALLY_WORKS opt_cpu.h CYRIX_CACHE_WORKS opt_cpu.h NO_F00F_HACK opt_cpu.h NO_MEMORY_HOLE opt_cpu.h # The CPU type affects the endian conversion functions all over the kernel. I486_CPU opt_global.h I586_CPU opt_global.h I686_CPU opt_global.h # options for serial support COM_ESP opt_sio.h COM_MULTIPORT opt_sio.h CONSPEED opt_sio.h GDBSPEED opt_sio.h COM_NO_ACPI opt_sio.h VGA_ALT_SEQACCESS opt_vga.h VGA_DEBUG opt_vga.h VGA_NO_FONT_LOADING opt_vga.h VGA_NO_MODE_CHANGE opt_vga.h VGA_SLOW_IOACCESS opt_vga.h VGA_WIDTH90 opt_vga.h VESA VESA_DEBUG opt_vesa.h # AGP debugging support AGP_DEBUG opt_agp.h PSM_DEBUG opt_psm.h PSM_HOOKRESUME opt_psm.h PSM_RESETAFTERSUSPEND opt_psm.h ATKBD_DFLT_KEYMAP opt_atkbd.h # Video spigot SPIGOT_UNSECURE opt_spigot.h # Enables NETGRAPH support for Cronyx adapters NETGRAPH_CRONYX opt_ng_cronyx.h # Device options DEV_APIC opt_apic.h DEV_ATPIC opt_atpic.h # Debugging NPX_DEBUG opt_npx.h # BPF just-in-time compiler BPF_JITTER opt_bpf.h XENHVM opt_global.h # options for the Intel C600 SAS driver (isci) ISCI_LOGGING opt_isci.h Index: head/sys/i386/Makefile =================================================================== --- head/sys/i386/Makefile (revision 342242) +++ head/sys/i386/Makefile (revision 342243) @@ -1,39 +1,39 @@ # $FreeBSD$ # @(#)Makefile 8.1 (Berkeley) 6/11/93 # Makefile for i386 links, tags file # SYS is normally set in Make.tags.inc SYS=/sys TAGDIR= i386 .include "../kern/Make.tags.inc" all: @echo "make links or tags only" # Directories in which to place i386 tags links -DI386= apm i386 ibcs2 include isa linux +DI386= apm i386 include isa linux links:: -for i in ${COMMDIR1}; do \ (cd $$i && { rm -f tags; ln -s ../${TAGDIR}/tags tags; }) done -for i in ${COMMDIR2}; do \ (cd $$i && { rm -f tags; ln -s ../../${TAGDIR}/tags tags; }) done -for i in ${DI386}; do \ (cd $$i && { rm -f tags; ln -s ../tags tags; }) done SI386= ${SYS}/i386/acpica/*.[ch] ${SYS}/i386/bios/*.[ch] \ - ${SYS}/i386/i386/*.[ch] ${SYS}/i386/ibcs2/*.[ch] \ - ${SYS}/i386/include/*.[ch] ${SYS}/i386/isa/*.[ch] \ - ${SYS}/i386/linux/*.[ch] ${SYS}/i386/pci/*.[ch] + ${SYS}/i386/i386/*.[ch] ${SYS}/i386/include/*.[ch] \ + ${SYS}/i386/isa/*.[ch] ${SYS}/i386/linux/*.[ch] \ + ${SYS}/i386/pci/*.[ch] AI386= ${SYS}/i386/i386/*.s tags:: -ctags -wdt ${COMM} ${SI386} egrep "^ENTRY(.*)|^ALTENTRY(.*)" ${AI386} | \ sed "s;\([^:]*\):\([^(]*\)(\([^, )]*\)\(.*\);\3 \1 /^\2(\3\4$$/;" \ >> tags sort -o tags tags chmod 444 tags Index: head/sys/i386/conf/NOTES =================================================================== --- head/sys/i386/conf/NOTES (revision 342242) +++ head/sys/i386/conf/NOTES (revision 342243) @@ -1,949 +1,946 @@ # # NOTES -- Lines that can be cut/pasted into kernel and hints configs. # # This file contains machine dependent kernel configuration notes. For # machine independent notes, look in /sys/conf/NOTES. # # $FreeBSD$ # # # We want LINT to cover profiling as well. profile 2 # # Enable the kernel DTrace hooks which are required to load the DTrace # kernel modules. # options KDTRACE_HOOKS # DTrace core # NOTE: introduces CDDL-licensed components into the kernel #device dtrace # DTrace modules #device dtrace_profile #device dtrace_sdt #device dtrace_fbt #device dtrace_systrace #device dtrace_prototype #device dtnfscl #device dtmalloc # Alternatively include all the DTrace modules #device dtraceall ##################################################################### # SMP OPTIONS: # # The apic device enables the use of the I/O APIC for interrupt delivery. # The apic device can be used in both UP and SMP kernels, but is required # for SMP kernels. Thus, the apic device is not strictly an SMP option, # but it is a prerequisite for SMP. # # Notes: # # HTT CPUs should only be used if they are enabled in the BIOS. For # the ACPI case, ACPI only correctly tells us about any HTT CPUs if # they are enabled. However, most HTT systems do not list HTT CPUs # in the MP Table if they are enabled, thus we guess at the HTT CPUs # for the MP Table case. However, we shouldn't try to guess and use # these CPUs if HTT is disabled. Thus, HTT guessing is only enabled # for the MP Table if the user explicitly asks for it via the # MPTABLE_FORCE_HTT option. Do NOT use this option if you have HTT # disabled in your BIOS. # # IPI_PREEMPTION instructs the kernel to preempt threads running on other # CPUS if needed. Relies on the PREEMPTION option # Mandatory: device apic # I/O apic # Optional: options MPTABLE_FORCE_HTT # Enable HTT CPUs with the MP Table options IPI_PREEMPTION # # Watchdog routines. # options MP_WATCHDOG # Debugging options. # options COUNT_XINVLTLB_HITS # Counters for TLB events options COUNT_IPIS # Per-CPU IPI interrupt counters ##################################################################### # CPU OPTIONS # # You must specify at least one CPU (the one you intend to run on); # deleting the specification for CPUs you don't need to use may make # parts of the system run faster. # cpu I486_CPU cpu I586_CPU # aka Pentium(tm) cpu I686_CPU # aka Pentium Pro(tm) # # Options for CPU features. # # CPU_ATHLON_SSE_HACK tries to enable SSE instructions when the BIOS has # forgotten to enable them. # # CPU_BLUELIGHTNING_3X enables triple-clock mode on IBM Blue Lightning # CPU if CPU supports it. The default is double-clock mode on # BlueLightning CPU box. # # CPU_BLUELIGHTNING_FPU_OP_CACHE enables FPU operand cache on IBM # BlueLightning CPU. It works only with Cyrix FPU, and this option # should not be used with Intel FPU. # # CPU_BTB_EN enables branch target buffer on Cyrix 5x86 (NOTE 1). # # CPU_CYRIX_NO_LOCK enables weak locking for the entire address space # of Cyrix 6x86 and 6x86MX CPUs by setting the NO_LOCK bit of CCR1. # Otherwise, the NO_LOCK bit of CCR1 is cleared. (NOTE 3) # # CPU_DIRECT_MAPPED_CACHE sets L1 cache of Cyrix 486DLC CPU in direct # mapped mode. Default is 2-way set associative mode. # # CPU_DISABLE_5X86_LSSER disables load store serialize (i.e., enables # reorder). This option should not be used if you use memory mapped # I/O device(s). # # CPU_ELAN enables support for AMDs ElanSC520 CPU. # CPU_ELAN_PPS enables precision timestamp code. # CPU_ELAN_XTAL sets the clock crystal frequency in Hz. # # CPU_ENABLE_LONGRUN enables support for Transmeta Crusoe LongRun # technology which allows to restrict power consumption of the CPU by # using group of hw.crusoe.* sysctls. # # CPU_FASTER_5X86_FPU enables faster FPU exception handler. # # CPU_GEODE is for the SC1100 Geode embedded processor. This option # is necessary because the i8254 timecounter is toast. # # CPU_I486_ON_386 enables CPU cache on i486 based CPU upgrade products # for i386 machines. # # CPU_IORT defines I/O clock delay time (NOTE 1). Default values of # I/O clock delay time on Cyrix 5x86 and 6x86 are 0 and 7,respectively # (no clock delay). # # CPU_L2_LATENCY specifies the L2 cache latency value. This option is used # only when CPU_PPRO2CELERON is defined and Mendocino Celeron is detected. # The default value is 5. # # CPU_LOOP_EN prevents flushing the prefetch buffer if the destination # of a jump is already present in the prefetch buffer on Cyrix 5x86(NOTE # 1). # # CPU_PPRO2CELERON enables L2 cache of Mendocino Celeron CPUs. This option # is useful when you use Socket 8 to Socket 370 converter, because most Pentium # Pro BIOSs do not enable L2 cache of Mendocino Celeron CPUs. # # CPU_RSTK_EN enables return stack on Cyrix 5x86 (NOTE 1). # # CPU_SOEKRIS enables support www.soekris.com hardware. # # CPU_SUSP_HLT enables suspend on HALT. If this option is set, CPU # enters suspend mode following execution of HALT instruction. # # CPU_UPGRADE_HW_CACHE eliminates unneeded cache flush instruction(s). # # CPU_WT_ALLOC enables write allocation on Cyrix 6x86/6x86MX and AMD # K5/K6/K6-2 CPUs. # # CYRIX_CACHE_WORKS enables CPU cache on Cyrix 486 CPUs with cache # flush at hold state. # # CYRIX_CACHE_REALLY_WORKS enables (1) CPU cache on Cyrix 486 CPUs # without cache flush at hold state, and (2) write-back CPU cache on # Cyrix 6x86 whose revision < 2.7 (NOTE 2). # # NO_F00F_HACK disables the hack that prevents Pentiums (and ONLY # Pentiums) from locking up when a LOCK CMPXCHG8B instruction is # executed. This option is only needed if I586_CPU is also defined, # and should be included for any non-Pentium CPU that defines it. # # NO_MEMORY_HOLE is an optimisation for systems with AMD K6 processors # which indicates that the 15-16MB range is *definitely* not being # occupied by an ISA memory hole. # # NOTE 1: The options, CPU_BTB_EN, CPU_LOOP_EN, CPU_IORT, # CPU_LOOP_EN and CPU_RSTK_EN should not be used because of CPU bugs. # These options may crash your system. # # NOTE 2: If CYRIX_CACHE_REALLY_WORKS is not set, CPU cache is enabled # in write-through mode when revision < 2.7. If revision of Cyrix # 6x86 >= 2.7, CPU cache is always enabled in write-back mode. # # NOTE 3: This option may cause failures for software that requires # locked cycles in order to operate correctly. # options CPU_ATHLON_SSE_HACK options CPU_BLUELIGHTNING_3X options CPU_BLUELIGHTNING_FPU_OP_CACHE options CPU_BTB_EN options CPU_DIRECT_MAPPED_CACHE options CPU_DISABLE_5X86_LSSER options CPU_ELAN options CPU_ELAN_PPS options CPU_ELAN_XTAL=32768000 options CPU_ENABLE_LONGRUN options CPU_FASTER_5X86_FPU options CPU_GEODE options CPU_I486_ON_386 options CPU_IORT options CPU_L2_LATENCY=5 options CPU_LOOP_EN options CPU_PPRO2CELERON options CPU_RSTK_EN options CPU_SOEKRIS options CPU_SUSP_HLT options CPU_UPGRADE_HW_CACHE options CPU_WT_ALLOC options CYRIX_CACHE_WORKS options CYRIX_CACHE_REALLY_WORKS #options NO_F00F_HACK # Debug options options NPX_DEBUG # enable npx debugging # # PERFMON causes the driver for Pentium/Pentium Pro performance counters # to be compiled. See perfmon(4) for more information. # options PERFMON ##################################################################### # NETWORKING OPTIONS # # DEVICE_POLLING adds support for mixed interrupt-polling handling # of network device drivers, which has significant benefits in terms # of robustness to overloads and responsivity, as well as permitting # accurate scheduling of the CPU time between kernel network processing # and other activities. The drawback is a moderate (up to 1/HZ seconds) # potential increase in response times. # It is strongly recommended to use HZ=1000 or 2000 with DEVICE_POLLING # to achieve smoother behaviour. # Additionally, you can enable/disable polling at runtime with help of # the ifconfig(8) utility, and select the CPU fraction reserved to # userland with the sysctl variable kern.polling.user_frac # (default 50, range 0..100). # # Not all device drivers support this mode of operation at the time of # this writing. See polling(4) for more details. options DEVICE_POLLING # BPF_JITTER adds support for BPF just-in-time compiler. options BPF_JITTER # OpenFabrics Enterprise Distribution (Infiniband). options OFED options OFED_DEBUG_INIT # Sockets Direct Protocol options SDP options SDP_DEBUG # IP over Infiniband options IPOIB options IPOIB_DEBUG options IPOIB_CM ##################################################################### # CLOCK OPTIONS # Provide read/write access to the memory in the clock chip. device nvram # Access to rtc cmos via /dev/nvram ##################################################################### # MISCELLANEOUS DEVICES AND OPTIONS device speaker #Play IBM BASIC-style noises out your speaker hint.speaker.0.at="isa" hint.speaker.0.port="0x61" device gzip #Exec gzipped a.out's. REQUIRES COMPAT_AOUT! device apm_saver # Requires APM ##################################################################### # HARDWARE BUS CONFIGURATION # # ISA bus # device isa # # Options for `isa': # # AUTO_EOI_1 enables the `automatic EOI' feature for the master 8259A # interrupt controller. This saves about 0.7-1.25 usec for each interrupt. # This option breaks suspend/resume on some portables. # # AUTO_EOI_2 enables the `automatic EOI' feature for the slave 8259A # interrupt controller. This saves about 0.7-1.25 usec for each interrupt. # Automatic EOI is documented not to work for for the slave with the # original i8259A, but it works for some clones and some integrated # versions. # # MAXMEM specifies the amount of RAM on the machine; if this is not # specified, FreeBSD will first read the amount of memory from the CMOS # RAM, so the amount of memory will initially be limited to 64MB or 16MB # depending on the BIOS. If the BIOS reports 64MB, a memory probe will # then attempt to detect the installed amount of RAM. If this probe # fails to detect >64MB RAM you will have to use the MAXMEM option. # The amount is in kilobytes, so for a machine with 128MB of RAM, it would # be 131072 (128 * 1024). # # BROKEN_KEYBOARD_RESET disables the use of the keyboard controller to # reset the CPU for reboot. This is needed on some systems with broken # keyboard controllers. options AUTO_EOI_1 #options AUTO_EOI_2 options MAXMEM=(128*1024) #options BROKEN_KEYBOARD_RESET # # AGP GART support device agp # AGP debugging. options AGP_DEBUG ##################################################################### # HARDWARE DEVICE CONFIGURATION # To include support for VGA VESA video modes options VESA # Turn on extra debugging checks and output for VESA support. options VESA_DEBUG device dpms # DPMS suspend & resume via VESA BIOS # x86 real mode BIOS emulator, required by atkbdc/dpms/vesa options X86BIOS # # Hints for the non-optional Numeric Processing eXtension driver. hint.npx.0.flags="0x0" hint.npx.0.irq="13" # # `flags' for npx0: # 0x01 don't use the npx registers to optimize bcopy. # 0x02 don't use the npx registers to optimize bzero. # 0x04 don't use the npx registers to optimize copyin or copyout. # The npx registers are normally used to optimize copying and zeroing when # all of the following conditions are satisfied: # I586_CPU is an option # the cpu is an i586 (perhaps not a Pentium) # the probe for npx0 succeeds # INT 16 exception handling works. # Then copying and zeroing using the npx registers is normally 30-100% faster. # The flags can be used to control cases where it doesn't work or is slower. # Setting them at boot time using hints works right (the optimizations # are not used until later in the bootstrap when npx0 is attached). # Flag 0x08 automatically disables the i586 optimized routines. # # # Optional devices: # # PS/2 mouse device psm hint.psm.0.at="atkbdc" hint.psm.0.irq="12" # Options for psm: options PSM_HOOKRESUME #hook the system resume event, useful #for some laptops options PSM_RESETAFTERSUSPEND #reset the device at the resume event # The keyboard controller; it controls the keyboard and the PS/2 mouse. device atkbdc hint.atkbdc.0.at="isa" hint.atkbdc.0.port="0x060" # The AT keyboard device atkbd hint.atkbd.0.at="atkbdc" hint.atkbd.0.irq="1" # Options for atkbd: options ATKBD_DFLT_KEYMAP # specify the built-in keymap makeoptions ATKBD_DFLT_KEYMAP=fr.dvorak # `flags' for atkbd: # 0x01 Force detection of keyboard, else we always assume a keyboard # 0x02 Don't reset keyboard, useful for some newer ThinkPads # 0x03 Force detection and avoid reset, might help with certain # dockingstations # 0x04 Old-style (XT) keyboard support, useful for older ThinkPads # Video card driver for VGA adapters. device vga hint.vga.0.at="isa" # Options for vga: # Try the following option if the mouse pointer is not drawn correctly # or font does not seem to be loaded properly. May cause flicker on # some systems. options VGA_ALT_SEQACCESS # If you can dispense with some vga driver features, you may want to # use the following options to save some memory. #options VGA_NO_FONT_LOADING # don't save/load font #options VGA_NO_MODE_CHANGE # don't change video modes # Older video cards may require this option for proper operation. options VGA_SLOW_IOACCESS # do byte-wide i/o's to TS and GDC regs # The following option probably won't work with the LCD displays. options VGA_WIDTH90 # support 90 column modes # Debugging. options VGA_DEBUG # vt(4) drivers. device vt_vga # Linear framebuffer driver for S3 VESA 1.2 cards. Works on top of VESA. device s3pci # 3Dfx Voodoo Graphics, Voodoo II /dev/3dfx CDEV support. This will create # the /dev/3dfx0 device to work with glide implementations. This should get # linked to /dev/3dfx and /dev/voodoo. Note that this is not the same as # the tdfx DRI module from XFree86 and is completely unrelated. # # To enable Linuxulator support, one must also include COMPAT_LINUX in the # config as well. The other option is to load both as modules. device tdfx # Enable 3Dfx Voodoo support device tdfx_linux # Enable Linuxulator support # # ACPI support using the Intel ACPI Component Architecture reference # implementation. # # ACPI_DEBUG enables the use of the debug.acpi.level and debug.acpi.layer # kernel environment variables to select initial debugging levels for the # Intel ACPICA code. (Note that the Intel code must also have USE_DEBUGGER # defined when it is built). device acpi options ACPI_DEBUG options ACPI_DMAR # ACPI WMI Mapping driver device acpi_wmi # ACPI Asus Extras (LCD backlight/brightness, video output, etc.) device acpi_asus # ACPI Fujitsu Extras (Buttons) device acpi_fujitsu # ACPI extras driver for HP laptops device acpi_hp # ACPI extras driver for IBM laptops device acpi_ibm # ACPI Panasonic Extras (LCD backlight/brightness, video output, etc.) device acpi_panasonic # ACPI Sony extra (LCD brightness) device acpi_sony # ACPI Toshiba Extras (LCD backlight/brightness, video output, etc.) device acpi_toshiba # ACPI Video Extensions (LCD backlight/brightness, video output, etc.) device acpi_video # ACPI Docking Station device acpi_dock # ACPI ASOC ATK0110 ASUSTeK AI Booster (voltage, temperature and fan sensors) device aibs # The cpufreq(4) driver provides support for non-ACPI CPU frequency control device cpufreq # Direct Rendering modules for 3D acceleration. device drm # DRM core module required by DRM drivers device mach64drm # ATI Rage Pro, Rage Mobility P/M, Rage XL device mgadrm # AGP Matrox G200, G400, G450, G550 device r128drm # ATI Rage 128 device savagedrm # S3 Savage3D, Savage4 device sisdrm # SiS 300/305, 540, 630 device tdfxdrm # 3dfx Voodoo 3/4/5 and Banshee device viadrm # VIA options DRM_DEBUG # Include debug printfs (slow) # # Network interfaces: # # bxe: Broadcom NetXtreme II (BCM5771X/BCM578XX) PCIe 10Gb Ethernet # adapters. # ce: Cronyx Tau-PCI/32 sync single/dual port G.703/E1 serial adaptor # with 32 HDLC subchannels (requires sppp (default), or NETGRAPH if # NETGRAPH_CRONYX is configured) # cp: Cronyx Tau-PCI sync single/dual/four port # V.35/RS-232/RS-530/RS-449/X.21/G.703/E1/E3/T3/STS-1 # serial adaptor (requires sppp (default), or NETGRAPH if # NETGRAPH_CRONYX is configured) # cs: IBM Etherjet and other Crystal Semi CS89x0-based adapters # ctau: Cronyx Tau sync dual port V.35/RS-232/RS-530/RS-449/X.21/G.703/E1 # serial adaptor (requires sppp (default), or NETGRAPH if # NETGRAPH_CRONYX is configured) # ed: Western Digital and SMC 80xx; Novell NE1000 and NE2000; 3Com 3C503 # HP PC Lan+, various PC Card devices # (requires miibus) # ipw: Intel PRO/Wireless 2100 IEEE 802.11 adapter # iwi: Intel PRO/Wireless 2200BG/2225BG/2915ABG IEEE 802.11 adapters # Requires the iwi firmware module # iwn: Intel Wireless WiFi Link 1000/105/135/2000/4965/5000/6000/6050 abgn # 802.11 network adapters # Requires the iwn firmware module # mthca: Mellanox HCA InfiniBand # mlx4ib: Mellanox ConnectX HCA InfiniBand # mlx4en: Mellanox ConnectX HCA Ethernet # nfe: nVidia nForce MCP on-board Ethernet Networking (BSD open source) # sbni: Granch SBNI12-xx ISA and PCI adapters # vmx: VMware VMXNET3 Ethernet (BSD open source) # wpi: Intel 3945ABG Wireless LAN controller # Requires the wpi firmware module # Order for ISA/EISA devices is important here device bxe # Broadcom NetXtreme II BCM5771X/BCM578XX 10GbE device ce device cp device cs # Crystal Semiconductor CS89x0 NIC hint.cs.0.at="isa" hint.cs.0.port="0x300" device ctau hint.ctau.0.at="isa" hint.ctau.0.port="0x240" hint.ctau.0.irq="15" hint.ctau.0.drq="7" #options NETGRAPH_CRONYX # Enable NETGRAPH support for Cronyx adapter(s) device ed # NE[12]000, SMC Ultra, 3c503, DS8390 cards options ED_3C503 options ED_HPP options ED_SIC hint.ed.0.at="isa" hint.ed.0.port="0x280" hint.ed.0.irq="5" hint.ed.0.maddr="0xd8000" device ipw # Intel 2100 wireless NICs. device iwi # Intel 2200BG/2225BG/2915ABG wireless NICs. device iwn # Intel 4965/1000/5000/6000 wireless NICs. # Hint for the i386-only ISA front-end of le(4). hint.le.0.at="isa" hint.le.0.port="0x280" hint.le.0.irq="10" hint.le.0.drq="0" device mthca # Mellanox HCA InfiniBand device mlx4 # Shared code module between IB and Ethernet device mlx4ib # Mellanox ConnectX HCA InfiniBand device mlx4en # Mellanox ConnectX HCA Ethernet device nfe # nVidia nForce MCP on-board Ethernet device sbni hint.sbni.0.at="isa" hint.sbni.0.port="0x210" hint.sbni.0.irq="0xefdead" hint.sbni.0.flags="0" device vmx # VMware VMXNET3 Ethernet device wpi # Intel 3945ABG wireless NICs. # IEEE 802.11 adapter firmware modules # Intel PRO/Wireless 2100 firmware: # ipwfw: BSS/IBSS/monitor mode firmware # ipwbssfw: BSS mode firmware # ipwibssfw: IBSS mode firmware # ipwmonitorfw: Monitor mode firmware # Intel PRO/Wireless 2200BG/2225BG/2915ABG firmware: # iwifw: BSS/IBSS/monitor mode firmware # iwibssfw: BSS mode firmware # iwiibssfw: IBSS mode firmware # iwimonitorfw: Monitor mode firmware # Intel Wireless WiFi Link 4965/1000/5000/6000 series firmware: # iwnfw: Single module to support all devices # iwn1000fw: Specific module for the 1000 only # iwn105fw: Specific module for the 105 only # iwn135fw: Specific module for the 135 only # iwn2000fw: Specific module for the 2000 only # iwn2030fw: Specific module for the 2030 only # iwn4965fw: Specific module for the 4965 only # iwn5000fw: Specific module for the 5000 only # iwn5150fw: Specific module for the 5150 only # iwn6000fw: Specific module for the 6000 only # iwn6000g2afw: Specific module for the 6000g2a only # iwn6000g2bfw: Specific module for the 6000g2b only # iwn6050fw: Specific module for the 6050 only # wpifw: Intel 3945ABG Wireless LAN Controller firmware device iwifw device iwibssfw device iwiibssfw device iwimonitorfw device ipwfw device ipwbssfw device ipwibssfw device ipwmonitorfw device iwnfw device iwn1000fw device iwn105fw device iwn135fw device iwn2000fw device iwn2030fw device iwn4965fw device iwn5000fw device iwn5150fw device iwn6000fw device iwn6000g2afw device iwn6000g2bfw device iwn6050fw device wpifw # # Non-Transparent Bridge (NTB) drivers # device if_ntb # Virtual NTB network interface device ntb_transport # NTB packet transport driver device ntb # NTB hardware interface device ntb_hw_intel # Intel NTB hardware driver device ntb_hw_plx # PLX NTB hardware driver # # ATA raid adapters # device pst # # Areca 11xx and 12xx series of SATA II RAID controllers. # CAM is required. # device arcmsr # Areca SATA II RAID # # 3ware 9000 series PATA/SATA RAID controller driver and options. # The driver is implemented as a SIM, and so, needs the CAM infrastructure. # options TWA_DEBUG # 0-10; 10 prints the most messages. device twa # 3ware 9000 series PATA/SATA RAID # # Adaptec FSA RAID controllers, including integrated DELL controllers, # the Dell PERC 2/QC and the HP NetRAID-4M device aac device aacp # SCSI Passthrough interface (optional, CAM required) # # Adaptec by PMC RAID controllers, Series 6/7/8 and upcoming families device aacraid # Container interface, CAM required # # Highpoint RocketRAID 27xx. device hpt27xx # # Highpoint RocketRAID 182x. device hptmv # # Highpoint DC7280 and R750. device hptnr # # Highpoint RocketRAID. Supports RR172x, RR222x, RR2240, RR232x, RR2340, # RR2210, RR174x, RR2522, RR231x, RR230x. device hptrr # # Highpoint RocketRaid 3xxx series SATA RAID device hptiop # # Intel integrated Memory Controller (iMC) SMBus controller # Sandybridge-Xeon, Ivybridge-Xeon, Haswell-Xeon, Broadwell-Xeon device imcsmb # # IBM (now Adaptec) ServeRAID controllers device ips # # Intel C600 (Patsburg) integrated SAS controller device isci options ISCI_LOGGING # enable debugging in isci HAL # # NVM Express (NVMe) support device nvme # base NVMe driver device nvd # expose NVMe namespaces as disks, depends on nvme # # PMC-Sierra SAS/SATA controller device pmspcv # # SafeNet crypto driver: can be moved to the MI NOTES as soon as # it's tested on a big-endian machine # device safe # SafeNet 1141 options SAFE_DEBUG # enable debugging support: hw.safe.debug options SAFE_RNDTEST # enable rndtest support # # glxiic is an I2C driver for the AMD Geode LX CS5536 System Management Bus # controller. Requires 'device iicbus'. # device glxiic # AMD Geode LX CS5536 System Management Bus # # glxsb is a driver for the Security Block in AMD Geode LX processors. # Requires 'device crypto'. # device glxsb # AMD Geode LX Security Block # # VirtIO support # # The virtio entry provides a generic bus for use by the device drivers. # It must be combined with an interface that communicates with the host. # Multiple such interfaces defined by the VirtIO specification. FreeBSD # only has support for PCI. Therefore, virtio_pci must be statically # compiled in or loaded as a module for the device drivers to function. # device virtio # Generic VirtIO bus (required) device virtio_pci # VirtIO PCI Interface device vtnet # VirtIO Ethernet device device virtio_blk # VirtIO Block device device virtio_scsi # VirtIO SCSI device device virtio_balloon # VirtIO Memory Balloon device device virtio_random # VirtIO Entropy device device virtio_console # VirtIO Console device device hyperv # HyperV drivers ##################################################################### # # Miscellaneous hardware: # # apm: Laptop Advanced Power Management (experimental) # ipmi: Intelligent Platform Management Interface # smapi: System Management Application Program Interface driver # smbios: DMI/SMBIOS entry point # vpd: Vital Product Data kernel interface # pbio: Parallel (8255 PPI) basic I/O (mode 0) port (e.g. Advantech PCL-724) # asmc: Apple System Management Controller # si: Specialix International SI/XIO or SX intelligent serial card driver # tpm: Trusted Platform Module # Notes on APM # The flags takes the following meaning for apm0: # 0x0020 Statclock is broken. # Notes on the Specialix SI/XIO driver: # The host card is memory, not IO mapped. # The Rev 1 host cards use a 64K chunk, on a 32K boundary. # The Rev 2 host cards use a 32K chunk, on a 32K boundary. # The cards can use an IRQ of 11, 12 or 15. # Notes on the Sony Programmable I/O controller # This is a temporary driver that should someday be replaced by something # that hooks into the ACPI layer. The device is hooked to the PIIX4's # General Device 10 decoder, which means you have to fiddle with PCI # registers to map it in, even though it is otherwise treated here as # an ISA device. At the moment, the driver polls, although the device # is capable of generating interrupts. It largely undocumented. # The port location in the hint is where you WANT the device to be # mapped. 0x10a0 seems to be traditional. At the moment the jogdial # is the only thing truly supported, but apparently a fair percentage # of the Vaio extra features are controlled by this device. device apm hint.apm.0.flags="0x20" device ipmi device smapi device smbios device vpd device pbio hint.pbio.0.at="isa" hint.pbio.0.port="0x360" device asmc device tpm device padlock_rng # VIA Padlock RNG device rdrand_rng # Intel Bull Mountain RNG device aesni # AES-NI OpenCrypto module # # Laptop/Notebook options: # # See also: # apm under `Miscellaneous hardware' # above. # For older notebooks that signal a powerfail condition (external # power supply dropped, or battery state low) by issuing an NMI: options POWERFAIL_NMI # make it beep instead of panicing # # I2C Bus # # Philips i2c bus support is provided by the `iicbus' device. # # Supported interfaces: # pcf Philips PCF8584 ISA-bus controller # device pcf hint.pcf.0.at="isa" hint.pcf.0.port="0x320" hint.pcf.0.irq="5" # # Hardware watchdog timers: # # ichwd: Intel ICH watchdog timer # amdsbwd: AMD SB7xx watchdog timer # viawd: VIA south bridge watchdog timer # wbwd: Winbond watchdog timer # device ichwd device amdsbwd device viawd device wbwd # # Temperature sensors: # # coretemp: on-die sensor on Intel Core and newer CPUs # amdtemp: on-die sensor on AMD K8/K10/K11 CPUs # device coretemp device amdtemp # # CPU control pseudo-device. Provides access to MSRs, CPUID info and # microcode update feature. # device cpuctl # # System Management Bus (SMB) # options ENABLE_ALART # Control alarm on Intel intpm driver # # Set the number of PV entries per process. Increasing this can # stop panics related to heavy use of shared memory. However, that can # (combined with large amounts of physical memory) cause panics at # boot time due the kernel running out of VM space. # # If you're tweaking this, you might also want to increase the sysctls # "vm.v_free_min", "vm.v_free_reserved", and "vm.v_free_target". # # The value below is the one more than the default. # options PMAP_SHPGPERPROC=201 # # Number of initial kernel page table pages used for early bootstrap. # This number should include enough pages to map the kernel, any # modules or other data loaded with the kernel by the loader, and data # structures allocated before the VM system is initialized such as the # vm_page_t array. Each page table page maps 4MB (2MB with PAE). # options NKPT=31 ##################################################################### # ABI Emulation -# Enable iBCS2 runtime support for SCO and ISC binaries -#options IBCS2 - # Emulate spx device for client side of SVR3 local X interface options SPX_HACK # Enable (32-bit) a.out binary support options COMPAT_AOUT # Enable 32-bit runtime support for CloudABI binaries. options COMPAT_CLOUDABI32 # Enable Linux ABI emulation options COMPAT_LINUX # Enable the linux-like proc filesystem support (requires COMPAT_LINUX # and PSEUDOFS) options LINPROCFS #Enable the linux-like sys filesystem support (requires COMPAT_LINUX # and PSEUDOFS) options LINSYSFS # Enable NDIS binary driver support options NDISAPI device ndis ##################################################################### # VM OPTIONS # KSTACK_PAGES is the number of memory pages to assign to the kernel # stack of each thread. options KSTACK_PAGES=5 # Enable detailed accounting by the PV entry allocator. options PV_STATS ##################################################################### # More undocumented options for linting. # Note that documenting these are not considered an affront. options FB_INSTALL_CDEV # install a CDEV entry in /dev options I586_PMC_GUPROF=0x70000 options KBDIO_DEBUG=2 options KBD_MAXRETRY=4 options KBD_MAXWAIT=6 options KBD_RESETDELAY=201 options PSM_DEBUG=1 options TIMER_FREQ=((14318182+6)/12) options VM_KMEM_SIZE options VM_KMEM_SIZE_MAX options VM_KMEM_SIZE_SCALE Index: head/sys/i386/i386/elf_machdep.c =================================================================== --- head/sys/i386/i386/elf_machdep.c (revision 342242) +++ head/sys/i386/i386/elf_machdep.c (revision 342243) @@ -1,298 +1,297 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, - .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD ELF32", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_IA32 | SV_ILP32 | SV_SHP | SV_TIMEKEEP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); static Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); static Elf32_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(oelf32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_oinfo); static Elf32_Brandinfo kfreebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/lib/ld.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_kfreebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY }; SYSINIT(kelf32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &kfreebsd_brand_info); void elf32_dump_thread(struct thread *td, void *dst, size_t *off) { void *buf; size_t len; len = 0; if (use_xsave) { if (dst != NULL) { npxgetregs(td); len += elf32_populate_note(NT_X86_XSTATE, get_pcb_user_save_td(td), dst, cpu_max_ext_state_size, &buf); *(uint64_t *)((char *)buf + X86_XSTATE_XCR0_OFFSET) = xsave_mask; } else len += elf32_populate_note(NT_X86_XSTATE, NULL, NULL, cpu_max_ext_state_size, NULL); } *off = len; } bool elf_is_ifunc_reloc(Elf_Size r_info) { return (ELF_R_TYPE(r_info) == R_386_IRELATIVE); } #define ERI_LOCAL 0x0001 /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup, int flags) { Elf_Addr *where; Elf_Addr addr; Elf_Addr addend; Elf_Word rtype, symidx; const Elf_Rel *rel; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: rel = (const Elf_Rel *)data; where = (Elf_Addr *) (relocbase + rel->r_offset); addend = *where; rtype = ELF_R_TYPE(rel->r_info); symidx = ELF_R_SYM(rel->r_info); break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("unknown reloc type %d\n", type); } if ((flags & ERI_LOCAL) != 0) { if (rtype == R_386_RELATIVE) { /* A + B */ addr = elf_relocaddr(lf, relocbase + addend); if (*where != addr) *where = addr; } return (0); } switch (rtype) { case R_386_NONE: /* none */ break; case R_386_32: /* S + A */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; addr += addend; if (*where != addr) *where = addr; break; case R_386_PC32: /* S + A - P */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; addr += addend - (Elf_Addr)where; if (*where != addr) *where = addr; break; case R_386_COPY: /* none */ /* * There shouldn't be copy relocations in kernel * objects. */ printf("kldload: unexpected R_COPY relocation\n"); return -1; break; case R_386_GLOB_DAT: /* S */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; if (*where != addr) *where = addr; break; case R_386_RELATIVE: break; case R_386_IRELATIVE: addr = relocbase + addend; addr = ((Elf_Addr (*)(void))addr)(); if (*where != addr) *where = addr; break; default: printf("kldload: unexpected relocation type %d\n", rtype); return -1; } return(0); } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, lookup, 0)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, lookup, ERI_LOCAL)); } int elf_cpu_load_file(linker_file_t lf __unused) { return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } Index: head/sys/i386/i386/trap.c =================================================================== --- head/sys/i386/i386/trap.c (revision 342242) +++ head/sys/i386/i386/trap.c (revision 342243) @@ -1,1168 +1,1166 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 1994, David Greenman * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 */ #include __FBSDID("$FreeBSD$"); /* * 386 Trap and System call handling */ #include "opt_clock.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_hwpmc_hooks.h" #include "opt_isa.h" #include "opt_kdb.h" #include "opt_stack.h" #include "opt_trap.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include PMC_SOFT_DEFINE( , , page_fault, all); PMC_SOFT_DEFINE( , , page_fault, read); PMC_SOFT_DEFINE( , , page_fault, write); #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #include #ifdef POWERFAIL_NMI #include #include #endif #ifdef KDTRACE_HOOKS #include #endif void trap(struct trapframe *frame); void syscall(struct trapframe *frame); static int trap_pfault(struct trapframe *, int, vm_offset_t); static void trap_fatal(struct trapframe *, vm_offset_t); void dblfault_handler(void); extern inthand_t IDTVEC(bpt), IDTVEC(dbg), IDTVEC(int0x80_syscall); #define MAX_TRAP_MSG 32 struct trap_data { bool ei; const char *msg; }; static const struct trap_data trap_data[] = { [T_PRIVINFLT] = { .ei = true, .msg = "privileged instruction fault" }, [T_BPTFLT] = { .ei = false, .msg = "breakpoint instruction fault" }, [T_ARITHTRAP] = { .ei = true, .msg = "arithmetic trap" }, [T_PROTFLT] = { .ei = true, .msg = "general protection fault" }, [T_TRCTRAP] = { .ei = false, .msg = "debug exception" }, [T_PAGEFLT] = { .ei = true, .msg = "page fault" }, [T_ALIGNFLT] = { .ei = true, .msg = "alignment fault" }, [T_DIVIDE] = { .ei = true, .msg = "integer divide fault" }, [T_NMI] = { .ei = false, .msg = "non-maskable interrupt trap" }, [T_OFLOW] = { .ei = true, .msg = "overflow trap" }, [T_BOUND] = { .ei = true, .msg = "FPU bounds check fault" }, [T_DNA] = { .ei = true, .msg = "FPU device not available" }, [T_DOUBLEFLT] = { .ei = false, .msg = "double fault" }, [T_FPOPFLT] = { .ei = true, .msg = "FPU operand fetch fault" }, [T_TSSFLT] = { .ei = true, .msg = "invalid TSS fault" }, [T_SEGNPFLT] = { .ei = true, .msg = "segment not present fault" }, [T_STKFLT] = { .ei = true, .msg = "stack fault" }, [T_MCHK] = { .ei = true, .msg = "machine check trap" }, [T_XMMFLT] = { .ei = true, .msg = "SIMD floating-point exception" }, [T_DTRACE_RET] ={ .ei = true, .msg = "DTrace pid return trap" }, }; static bool trap_enable_intr(int trapno) { MPASS(trapno > 0); if (trapno < nitems(trap_data) && trap_data[trapno].msg != NULL) return (trap_data[trapno].ei); return (false); } static const char * trap_msg(int trapno) { const char *res; static const char unkn[] = "UNKNOWN"; res = NULL; if (trapno < nitems(trap_data)) res = trap_data[trapno].msg; if (res == NULL) res = unkn; return (res); } #if defined(I586_CPU) && !defined(NO_F00F_HACK) int has_f00f_bug = 0; /* Initialized so that it can be patched. */ #endif static int prot_fault_translation = 0; SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RW, &prot_fault_translation, 0, "Select signal to deliver on protection fault"); static int uprintf_signal; SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RW, &uprintf_signal, 0, "Print debugging information on trap signal to ctty"); /* * Exception, fault, and trap interface to the FreeBSD kernel. * This common code is called from assembly language IDT gate entry * routines that prepare a suitable stack frame, and restore this * frame after the exception has been processed. */ void trap(struct trapframe *frame) { ksiginfo_t ksi; struct thread *td; struct proc *p; int signo, ucode; u_int type; register_t addr, dr6; vm_offset_t eva; #ifdef POWERFAIL_NMI static int lastalert = 0; #endif td = curthread; p = td->td_proc; signo = 0; ucode = 0; addr = 0; dr6 = 0; VM_CNT_INC(v_trap); type = frame->tf_trapno; KASSERT((read_eflags() & PSL_I) == 0, ("trap: interrupts enabled, type %d frame %p", type, frame)); #ifdef SMP /* Handler for NMI IPIs used for stopping CPUs. */ if (type == T_NMI && ipi_nmi_handler() == 0) return; #endif /* SMP */ #ifdef KDB if (kdb_active) { kdb_reenter(); return; } #endif if (type == T_RESERVED) { trap_fatal(frame, 0); return; } if (type == T_NMI) { #ifdef HWPMC_HOOKS /* * CPU PMCs interrupt using an NMI so we check for that first. * If the HWPMC module is active, 'pmc_hook' will point to * the function to be called. A non-zero return value from the * hook means that the NMI was consumed by it and that we can * return immediately. */ if (pmc_intr != NULL && (*pmc_intr)(frame) != 0) return; #endif #ifdef STACK if (stack_nmi_handler(frame) != 0) return; #endif } if (type == T_MCHK) { mca_intr(); return; } #ifdef KDTRACE_HOOKS /* * A trap can occur while DTrace executes a probe. Before * executing the probe, DTrace blocks re-scheduling and sets * a flag in its per-cpu flags to indicate that it doesn't * want to fault. On returning from the probe, the no-fault * flag is cleared and finally re-scheduling is enabled. */ if ((type == T_PROTFLT || type == T_PAGEFLT) && dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type)) return; #endif /* * We must not allow context switches until %cr2 is read. * Also, for some Cyrix CPUs, %cr2 is clobbered by interrupts. * All faults use interrupt gates, so %cr2 can be safely read * now, before optional enable of the interrupts below. */ if (type == T_PAGEFLT) eva = rcr2(); /* * Buggy application or kernel code has disabled interrupts * and then trapped. Enabling interrupts now is wrong, but it * is better than running with interrupts disabled until they * are accidentally enabled later. */ if ((frame->tf_eflags & PSL_I) == 0 && TRAPF_USERMODE(frame) && (curpcb->pcb_flags & PCB_VM86CALL) == 0) uprintf("pid %ld (%s): trap %d with interrupts disabled\n", (long)curproc->p_pid, curthread->td_name, type); /* * Conditionally reenable interrupts. If we hold a spin lock, * then we must not reenable interrupts. This might be a * spurious page fault. */ if (trap_enable_intr(type) && td->td_md.md_spinlock_count == 0 && frame->tf_eip != (int)cpu_switch_load_gs) enable_intr(); if (TRAPF_USERMODE(frame) && (curpcb->pcb_flags & PCB_VM86CALL) == 0) { /* user trap */ td->td_pticks = 0; td->td_frame = frame; addr = frame->tf_eip; if (td->td_cowgen != p->p_cowgen) thread_cow_update(td); switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ signo = SIGILL; ucode = ILL_PRVOPC; break; case T_BPTFLT: /* bpt instruction fault */ enable_intr(); #ifdef KDTRACE_HOOKS if (dtrace_pid_probe_ptr != NULL && dtrace_pid_probe_ptr(frame) == 0) return; #endif signo = SIGTRAP; ucode = TRAP_BRKPT; break; case T_TRCTRAP: /* debug exception */ enable_intr(); user_trctrap_out: signo = SIGTRAP; ucode = TRAP_TRACE; dr6 = rdr6(); if ((dr6 & DBREG_DR6_BS) != 0) { PROC_LOCK(td->td_proc); if ((td->td_dbgflags & TDB_STEP) != 0) { td->td_frame->tf_eflags &= ~PSL_T; td->td_dbgflags &= ~TDB_STEP; } PROC_UNLOCK(td->td_proc); } break; case T_ARITHTRAP: /* arithmetic trap */ ucode = npxtrap_x87(); if (ucode == -1) return; signo = SIGFPE; break; /* * The following two traps can happen in vm86 mode, * and, if so, we want to handle them specially. */ case T_PROTFLT: /* general protection fault */ case T_STKFLT: /* stack fault */ if (frame->tf_eflags & PSL_VM) { signo = vm86_emulate((struct vm86frame *)frame); if (signo == SIGTRAP) { load_dr6(rdr6() | 0x4000); goto user_trctrap_out; } if (signo == 0) goto user; break; } signo = SIGBUS; ucode = (type == T_PROTFLT) ? BUS_OBJERR : BUS_ADRERR; break; case T_SEGNPFLT: /* segment not present fault */ signo = SIGBUS; ucode = BUS_ADRERR; break; case T_TSSFLT: /* invalid TSS fault */ signo = SIGBUS; ucode = BUS_OBJERR; break; case T_ALIGNFLT: signo = SIGBUS; ucode = BUS_ADRALN; break; case T_DOUBLEFLT: /* double fault */ default: signo = SIGBUS; ucode = BUS_OBJERR; break; case T_PAGEFLT: /* page fault */ signo = trap_pfault(frame, TRUE, eva); #if defined(I586_CPU) && !defined(NO_F00F_HACK) if (signo == -2) { /* * The f00f hack workaround has triggered, so * treat the fault as an illegal instruction * (T_PRIVINFLT) instead of a page fault. */ type = frame->tf_trapno = T_PRIVINFLT; /* Proceed as in that case. */ ucode = ILL_PRVOPC; signo = SIGILL; break; } #endif if (signo == -1) return; if (signo == 0) goto user; if (signo == SIGSEGV) ucode = SEGV_MAPERR; else if (prot_fault_translation == 0) { /* * Autodetect. This check also covers * the images without the ABI-tag ELF * note. */ if (SV_CURPROC_ABI() == SV_ABI_FREEBSD && p->p_osrel >= P_OSREL_SIGSEGV) { signo = SIGSEGV; ucode = SEGV_ACCERR; } else { signo = SIGBUS; ucode = T_PAGEFLT; } } else if (prot_fault_translation == 1) { /* * Always compat mode. */ signo = SIGBUS; ucode = T_PAGEFLT; } else { /* * Always SIGSEGV mode. */ signo = SIGSEGV; ucode = SEGV_ACCERR; } addr = eva; break; case T_DIVIDE: /* integer divide fault */ ucode = FPE_INTDIV; signo = SIGFPE; break; #ifdef DEV_ISA case T_NMI: #ifdef POWERFAIL_NMI #ifndef TIMER_FREQ # define TIMER_FREQ 1193182 #endif if (time_second - lastalert > 10) { log(LOG_WARNING, "NMI: power fail\n"); sysbeep(880, hz); lastalert = time_second; } return; #else /* !POWERFAIL_NMI */ nmi_handle_intr(type, frame); return; #endif /* POWERFAIL_NMI */ #endif /* DEV_ISA */ case T_OFLOW: /* integer overflow fault */ ucode = FPE_INTOVF; signo = SIGFPE; break; case T_BOUND: /* bounds check fault */ ucode = FPE_FLTSUB; signo = SIGFPE; break; case T_DNA: KASSERT(PCB_USER_FPU(td->td_pcb), ("kernel FPU ctx has leaked")); /* transparent fault (due to context switch "late") */ if (npxdna()) return; uprintf("pid %d killed due to lack of floating point\n", p->p_pid); signo = SIGKILL; ucode = 0; break; case T_FPOPFLT: /* FPU operand fetch fault */ ucode = ILL_COPROC; signo = SIGILL; break; case T_XMMFLT: /* SIMD floating-point exception */ ucode = npxtrap_sse(); if (ucode == -1) return; signo = SIGFPE; break; #ifdef KDTRACE_HOOKS case T_DTRACE_RET: enable_intr(); if (dtrace_return_probe_ptr != NULL) dtrace_return_probe_ptr(frame); return; #endif } } else { /* kernel trap */ KASSERT(cold || td->td_ucred != NULL, ("kernel trap doesn't have ucred")); switch (type) { case T_PAGEFLT: /* page fault */ (void) trap_pfault(frame, FALSE, eva); return; case T_DNA: if (PCB_USER_FPU(td->td_pcb)) panic("Unregistered use of FPU in kernel"); if (npxdna()) return; break; case T_ARITHTRAP: /* arithmetic trap */ case T_XMMFLT: /* SIMD floating-point exception */ case T_FPOPFLT: /* FPU operand fetch fault */ /* * XXXKIB for now disable any FPU traps in kernel * handler registration seems to be overkill */ trap_fatal(frame, 0); return; /* * The following two traps can happen in * vm86 mode, and, if so, we want to handle * them specially. */ case T_PROTFLT: /* general protection fault */ case T_STKFLT: /* stack fault */ if (frame->tf_eflags & PSL_VM) { signo = vm86_emulate((struct vm86frame *)frame); if (signo == SIGTRAP) { type = T_TRCTRAP; load_dr6(rdr6() | 0x4000); goto kernel_trctrap; } if (signo != 0) /* * returns to original process */ vm86_trap((struct vm86frame *)frame); return; } /* FALL THROUGH */ case T_SEGNPFLT: /* segment not present fault */ if (curpcb->pcb_flags & PCB_VM86CALL) break; /* * Invalid %fs's and %gs's can be created using * procfs or PT_SETREGS or by invalidating the * underlying LDT entry. This causes a fault * in kernel mode when the kernel attempts to * switch contexts. Lose the bad context * (XXX) so that we can continue, and generate * a signal. */ if (frame->tf_eip == (int)cpu_switch_load_gs) { curpcb->pcb_gs = 0; #if 0 PROC_LOCK(p); kern_psignal(p, SIGBUS); PROC_UNLOCK(p); #endif return; } if (td->td_intr_nesting_level != 0) break; /* * Invalid segment selectors and out of bounds * %eip's and %esp's can be set up in user mode. * This causes a fault in kernel mode when the * kernel tries to return to user mode. We want * to get this fault so that we can fix the * problem here and not have to check all the * selectors and pointers when the user changes * them. * * N.B. Comparing to long mode, 32-bit mode * does not push %esp on the trap frame, * because iretl faulted while in ring 0. As * the consequence, there is no need to fixup * the stack pointer for doreti_iret_fault, * the fixup and the complimentary trap() call * are executed on the main thread stack, not * on the trampoline stack. */ if (frame->tf_eip == (int)doreti_iret + setidt_disp) { frame->tf_eip = (int)doreti_iret_fault + setidt_disp; return; } if (type == T_STKFLT) break; if (frame->tf_eip == (int)doreti_popl_ds + setidt_disp) { frame->tf_eip = (int)doreti_popl_ds_fault + setidt_disp; return; } if (frame->tf_eip == (int)doreti_popl_es + setidt_disp) { frame->tf_eip = (int)doreti_popl_es_fault + setidt_disp; return; } if (frame->tf_eip == (int)doreti_popl_fs + setidt_disp) { frame->tf_eip = (int)doreti_popl_fs_fault + setidt_disp; return; } if (curpcb->pcb_onfault != NULL) { frame->tf_eip = (int)curpcb->pcb_onfault; return; } break; case T_TSSFLT: /* * PSL_NT can be set in user mode and isn't cleared * automatically when the kernel is entered. This * causes a TSS fault when the kernel attempts to * `iret' because the TSS link is uninitialized. We * want to get this fault so that we can fix the * problem here and not every time the kernel is * entered. */ if (frame->tf_eflags & PSL_NT) { frame->tf_eflags &= ~PSL_NT; return; } break; case T_TRCTRAP: /* debug exception */ kernel_trctrap: /* Clear any pending debug events. */ dr6 = rdr6(); load_dr6(0); /* * Ignore debug register exceptions due to * accesses in the user's address space, which * can happen under several conditions such as * if a user sets a watchpoint on a buffer and * then passes that buffer to a system call. * We still want to get TRCTRAPS for addresses * in kernel space because that is useful when * debugging the kernel. */ if (user_dbreg_trap(dr6) && !(curpcb->pcb_flags & PCB_VM86CALL)) return; /* * Malicious user code can configure a debug * register watchpoint to trap on data access * to the top of stack and then execute 'pop * %ss; int 3'. Due to exception deferral for * 'pop %ss', the CPU will not interrupt 'int * 3' to raise the DB# exception for the debug * register but will postpone the DB# until * execution of the first instruction of the * BP# handler (in kernel mode). Normally the * previous check would ignore DB# exceptions * for watchpoints on user addresses raised in * kernel mode. However, some CPU errata * include cases where DB# exceptions do not * properly set bits in %dr6, e.g. Haswell * HSD23 and Skylake-X SKZ24. * * A deferred DB# can also be raised on the * first instructions of system call entry * points or single-step traps via similar use * of 'pop %ss' or 'mov xxx, %ss'. */ if (frame->tf_eip == (uintptr_t)IDTVEC(int0x80_syscall) + setidt_disp || frame->tf_eip == (uintptr_t)IDTVEC(bpt) + setidt_disp || frame->tf_eip == (uintptr_t)IDTVEC(dbg) + setidt_disp) return; /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) */ case T_BPTFLT: /* * If KDB is enabled, let it handle the debugger trap. * Otherwise, debugger traps "can't happen". */ #ifdef KDB if (kdb_trap(type, dr6, frame)) return; #endif break; #ifdef DEV_ISA case T_NMI: #ifdef POWERFAIL_NMI if (time_second - lastalert > 10) { log(LOG_WARNING, "NMI: power fail\n"); sysbeep(880, hz); lastalert = time_second; } return; #else /* !POWERFAIL_NMI */ nmi_handle_intr(type, frame); return; #endif /* POWERFAIL_NMI */ #endif /* DEV_ISA */ } trap_fatal(frame, eva); return; } /* Translate fault for emulators (e.g. Linux) */ if (*p->p_sysent->sv_transtrap != NULL) signo = (*p->p_sysent->sv_transtrap)(signo, type); ksiginfo_init_trap(&ksi); ksi.ksi_signo = signo; ksi.ksi_code = ucode; ksi.ksi_addr = (void *)addr; ksi.ksi_trapno = type; if (uprintf_signal) { uprintf("pid %d comm %s: signal %d err %x code %d type %d " "addr 0x%x ss 0x%04x esp 0x%08x cs 0x%04x eip 0x%08x " "<%02x %02x %02x %02x %02x %02x %02x %02x>\n", p->p_pid, p->p_comm, signo, frame->tf_err, ucode, type, addr, frame->tf_ss, frame->tf_esp, frame->tf_cs, frame->tf_eip, fubyte((void *)(frame->tf_eip + 0)), fubyte((void *)(frame->tf_eip + 1)), fubyte((void *)(frame->tf_eip + 2)), fubyte((void *)(frame->tf_eip + 3)), fubyte((void *)(frame->tf_eip + 4)), fubyte((void *)(frame->tf_eip + 5)), fubyte((void *)(frame->tf_eip + 6)), fubyte((void *)(frame->tf_eip + 7))); } KASSERT((read_eflags() & PSL_I) != 0, ("interrupts disabled")); trapsignal(td, &ksi); user: userret(td, frame); KASSERT(PCB_USER_FPU(td->td_pcb), ("Return from trap with kernel FPU ctx leaked")); } static int trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva) { struct thread *td; struct proc *p; vm_offset_t va; vm_map_t map; int rv; vm_prot_t ftype; td = curthread; p = td->td_proc; if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) { /* * Due to both processor errata and lazy TLB invalidation when * access restrictions are removed from virtual pages, memory * accesses that are allowed by the physical mapping layer may * nonetheless cause one spurious page fault per virtual page. * When the thread is executing a "no faulting" section that * is bracketed by vm_fault_{disable,enable}_pagefaults(), * every page fault is treated as a spurious page fault, * unless it accesses the same virtual address as the most * recent page fault within the same "no faulting" section. */ if (td->td_md.md_spurflt_addr != eva || (td->td_pflags & TDP_RESETSPUR) != 0) { /* * Do nothing to the TLB. A stale TLB entry is * flushed automatically by a page fault. */ td->td_md.md_spurflt_addr = eva; td->td_pflags &= ~TDP_RESETSPUR; return (0); } } else { /* * If we get a page fault while in a critical section, then * it is most likely a fatal kernel page fault. The kernel * is already going to panic trying to get a sleep lock to * do the VM lookup, so just consider it a fatal trap so the * kernel can print out a useful trap message and even get * to the debugger. * * If we get a page fault while holding a non-sleepable * lock, then it is most likely a fatal kernel page fault. * If WITNESS is enabled, then it's going to whine about * bogus LORs with various VM locks, so just skip to the * fatal trap handling directly. */ if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) { trap_fatal(frame, eva); return (-1); } } va = trunc_page(eva); if (va >= PMAP_TRM_MIN_ADDRESS) { /* * Don't allow user-mode faults in kernel address space. * An exception: if the faulting address is the invalid * instruction entry in the IDT, then the Intel Pentium * F00F bug workaround was triggered, and we need to * treat it is as an illegal instruction, and not a page * fault. */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) return (-2); #endif if (usermode) return (SIGSEGV); trap_fatal(frame, eva); return (-1); } else { map = usermode ? &p->p_vmspace->vm_map : kernel_map; /* * Kernel cannot access a user-space address directly * because user pages are not mapped. Also, page * faults must not be caused during the interrupts. */ if (!usermode && td->td_intr_nesting_level != 0) { trap_fatal(frame, eva); return (-1); } } /* * If the trap was caused by errant bits in the PTE then panic. */ if (frame->tf_err & PGEX_RSV) { trap_fatal(frame, eva); return (-1); } /* * PGEX_I is defined only if the execute disable bit capability is * supported and enabled. */ if (frame->tf_err & PGEX_W) ftype = VM_PROT_WRITE; #if defined(PAE) || defined(PAE_TABLES) else if ((frame->tf_err & PGEX_I) && pg_nx != 0) ftype = VM_PROT_EXECUTE; #endif else ftype = VM_PROT_READ; /* Fault in the page. */ rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); if (rv == KERN_SUCCESS) { #ifdef HWPMC_HOOKS if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { PMC_SOFT_CALL_TF( , , page_fault, all, frame); if (ftype == VM_PROT_READ) PMC_SOFT_CALL_TF( , , page_fault, read, frame); else PMC_SOFT_CALL_TF( , , page_fault, write, frame); } #endif return (0); } if (!usermode) { if (td->td_intr_nesting_level == 0 && curpcb->pcb_onfault != NULL) { frame->tf_eip = (int)curpcb->pcb_onfault; return (0); } trap_fatal(frame, eva); return (-1); } return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); } static void trap_fatal(frame, eva) struct trapframe *frame; vm_offset_t eva; { int code, ss, esp; u_int type; struct soft_segment_descriptor softseg; #ifdef KDB bool handled; #endif code = frame->tf_err; type = frame->tf_trapno; sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg); printf("\n\nFatal trap %d: %s while in %s mode\n", type, trap_msg(type), frame->tf_eflags & PSL_VM ? "vm86" : ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel"); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif if (type == T_PAGEFLT) { printf("fault virtual address = 0x%x\n", eva); printf("fault code = %s %s%s, %s\n", code & PGEX_U ? "user" : "supervisor", code & PGEX_W ? "write" : "read", #if defined(PAE) || defined(PAE_TABLES) pg_nx != 0 ? (code & PGEX_I ? " instruction" : " data") : #endif "", code & PGEX_RSV ? "reserved bits in PTE" : code & PGEX_P ? "protection violation" : "page not present"); } else { printf("error code = %#x\n", code); } printf("instruction pointer = 0x%x:0x%x\n", frame->tf_cs & 0xffff, frame->tf_eip); if (TF_HAS_STACKREGS(frame)) { ss = frame->tf_ss & 0xffff; esp = frame->tf_esp; } else { ss = GSEL(GDATA_SEL, SEL_KPL); esp = (int)&frame->tf_esp; } printf("stack pointer = 0x%x:0x%x\n", ss, esp); printf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp); printf("code segment = base 0x%x, limit 0x%x, type 0x%x\n", softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); printf(" = DPL %d, pres %d, def32 %d, gran %d\n", softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32, softseg.ssd_gran); printf("processor eflags = "); if (frame->tf_eflags & PSL_T) printf("trace trap, "); if (frame->tf_eflags & PSL_I) printf("interrupt enabled, "); if (frame->tf_eflags & PSL_NT) printf("nested task, "); if (frame->tf_eflags & PSL_RF) printf("resume, "); if (frame->tf_eflags & PSL_VM) printf("vm86, "); printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12); printf("current process = %d (%s)\n", curproc->p_pid, curthread->td_name); #ifdef KDB if (debugger_on_trap) { kdb_why = KDB_WHY_TRAP; frame->tf_err = eva; /* smuggle fault address to ddb */ handled = kdb_trap(type, 0, frame); frame->tf_err = code; /* restore error code */ kdb_why = KDB_WHY_UNSET; if (handled) return; } #endif printf("trap number = %d\n", type); if (trap_msg(type) != NULL) panic("%s", trap_msg(type)); else panic("unknown/reserved trap"); } /* * Double fault handler. Called when a fault occurs while writing * a frame for a trap/exception onto the stack. This usually occurs * when the stack overflows (such is the case with infinite recursion, * for example). * * XXX Note that the current PTD gets replaced by IdlePTD when the * task switch occurs. This means that the stack that was active at * the time of the double fault is not available at unless * the machine was idle when the double fault occurred. The downside * of this is that "trace " in ddb won't work. */ void dblfault_handler(void) { #ifdef KDTRACE_HOOKS if (dtrace_doubletrap_func != NULL) (*dtrace_doubletrap_func)(); #endif printf("\nFatal double fault:\n"); printf("eip = 0x%x\n", PCPU_GET(common_tssp)->tss_eip); printf("esp = 0x%x\n", PCPU_GET(common_tssp)->tss_esp); printf("ebp = 0x%x\n", PCPU_GET(common_tssp)->tss_ebp); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif panic("double fault"); } int cpu_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; struct syscall_args *sa; caddr_t params; long tmp; int error; #ifdef COMPAT_43 u_int32_t eip; int cs; #endif p = td->td_proc; frame = td->td_frame; sa = &td->td_sa; #ifdef COMPAT_43 if (__predict_false(frame->tf_cs == 7 && frame->tf_eip == 2)) { /* * In lcall $7,$0 after int $0x80. Convert the user * frame to what it would be for a direct int 0x80 instead * of lcall $7,$0, by popping the lcall return address. */ error = fueword32((void *)frame->tf_esp, &eip); if (error == -1) return (EFAULT); cs = fuword16((void *)(frame->tf_esp + sizeof(u_int32_t))); if (cs == -1) return (EFAULT); /* * Unwind in-kernel frame after all stack frame pieces * were successfully read. */ frame->tf_eip = eip; frame->tf_cs = cs; frame->tf_esp += 2 * sizeof(u_int32_t); frame->tf_err = 7; /* size of lcall $7,$0 */ } #endif sa->code = frame->tf_eax; params = (caddr_t)frame->tf_esp + sizeof(uint32_t); /* * Need to check if this is a 32 bit or 64 bit syscall. */ if (sa->code == SYS_syscall) { /* * Code is first argument, followed by actual args. */ error = fueword(params, &tmp); if (error == -1) return (EFAULT); sa->code = tmp; params += sizeof(uint32_t); } else if (sa->code == SYS___syscall) { /* * Like syscall, but code is a quad, so as to maintain * quad alignment for the rest of the arguments. */ error = fueword(params, &tmp); if (error == -1) return (EFAULT); sa->code = tmp; params += sizeof(quad_t); } - if (p->p_sysent->sv_mask) - sa->code &= p->p_sysent->sv_mask; if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; if (params != NULL && sa->narg != 0) error = copyin(params, (caddr_t)sa->args, (u_int)(sa->narg * sizeof(uint32_t))); else error = 0; if (error == 0) { td->td_retval[0] = 0; td->td_retval[1] = frame->tf_edx; } return (error); } #include "../../kern/subr_syscall.c" /* * syscall - system call request C handler. A system call is * essentially treated as a trap by reusing the frame layout. */ void syscall(struct trapframe *frame) { struct thread *td; register_t orig_tf_eflags; int error; ksiginfo_t ksi; #ifdef DIAGNOSTIC if (!(TRAPF_USERMODE(frame) && (curpcb->pcb_flags & PCB_VM86CALL) == 0)) { panic("syscall"); /* NOT REACHED */ } #endif orig_tf_eflags = frame->tf_eflags; td = curthread; td->td_frame = frame; error = syscallenter(td); /* * Traced syscall. */ if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) { frame->tf_eflags &= ~PSL_T; ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGTRAP; ksi.ksi_code = TRAP_TRACE; ksi.ksi_addr = (void *)frame->tf_eip; trapsignal(td, &ksi); } KASSERT(PCB_USER_FPU(td->td_pcb), ("System call %s returning with kernel FPU ctx leaked", syscallname(td->td_proc, td->td_sa.code))); KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td), ("System call %s returning with mangled pcb_save", syscallname(td->td_proc, td->td_sa.code))); syscallret(td, error); } Index: head/sys/i386/linux/linux_sysvec.c =================================================================== --- head/sys/i386/linux/linux_sysvec.c (revision 342242) +++ head/sys/i386/linux/linux_sysvec.c (revision 342243) @@ -1,1107 +1,1105 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1994-1996 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_VERSION(linux, 1); #if defined(DEBUG) SYSCTL_PROC(_compat_linux, OID_AUTO, debug, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, linux_sysctl_debug, "A", "Linux debugging control"); #endif /* * Allow the sendsig functions to use the ldebug() facility even though they * are not syscalls themselves. Map them to syscall 0. This is slightly less * bogus than using ldebug(sigreturn). */ #define LINUX_SYS_linux_rt_sendsig 0 #define LINUX_SYS_linux_sendsig 0 #define LINUX_PS_STRINGS (LINUX_USRSTACK - sizeof(struct ps_strings)) static int linux_szsigcode; static vm_object_t linux_shared_page_obj; static char *linux_shared_page_mapping; extern char _binary_linux_locore_o_start; extern char _binary_linux_locore_o_end; extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); static int linux_fixup(register_t **stack_base, struct image_params *iparams); static int linux_fixup_elf(register_t **stack_base, struct image_params *iparams); static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); static void linux_exec_setregs(struct thread *td, struct image_params *imgp, u_long stack); static register_t *linux_copyout_strings(struct image_params *imgp); static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel); static void linux_vdso_install(void *param); static void linux_vdso_deinstall(void *param); static int linux_szplatform; const char *linux_kplatform; static eventhandler_tag linux_exit_tag; static eventhandler_tag linux_exec_tag; static eventhandler_tag linux_thread_dtor_tag; #define LINUX_T_UNKNOWN 255 static int _bsd_to_linux_trapcode[] = { LINUX_T_UNKNOWN, /* 0 */ 6, /* 1 T_PRIVINFLT */ LINUX_T_UNKNOWN, /* 2 */ 3, /* 3 T_BPTFLT */ LINUX_T_UNKNOWN, /* 4 */ LINUX_T_UNKNOWN, /* 5 */ 16, /* 6 T_ARITHTRAP */ 254, /* 7 T_ASTFLT */ LINUX_T_UNKNOWN, /* 8 */ 13, /* 9 T_PROTFLT */ 1, /* 10 T_TRCTRAP */ LINUX_T_UNKNOWN, /* 11 */ 14, /* 12 T_PAGEFLT */ LINUX_T_UNKNOWN, /* 13 */ 17, /* 14 T_ALIGNFLT */ LINUX_T_UNKNOWN, /* 15 */ LINUX_T_UNKNOWN, /* 16 */ LINUX_T_UNKNOWN, /* 17 */ 0, /* 18 T_DIVIDE */ 2, /* 19 T_NMI */ 4, /* 20 T_OFLOW */ 5, /* 21 T_BOUND */ 7, /* 22 T_DNA */ 8, /* 23 T_DOUBLEFLT */ 9, /* 24 T_FPOPFLT */ 10, /* 25 T_TSSFLT */ 11, /* 26 T_SEGNPFLT */ 12, /* 27 T_STKFLT */ 18, /* 28 T_MCHK */ 19, /* 29 T_XMMFLT */ 15 /* 30 T_RESERVED */ }; #define bsd_to_linux_trapcode(code) \ ((code)args->argc + 1); (*stack_base)--; suword(*stack_base, (intptr_t)(void *)envp); (*stack_base)--; suword(*stack_base, (intptr_t)(void *)argv); (*stack_base)--; suword(*stack_base, imgp->args->argc); return (0); } static int linux_fixup_elf(register_t **stack_base, struct image_params *imgp) { struct proc *p; Elf32_Auxargs *args; Elf32_Auxinfo *argarray, *pos; Elf32_Addr *auxbase, *uplatform; struct ps_strings *arginfo; int error, issetugid; KASSERT(curthread->td_proc == imgp->proc, ("unsafe linux_fixup_elf(), should be curproc")); p = imgp->proc; issetugid = imgp->proc->p_flag & P_SUGID ? 1 : 0; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform); args = (Elf32_Auxargs *)imgp->auxargs; auxbase = *stack_base + imgp->args->argc + 1 + imgp->args->envc + 1; argarray = pos = malloc(LINUX_AT_COUNT * sizeof(*pos), M_TEMP, M_WAITOK | M_ZERO); AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, imgp->proc->p_sysent->sv_shared_page_base); AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, linux_vsyscall); AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature); /* * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0, * as it has appeared in the 2.4.0-rc7 first time. * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK), * glibc falls back to the hard-coded CLK_TCK value when aux entry * is not present. * Also see linux_times() implementation. */ if (linux_kernver(curthread) >= LINUX_KERNVER_2004000) AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz); AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); AUXARGS_ENTRY(pos, AT_PHENT, args->phent); AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); AUXARGS_ENTRY(pos, AT_BASE, args->base); AUXARGS_ENTRY(pos, LINUX_AT_SECURE, issetugid); AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform)); AUXARGS_ENTRY(pos, LINUX_AT_RANDOM, imgp->canary); if (imgp->execpathp != 0) AUXARGS_ENTRY(pos, LINUX_AT_EXECFN, imgp->execpathp); if (args->execfd != -1) AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); AUXARGS_ENTRY(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); imgp->auxargs = NULL; KASSERT(pos - argarray <= LINUX_AT_COUNT, ("Too many auxargs")); error = copyout(argarray, auxbase, sizeof(*argarray) * LINUX_AT_COUNT); free(argarray, M_TEMP); if (error != 0) return (error); (*stack_base)--; if (suword(*stack_base, (register_t)imgp->args->argc) == -1) return (EFAULT); return (0); } /* * Copied from kern/kern_exec.c */ static register_t * linux_copyout_strings(struct image_params *imgp) { int argc, envc; char **vectp; char *stringp, *destp; register_t *stack_base; struct ps_strings *arginfo; char canary[LINUX_AT_RANDOM_LEN]; size_t execpath_len; struct proc *p; /* Calculate string base and vector table pointers. */ p = imgp->proc; if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform - roundup(sizeof(canary), sizeof(char *)) - roundup(execpath_len, sizeof(char *)) - roundup(ARG_MAX - imgp->args->stringspace, sizeof(char *)); /* Install LINUX_PLATFORM. */ copyout(linux_kplatform, ((caddr_t)arginfo - linux_szplatform), linux_szplatform); if (execpath_len != 0) { imgp->execpathp = (uintptr_t)arginfo - linux_szplatform - execpath_len; copyout(imgp->execpath, (void *)imgp->execpathp, execpath_len); } /* Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); imgp->canary = (uintptr_t)arginfo - linux_szplatform - roundup(execpath_len, sizeof(char *)) - roundup(sizeof(canary), sizeof(char *)); copyout(canary, (void *)imgp->canary, sizeof(canary)); vectp = (char **)destp; if (imgp->auxargs) { /* * Allocate room on the stack for the ELF auxargs * array. It has LINUX_AT_COUNT entries. */ vectp -= howmany(LINUX_AT_COUNT * sizeof(Elf32_Auxinfo), sizeof(*vectp)); } /* * Allocate room for the argv[] and env vectors including the * terminating NULL pointers. */ vectp -= imgp->args->argc + 1 + imgp->args->envc + 1; /* vectp also becomes our initial stack base. */ stack_base = (register_t *)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* Copy out strings - arguments and environment. */ copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); /* Fill in "ps_strings" struct for ps, w, etc. */ suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp); suword(&arginfo->ps_nargvstr, argc); /* Fill in argument portion of vector table. */ for (; argc > 0; --argc) { suword(vectp++, (long)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* A null vector table pointer separates the argp's from the envp's. */ suword(vectp++, 0); suword(&arginfo->ps_envstr, (long)(intptr_t)vectp); suword(&arginfo->ps_nenvstr, envc); /* Fill in environment portion of vector table. */ for (; envc > 0; --envc) { suword(vectp++, (long)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* The end of the vector table is a null pointer. */ suword(vectp, 0); return (stack_base); } static void linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td = curthread; struct proc *p = td->td_proc; struct sigacts *psp; struct trapframe *regs; struct l_rt_sigframe *fp, frame; int sig, code; int oonstack; sig = ksi->ksi_signo; code = ksi->ksi_code; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); #ifdef DEBUG if (ldebug(rt_sendsig)) printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), catcher, sig, (void*)mask, code); #endif /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct l_rt_sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); } else fp = (struct l_rt_sigframe *)regs->tf_esp - 1; mtx_unlock(&psp->ps_mtx); /* Build the argument list for the signal handler. */ sig = bsd_to_linux_signal(sig); bzero(&frame, sizeof(frame)); frame.sf_handler = catcher; frame.sf_sig = sig; frame.sf_siginfo = &fp->sf_si; frame.sf_ucontext = &fp->sf_sc; /* Fill in POSIX parts. */ ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); /* Build the signal context to be used by sigreturn. */ frame.sf_sc.uc_flags = 0; /* XXX ??? */ frame.sf_sc.uc_link = NULL; /* XXX ??? */ frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp; frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; PROC_UNLOCK(p); bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__mask; frame.sf_sc.uc_mcontext.sc_gs = rgs(); frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi; frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi; frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp; frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx; frame.sf_sc.uc_mcontext.sc_esp = regs->tf_esp; frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx; frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx; frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax; frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip; frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags; frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp; frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr; frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); #ifdef DEBUG if (ldebug(rt_sendsig)) printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); #endif if (copyout(&frame, fp, sizeof(frame)) != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ #ifdef DEBUG if (ldebug(rt_sendsig)) printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), fp, oonstack); #endif PROC_LOCK(p); sigexit(td, SIGILL); } /* Build context to run handler in. */ regs->tf_esp = (int)fp; regs->tf_eip = linux_rt_sigcode; regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * in u. to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td = curthread; struct proc *p = td->td_proc; struct sigacts *psp; struct trapframe *regs; struct l_sigframe *fp, frame; l_sigset_t lmask; int sig, code; int oonstack; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; sig = ksi->ksi_signo; code = ksi->ksi_code; mtx_assert(&psp->ps_mtx, MA_OWNED); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ linux_rt_sendsig(catcher, ksi, mask); return; } regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); #ifdef DEBUG if (ldebug(sendsig)) printf(ARGS(sendsig, "%p, %d, %p, %u"), catcher, sig, (void*)mask, code); #endif /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct l_sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_sigframe)); } else fp = (struct l_sigframe *)regs->tf_esp - 1; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* Build the argument list for the signal handler. */ sig = bsd_to_linux_signal(sig); bzero(&frame, sizeof(frame)); frame.sf_handler = catcher; frame.sf_sig = sig; bsd_to_linux_sigset(mask, &lmask); /* Build the signal context to be used by sigreturn. */ frame.sf_sc.sc_mask = lmask.__mask; frame.sf_sc.sc_gs = rgs(); frame.sf_sc.sc_fs = regs->tf_fs; frame.sf_sc.sc_es = regs->tf_es; frame.sf_sc.sc_ds = regs->tf_ds; frame.sf_sc.sc_edi = regs->tf_edi; frame.sf_sc.sc_esi = regs->tf_esi; frame.sf_sc.sc_ebp = regs->tf_ebp; frame.sf_sc.sc_ebx = regs->tf_ebx; frame.sf_sc.sc_esp = regs->tf_esp; frame.sf_sc.sc_edx = regs->tf_edx; frame.sf_sc.sc_ecx = regs->tf_ecx; frame.sf_sc.sc_eax = regs->tf_eax; frame.sf_sc.sc_eip = regs->tf_eip; frame.sf_sc.sc_cs = regs->tf_cs; frame.sf_sc.sc_eflags = regs->tf_eflags; frame.sf_sc.sc_esp_at_signal = regs->tf_esp; frame.sf_sc.sc_ss = regs->tf_ss; frame.sf_sc.sc_err = regs->tf_err; frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr; frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno); frame.sf_extramask[0] = lmask.__mask; if (copyout(&frame, fp, sizeof(frame)) != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ PROC_LOCK(p); sigexit(td, SIGILL); } /* Build context to run handler in. */ regs->tf_esp = (int)fp; regs->tf_eip = linux_sigcode; regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * psl to gain improper privileges or to cause * a machine fault. */ int linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) { struct l_sigframe frame; struct trapframe *regs; l_sigset_t lmask; sigset_t bmask; int eflags; ksiginfo_t ksi; regs = td->td_frame; #ifdef DEBUG if (ldebug(sigreturn)) printf(ARGS(sigreturn, "%p"), (void *)args->sfp); #endif /* * The trampoline code hands us the sigframe. * It is unsafe to keep track of it ourselves, in the event that a * program jumps out of a signal handler. */ if (copyin(args->sfp, &frame, sizeof(frame)) != 0) return (EFAULT); /* Check for security violations. */ #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) eflags = frame.sf_sc.sc_eflags; if (!EFLAGS_SECURE(eflags, regs->tf_eflags)) return (EINVAL); /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(frame.sf_sc.sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } lmask.__mask = frame.sf_sc.sc_mask; linux_to_bsd_sigset(&lmask, &bmask); kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); /* Restore signal context. */ /* %gs was restored by the trampoline. */ regs->tf_fs = frame.sf_sc.sc_fs; regs->tf_es = frame.sf_sc.sc_es; regs->tf_ds = frame.sf_sc.sc_ds; regs->tf_edi = frame.sf_sc.sc_edi; regs->tf_esi = frame.sf_sc.sc_esi; regs->tf_ebp = frame.sf_sc.sc_ebp; regs->tf_ebx = frame.sf_sc.sc_ebx; regs->tf_edx = frame.sf_sc.sc_edx; regs->tf_ecx = frame.sf_sc.sc_ecx; regs->tf_eax = frame.sf_sc.sc_eax; regs->tf_eip = frame.sf_sc.sc_eip; regs->tf_cs = frame.sf_sc.sc_cs; regs->tf_eflags = eflags; regs->tf_esp = frame.sf_sc.sc_esp_at_signal; regs->tf_ss = frame.sf_sc.sc_ss; return (EJUSTRETURN); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by rt_sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * psl to gain improper privileges or to cause * a machine fault. */ int linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) { struct l_ucontext uc; struct l_sigcontext *context; sigset_t bmask; l_stack_t *lss; stack_t ss; struct trapframe *regs; int eflags; ksiginfo_t ksi; regs = td->td_frame; #ifdef DEBUG if (ldebug(rt_sigreturn)) printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); #endif /* * The trampoline code hands us the ucontext. * It is unsafe to keep track of it ourselves, in the event that a * program jumps out of a signal handler. */ if (copyin(args->ucp, &uc, sizeof(uc)) != 0) return (EFAULT); context = &uc.uc_mcontext; /* Check for security violations. */ #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) eflags = context->sc_eflags; if (!EFLAGS_SECURE(eflags, regs->tf_eflags)) return (EINVAL); /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(context->sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } linux_to_bsd_sigset(&uc.uc_sigmask, &bmask); kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); /* Restore signal context. */ /* %gs was restored by the trampoline. */ regs->tf_fs = context->sc_fs; regs->tf_es = context->sc_es; regs->tf_ds = context->sc_ds; regs->tf_edi = context->sc_edi; regs->tf_esi = context->sc_esi; regs->tf_ebp = context->sc_ebp; regs->tf_ebx = context->sc_ebx; regs->tf_edx = context->sc_edx; regs->tf_ecx = context->sc_ecx; regs->tf_eax = context->sc_eax; regs->tf_eip = context->sc_eip; regs->tf_cs = context->sc_cs; regs->tf_eflags = eflags; regs->tf_esp = context->sc_esp_at_signal; regs->tf_ss = context->sc_ss; /* Call sigaltstack & ignore results. */ lss = &uc.uc_stack; ss.ss_sp = lss->ss_sp; ss.ss_size = lss->ss_size; ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); #ifdef DEBUG if (ldebug(rt_sigreturn)) printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); #endif (void)kern_sigaltstack(td, &ss, NULL); return (EJUSTRETURN); } static int linux_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; struct syscall_args *sa; p = td->td_proc; frame = td->td_frame; sa = &td->td_sa; sa->code = frame->tf_eax; sa->args[0] = frame->tf_ebx; sa->args[1] = frame->tf_ecx; sa->args[2] = frame->tf_edx; sa->args[3] = frame->tf_esi; sa->args[4] = frame->tf_edi; sa->args[5] = frame->tf_ebp; /* Unconfirmed */ if (sa->code >= p->p_sysent->sv_size) /* nosys */ sa->callp = &p->p_sysent->sv_table[p->p_sysent->sv_size - 1]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; td->td_retval[0] = 0; td->td_retval[1] = frame->tf_edx; return (0); } /* * exec_setregs may initialize some registers differently than Linux * does, thus potentially confusing Linux binaries. If necessary, we * override the exec_setregs default(s) here. */ static void linux_exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct pcb *pcb = td->td_pcb; exec_setregs(td, imgp, stack); /* Linux sets %gs to 0, we default to _udatasel. */ pcb->pcb_gs = 0; load_gs(0); pcb->pcb_initial_npxcw = __LINUX_NPXCW__; } static void linux_get_machine(const char **dst) { switch (cpu_class) { case CPUCLASS_686: *dst = "i686"; break; case CPUCLASS_586: *dst = "i586"; break; case CPUCLASS_486: *dst = "i486"; break; default: *dst = "i386"; } } struct sysentvec linux_sysvec = { .sv_size = LINUX_SYS_MAXSYSCALL, .sv_table = linux_sysent, - .sv_mask = 0, .sv_errsize = ELAST + 1, .sv_errtbl = linux_errtbl, .sv_transtrap = linux_translate_traps, .sv_fixup = linux_fixup, .sv_sendsig = linux_sendsig, .sv_sigcode = &_binary_linux_locore_o_start, .sv_szsigcode = &linux_szsigcode, .sv_name = "Linux a.out", .sv_coredump = NULL, .sv_imgact_try = linux_exec_imgact_try, .sv_minsigstksz = LINUX_MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = LINUX_USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = linux_exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = linux_fetch_syscall_args, .sv_syscallnames = NULL, .sv_shared_page_base = LINUX_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, .sv_thread_detach = linux_thread_detach, .sv_trap = NULL, }; INIT_SYSENTVEC(aout_sysvec, &linux_sysvec); struct sysentvec elf_linux_sysvec = { .sv_size = LINUX_SYS_MAXSYSCALL, .sv_table = linux_sysent, - .sv_mask = 0, .sv_errsize = ELAST + 1, .sv_errtbl = linux_errtbl, .sv_transtrap = linux_translate_traps, .sv_fixup = linux_fixup_elf, .sv_sendsig = linux_sendsig, .sv_sigcode = &_binary_linux_locore_o_start, .sv_szsigcode = &linux_szsigcode, .sv_name = "Linux ELF", .sv_coredump = elf32_coredump, .sv_imgact_try = linux_exec_imgact_try, .sv_minsigstksz = LINUX_MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = LINUX_USRSTACK, .sv_psstrings = LINUX_PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = linux_copyout_strings, .sv_setregs = linux_exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = linux_fetch_syscall_args, .sv_syscallnames = NULL, .sv_shared_page_base = LINUX_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, .sv_thread_detach = linux_thread_detach, .sv_trap = NULL, }; static void linux_vdso_install(void *param) { linux_szsigcode = (&_binary_linux_locore_o_end - &_binary_linux_locore_o_start); if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) panic("Linux invalid vdso size\n"); __elfN(linux_vdso_fixup)(&elf_linux_sysvec); linux_shared_page_obj = __elfN(linux_shared_page_init) (&linux_shared_page_mapping); __elfN(linux_vdso_reloc)(&elf_linux_sysvec); bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, linux_szsigcode); elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; } SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, linux_vdso_install, NULL); static void linux_vdso_deinstall(void *param) { __elfN(linux_shared_page_fini)(linux_shared_page_obj); } SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, linux_vdso_deinstall, NULL); static char GNU_ABI_VENDOR[] = "GNU"; static int GNULINUX_ABI_DESC = 0; static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel) { const Elf32_Word *desc; uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); desc = (const Elf32_Word *)p; if (desc[0] != GNULINUX_ABI_DESC) return (false); /* * For Linux we encode osrel using the Linux convention of * (version << 16) | (major << 8) | (minor) * See macro in linux_mib.h */ *osrel = LINUX_KERNVER(desc[1], desc[2], desc[3]); return (true); } static Elf_Brandnote linux_brandnote = { .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), .hdr.n_descsz = 16, /* XXX at least 16 */ .hdr.n_type = 1, .vendor = GNU_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = linux_trans_osrel }; static Elf32_Brandinfo linux_brand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", .emul_path = "/compat/linux", .interp_path = "/lib/ld-linux.so.1", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; static Elf32_Brandinfo linux_glibc2brand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", .emul_path = "/compat/linux", .interp_path = "/lib/ld-linux.so.2", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; static Elf32_Brandinfo linux_muslbrand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", .emul_path = "/compat/linux", .interp_path = "/lib/ld-musl-i386.so.1", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; Elf32_Brandinfo *linux_brandlist[] = { &linux_brand, &linux_glibc2brand, &linux_muslbrand, NULL }; static int linux_elf_modevent(module_t mod, int type, void *data) { Elf32_Brandinfo **brandinfo; int error; struct linux_ioctl_handler **lihp; error = 0; switch(type) { case MOD_LOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_insert_brand_entry(*brandinfo) < 0) error = EINVAL; if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_register_handler(*lihp); LIST_INIT(&futex_list); mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF); linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit, NULL, 1000); linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec, NULL, 1000); linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor, linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY); linux_get_machine(&linux_kplatform); linux_szplatform = roundup(strlen(linux_kplatform) + 1, sizeof(char *)); linux_osd_jail_register(); stclohz = (stathz ? stathz : hz); if (bootverbose) printf("Linux ELF exec handler installed\n"); } else printf("cannot insert Linux ELF brand handler\n"); break; case MOD_UNLOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_brand_inuse(*brandinfo)) error = EBUSY; if (error == 0) { for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_remove_brand_entry(*brandinfo) < 0) error = EINVAL; } if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_unregister_handler(*lihp); mtx_destroy(&futex_mtx); EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag); linux_osd_jail_deregister(); if (bootverbose) printf("Linux ELF exec handler removed\n"); } else printf("Could not deinstall ELF interpreter entry\n"); break; default: return (EOPNOTSUPP); } return (error); } static moduledata_t linux_elf_mod = { "linuxelf", linux_elf_modevent, 0 }; DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); FEATURE(linux, "Linux 32bit support"); Index: head/sys/kern/imgact_aout.c =================================================================== --- head/sys/kern/imgact_aout.c (revision 342242) +++ head/sys/kern/imgact_aout.c (revision 342243) @@ -1,348 +1,346 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1993, David Greenman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __amd64__ #include #include #include #include #include #endif static int exec_aout_imgact(struct image_params *imgp); static int aout_fixup(register_t **stack_base, struct image_params *imgp); #define AOUT32_USRSTACK 0xbfc00000 #if defined(__i386__) #define AOUT32_PS_STRINGS (AOUT32_USRSTACK - sizeof(struct ps_strings)) struct sysentvec aout_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, - .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = aout_fixup, .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD a.out", .sv_coredump = NULL, .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = AOUT32_USRSTACK, .sv_usrstack = AOUT32_USRSTACK, .sv_psstrings = AOUT32_PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_AOUT | SV_IA32 | SV_ILP32, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, }; #elif defined(__amd64__) #define AOUT32_PS_STRINGS \ (AOUT32_USRSTACK - sizeof(struct freebsd32_ps_strings)) #define AOUT32_MINUSER FREEBSD32_MINUSER extern const char *freebsd32_syscallnames[]; extern u_long ia32_maxssiz; struct sysentvec aout_sysvec = { .sv_size = FREEBSD32_SYS_MAXSYSCALL, .sv_table = freebsd32_sysent, - .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = aout_fixup, .sv_sendsig = ia32_sendsig, .sv_sigcode = ia32_sigcode, .sv_szsigcode = &sz_ia32_sigcode, .sv_name = "FreeBSD a.out", .sv_coredump = NULL, .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = IA32_PAGE_SIZE, .sv_minuser = AOUT32_MINUSER, .sv_maxuser = AOUT32_USRSTACK, .sv_usrstack = AOUT32_USRSTACK, .sv_psstrings = AOUT32_PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = freebsd32_copyout_strings, .sv_setregs = ia32_setregs, .sv_fixlimit = ia32_fixlimit, .sv_maxssiz = &ia32_maxssiz, .sv_flags = SV_ABI_FREEBSD | SV_AOUT | SV_IA32 | SV_ILP32, .sv_set_syscall_retval = ia32_set_syscall_retval, .sv_fetch_syscall_args = ia32_fetch_syscall_args, .sv_syscallnames = freebsd32_syscallnames, }; #else #error "Port me" #endif static int aout_fixup(register_t **stack_base, struct image_params *imgp) { *(char **)stack_base -= sizeof(uint32_t); return (suword32(*stack_base, imgp->args->argc)); } static int exec_aout_imgact(struct image_params *imgp) { const struct exec *a_out = (const struct exec *) imgp->image_header; struct vmspace *vmspace; vm_map_t map; vm_object_t object; vm_offset_t text_end, data_end; unsigned long virtual_offset; unsigned long file_offset; unsigned long bss_size; int error; /* * Linux and *BSD binaries look very much alike, * only the machine id is different: * 0x64 for Linux, 0x86 for *BSD, 0x00 for BSDI. * NetBSD is in network byte order.. ugh. */ if (((a_out->a_midmag >> 16) & 0xff) != 0x86 && ((a_out->a_midmag >> 16) & 0xff) != 0 && ((((int)ntohl(a_out->a_midmag)) >> 16) & 0xff) != 0x86) return -1; /* * Set file/virtual offset based on a.out variant. * We do two cases: host byte order and network byte order * (for NetBSD compatibility) */ switch ((int)(a_out->a_midmag & 0xffff)) { case ZMAGIC: virtual_offset = 0; if (a_out->a_text) { file_offset = PAGE_SIZE; } else { /* Bill's "screwball mode" */ file_offset = 0; } break; case QMAGIC: virtual_offset = PAGE_SIZE; file_offset = 0; /* Pass PS_STRINGS for BSD/OS binaries only. */ if (N_GETMID(*a_out) == MID_ZERO) imgp->ps_strings = aout_sysvec.sv_psstrings; break; default: /* NetBSD compatibility */ switch ((int)(ntohl(a_out->a_midmag) & 0xffff)) { case ZMAGIC: case QMAGIC: virtual_offset = PAGE_SIZE; file_offset = 0; break; default: return (-1); } } bss_size = roundup(a_out->a_bss, PAGE_SIZE); /* * Check various fields in header for validity/bounds. */ if (/* entry point must lay with text region */ a_out->a_entry < virtual_offset || a_out->a_entry >= virtual_offset + a_out->a_text || /* text and data size must each be page rounded */ a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK #ifdef __amd64__ || /* overflows */ virtual_offset + a_out->a_text + a_out->a_data + bss_size > UINT_MAX #endif ) return (-1); /* text + data can't exceed file size */ if (a_out->a_data + a_out->a_text > imgp->attr->va_size) return (EFAULT); /* * text/data/bss must not exceed limits */ PROC_LOCK(imgp->proc); if (/* text can't exceed maximum text size */ a_out->a_text > maxtsiz || /* data + bss can't exceed rlimit */ a_out->a_data + bss_size > lim_cur_proc(imgp->proc, RLIMIT_DATA) || racct_set(imgp->proc, RACCT_DATA, a_out->a_data + bss_size) != 0) { PROC_UNLOCK(imgp->proc); return (ENOMEM); } PROC_UNLOCK(imgp->proc); /* * Avoid a possible deadlock if the current address space is destroyed * and that address space maps the locked vnode. In the common case, * the locked vnode's v_usecount is decremented but remains greater * than zero. Consequently, the vnode lock is not needed by vrele(). * However, in cases where the vnode lock is external, such as nullfs, * v_usecount may become zero. */ VOP_UNLOCK(imgp->vp, 0); /* * Destroy old process VM and create a new one (with a new stack) */ error = exec_new_vmspace(imgp, &aout_sysvec); vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); if (error) return (error); /* * The vm space can be changed by exec_new_vmspace */ vmspace = imgp->proc->p_vmspace; object = imgp->object; map = &vmspace->vm_map; vm_map_lock(map); vm_object_reference(object); text_end = virtual_offset + a_out->a_text; error = vm_map_insert(map, object, file_offset, virtual_offset, text_end, VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_ALL, MAP_COPY_ON_WRITE | MAP_PREFAULT); if (error) { vm_map_unlock(map); vm_object_deallocate(object); return (error); } data_end = text_end + a_out->a_data; if (a_out->a_data) { vm_object_reference(object); error = vm_map_insert(map, object, file_offset + a_out->a_text, text_end, data_end, VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE | MAP_PREFAULT); if (error) { vm_map_unlock(map); vm_object_deallocate(object); return (error); } } if (bss_size) { error = vm_map_insert(map, NULL, 0, data_end, data_end + bss_size, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) { vm_map_unlock(map); return (error); } } vm_map_unlock(map); /* Fill in process VM information */ vmspace->vm_tsize = a_out->a_text >> PAGE_SHIFT; vmspace->vm_dsize = (a_out->a_data + bss_size) >> PAGE_SHIFT; vmspace->vm_taddr = (caddr_t) (uintptr_t) virtual_offset; vmspace->vm_daddr = (caddr_t) (uintptr_t) (virtual_offset + a_out->a_text); /* Fill in image_params */ imgp->interpreted = 0; imgp->entry_addr = a_out->a_entry; imgp->proc->p_sysent = &aout_sysvec; return (0); } /* * Tell kern_execve.c about it, with a little help from the linker. */ static struct execsw aout_execsw = { .ex_imgact = exec_aout_imgact, .ex_name = "a.out" }; EXEC_SET(aout, aout_execsw); Index: head/sys/kern/init_main.c =================================================================== --- head/sys/kern/init_main.c (revision 342242) +++ head/sys/kern/init_main.c (revision 342243) @@ -1,859 +1,858 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1995 Terrence R. Lambert * All rights reserved. * * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)init_main.c 8.9 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_init_path.h" #include "opt_verbose_sysinit.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include void mi_startup(void); /* Should be elsewhere */ /* Components of the first process -- never freed. */ static struct session session0; static struct pgrp pgrp0; struct proc proc0; struct thread0_storage thread0_st __aligned(32); struct vmspace vmspace0; struct proc *initproc; #ifndef BOOTHOWTO #define BOOTHOWTO 0 #endif int boothowto = BOOTHOWTO; /* initialized so that it can be patched */ SYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0, "Boot control flags, passed from loader"); #ifndef BOOTVERBOSE #define BOOTVERBOSE 0 #endif int bootverbose = BOOTVERBOSE; SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0, "Control the output of verbose kernel messages"); #ifdef VERBOSE_SYSINIT /* * We'll use the defined value of VERBOSE_SYSINIT from the kernel config to * dictate the default VERBOSE_SYSINIT behavior. Significant values for this * option and associated tunable are: * - 0, 'compiled in but silent by default' * - 1, 'compiled in but verbose by default' (default) */ int verbose_sysinit = VERBOSE_SYSINIT; TUNABLE_INT("debug.verbose_sysinit", &verbose_sysinit); #endif #ifdef INVARIANTS FEATURE(invariants, "Kernel compiled with INVARIANTS, may affect performance"); #endif /* * This ensures that there is at least one entry so that the sysinit_set * symbol is not undefined. A sybsystem ID of SI_SUB_DUMMY is never * executed. */ SYSINIT(placeholder, SI_SUB_DUMMY, SI_ORDER_ANY, NULL, NULL); /* * The sysinit table itself. Items are checked off as the are run. * If we want to register new sysinit types, add them to newsysinit. */ SET_DECLARE(sysinit_set, struct sysinit); struct sysinit **sysinit, **sysinit_end; struct sysinit **newsysinit, **newsysinit_end; EVENTHANDLER_LIST_DECLARE(process_init); EVENTHANDLER_LIST_DECLARE(thread_init); EVENTHANDLER_LIST_DECLARE(process_ctor); EVENTHANDLER_LIST_DECLARE(thread_ctor); /* * Merge a new sysinit set into the current set, reallocating it if * necessary. This can only be called after malloc is running. */ void sysinit_add(struct sysinit **set, struct sysinit **set_end) { struct sysinit **newset; struct sysinit **sipp; struct sysinit **xipp; int count; count = set_end - set; if (newsysinit) count += newsysinit_end - newsysinit; else count += sysinit_end - sysinit; newset = malloc(count * sizeof(*sipp), M_TEMP, M_NOWAIT); if (newset == NULL) panic("cannot malloc for sysinit"); xipp = newset; if (newsysinit) for (sipp = newsysinit; sipp < newsysinit_end; sipp++) *xipp++ = *sipp; else for (sipp = sysinit; sipp < sysinit_end; sipp++) *xipp++ = *sipp; for (sipp = set; sipp < set_end; sipp++) *xipp++ = *sipp; if (newsysinit) free(newsysinit, M_TEMP); newsysinit = newset; newsysinit_end = newset + count; } #if defined (DDB) && defined(VERBOSE_SYSINIT) static const char * symbol_name(vm_offset_t va, db_strategy_t strategy) { const char *name; c_db_sym_t sym; db_expr_t offset; if (va == 0) return (NULL); sym = db_search_symbol(va, strategy, &offset); if (offset != 0) return (NULL); db_symbol_values(sym, &name, NULL); return (name); } #endif /* * System startup; initialize the world, create process 0, mount root * filesystem, and fork to create init and pagedaemon. Most of the * hard work is done in the lower-level initialization routines including * startup(), which does memory initialization and autoconfiguration. * * This allows simple addition of new kernel subsystems that require * boot time initialization. It also allows substitution of subsystem * (for instance, a scheduler, kernel profiler, or VM system) by object * module. Finally, it allows for optional "kernel threads". */ void mi_startup(void) { struct sysinit **sipp; /* system initialization*/ struct sysinit **xipp; /* interior loop of sort*/ struct sysinit *save; /* bubble*/ #if defined(VERBOSE_SYSINIT) int last; int verbose; #endif TSENTER(); if (boothowto & RB_VERBOSE) bootverbose++; if (sysinit == NULL) { sysinit = SET_BEGIN(sysinit_set); sysinit_end = SET_LIMIT(sysinit_set); } restart: /* * Perform a bubble sort of the system initialization objects by * their subsystem (primary key) and order (secondary key). */ for (sipp = sysinit; sipp < sysinit_end; sipp++) { for (xipp = sipp + 1; xipp < sysinit_end; xipp++) { if ((*sipp)->subsystem < (*xipp)->subsystem || ((*sipp)->subsystem == (*xipp)->subsystem && (*sipp)->order <= (*xipp)->order)) continue; /* skip*/ save = *sipp; *sipp = *xipp; *xipp = save; } } #if defined(VERBOSE_SYSINIT) last = SI_SUB_COPYRIGHT; verbose = 0; #if !defined(DDB) printf("VERBOSE_SYSINIT: DDB not enabled, symbol lookups disabled.\n"); #endif #endif /* * Traverse the (now) ordered list of system initialization tasks. * Perform each task, and continue on to the next task. */ for (sipp = sysinit; sipp < sysinit_end; sipp++) { if ((*sipp)->subsystem == SI_SUB_DUMMY) continue; /* skip dummy task(s)*/ if ((*sipp)->subsystem == SI_SUB_DONE) continue; #if defined(VERBOSE_SYSINIT) if ((*sipp)->subsystem > last && verbose_sysinit != 0) { verbose = 1; last = (*sipp)->subsystem; printf("subsystem %x\n", last); } if (verbose) { #if defined(DDB) const char *func, *data; func = symbol_name((vm_offset_t)(*sipp)->func, DB_STGY_PROC); data = symbol_name((vm_offset_t)(*sipp)->udata, DB_STGY_ANY); if (func != NULL && data != NULL) printf(" %s(&%s)... ", func, data); else if (func != NULL) printf(" %s(%p)... ", func, (*sipp)->udata); else #endif printf(" %p(%p)... ", (*sipp)->func, (*sipp)->udata); } #endif /* Call function */ (*((*sipp)->func))((*sipp)->udata); #if defined(VERBOSE_SYSINIT) if (verbose) printf("done.\n"); #endif /* Check off the one we're just done */ (*sipp)->subsystem = SI_SUB_DONE; /* Check if we've installed more sysinit items via KLD */ if (newsysinit != NULL) { if (sysinit != SET_BEGIN(sysinit_set)) free(sysinit, M_TEMP); sysinit = newsysinit; sysinit_end = newsysinit_end; newsysinit = NULL; newsysinit_end = NULL; goto restart; } } TSEXIT(); /* Here so we don't overlap with start_init. */ mtx_assert(&Giant, MA_OWNED | MA_NOTRECURSED); mtx_unlock(&Giant); /* * Now hand over this thread to swapper. */ swapper(); /* NOTREACHED*/ } static void print_caddr_t(void *data) { printf("%s", (char *)data); } static void print_version(void *data __unused) { int len; /* Strip a trailing newline from version. */ len = strlen(version); while (len > 0 && version[len - 1] == '\n') len--; printf("%.*s %s\n", len, version, machine); printf("%s\n", compiler_version); } SYSINIT(announce, SI_SUB_COPYRIGHT, SI_ORDER_FIRST, print_caddr_t, copyright); SYSINIT(trademark, SI_SUB_COPYRIGHT, SI_ORDER_SECOND, print_caddr_t, trademark); SYSINIT(version, SI_SUB_COPYRIGHT, SI_ORDER_THIRD, print_version, NULL); #ifdef WITNESS static char wit_warn[] = "WARNING: WITNESS option enabled, expect reduced performance.\n"; SYSINIT(witwarn, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 1, print_caddr_t, wit_warn); SYSINIT(witwarn2, SI_SUB_LAST, SI_ORDER_THIRD + 1, print_caddr_t, wit_warn); #endif #ifdef DIAGNOSTIC static char diag_warn[] = "WARNING: DIAGNOSTIC option enabled, expect reduced performance.\n"; SYSINIT(diagwarn, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 2, print_caddr_t, diag_warn); SYSINIT(diagwarn2, SI_SUB_LAST, SI_ORDER_THIRD + 2, print_caddr_t, diag_warn); #endif static int null_fetch_syscall_args(struct thread *td __unused) { panic("null_fetch_syscall_args"); } static void null_set_syscall_retval(struct thread *td __unused, int error __unused) { panic("null_set_syscall_retval"); } struct sysentvec null_sysvec = { .sv_size = 0, .sv_table = NULL, - .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = NULL, .sv_sendsig = NULL, .sv_sigcode = NULL, .sv_szsigcode = NULL, .sv_name = "null", .sv_coredump = NULL, .sv_imgact_try = NULL, .sv_minsigstksz = 0, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = NULL, .sv_setregs = NULL, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = 0, .sv_set_syscall_retval = null_set_syscall_retval, .sv_fetch_syscall_args = null_fetch_syscall_args, .sv_syscallnames = NULL, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, }; /* * The two following SYSINIT's are proc0 specific glue code. I am not * convinced that they can not be safely combined, but their order of * operation has been maintained as the same as the original init_main.c * for right now. */ /* ARGSUSED*/ static void proc0_init(void *dummy __unused) { struct proc *p; struct thread *td; struct ucred *newcred; struct uidinfo tmpuinfo; struct loginclass tmplc = { .lc_name = "", }; vm_paddr_t pageablemem; int i; GIANT_REQUIRED; p = &proc0; td = &thread0; /* * Initialize magic number and osrel. */ p->p_magic = P_MAGIC; p->p_osrel = osreldate; /* * Initialize thread and process structures. */ procinit(); /* set up proc zone */ threadinit(); /* set up UMA zones */ /* * Initialise scheduler resources. * Add scheduler specific parts to proc, thread as needed. */ schedinit(); /* scheduler gets its house in order */ /* * Create process 0 (the swapper). */ LIST_INSERT_HEAD(&allproc, p, p_list); LIST_INSERT_HEAD(PIDHASH(0), p, p_hash); mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK); p->p_pgrp = &pgrp0; LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash); LIST_INIT(&pgrp0.pg_members); LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist); pgrp0.pg_session = &session0; mtx_init(&session0.s_mtx, "session", NULL, MTX_DEF); refcount_init(&session0.s_count, 1); session0.s_leader = p; p->p_sysent = &null_sysvec; p->p_flag = P_SYSTEM | P_INMEM | P_KPROC; p->p_flag2 = 0; p->p_state = PRS_NORMAL; p->p_klist = knlist_alloc(&p->p_mtx); STAILQ_INIT(&p->p_ktr); p->p_nice = NZERO; /* pid_max cannot be greater than PID_MAX */ td->td_tid = PID_MAX + 1; LIST_INSERT_HEAD(TIDHASH(td->td_tid), td, td_hash); td->td_state = TDS_RUNNING; td->td_pri_class = PRI_TIMESHARE; td->td_user_pri = PUSER; td->td_base_user_pri = PUSER; td->td_lend_user_pri = PRI_MAX; td->td_priority = PVM; td->td_base_pri = PVM; td->td_oncpu = curcpu; td->td_flags = TDF_INMEM; td->td_pflags = TDP_KTHREAD; td->td_cpuset = cpuset_thread0(); td->td_domain.dr_policy = td->td_cpuset->cs_domain; epoch_thread_init(td); prison0_init(); p->p_peers = 0; p->p_leader = p; p->p_reaper = p; p->p_treeflag |= P_TREE_REAPER; LIST_INIT(&p->p_reaplist); strncpy(p->p_comm, "kernel", sizeof (p->p_comm)); strncpy(td->td_name, "swapper", sizeof (td->td_name)); callout_init_mtx(&p->p_itcallout, &p->p_mtx, 0); callout_init_mtx(&p->p_limco, &p->p_mtx, 0); callout_init(&td->td_slpcallout, 1); /* Create credentials. */ newcred = crget(); newcred->cr_ngroups = 1; /* group 0 */ /* A hack to prevent uifind from tripping over NULL pointers. */ curthread->td_ucred = newcred; tmpuinfo.ui_uid = 1; newcred->cr_uidinfo = newcred->cr_ruidinfo = &tmpuinfo; newcred->cr_uidinfo = uifind(0); newcred->cr_ruidinfo = uifind(0); newcred->cr_loginclass = &tmplc; newcred->cr_loginclass = loginclass_find("default"); /* End hack. creds get properly set later with thread_cow_get_proc */ curthread->td_ucred = NULL; newcred->cr_prison = &prison0; proc_set_cred_init(p, newcred); #ifdef AUDIT audit_cred_kproc0(newcred); #endif #ifdef MAC mac_cred_create_swapper(newcred); #endif /* Create sigacts. */ p->p_sigacts = sigacts_alloc(); /* Initialize signal state for process 0. */ siginit(&proc0); /* Create the file descriptor table. */ p->p_fd = fdinit(NULL, false); p->p_fdtol = NULL; /* Create the limits structures. */ p->p_limit = lim_alloc(); for (i = 0; i < RLIM_NLIMITS; i++) p->p_limit->pl_rlimit[i].rlim_cur = p->p_limit->pl_rlimit[i].rlim_max = RLIM_INFINITY; p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_cur = p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles; p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_cur = p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc; p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_cur = dfldsiz; p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_max = maxdsiz; p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_cur = dflssiz; p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_max = maxssiz; /* Cast to avoid overflow on i386/PAE. */ pageablemem = ptoa((vm_paddr_t)vm_free_count()); p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_cur = p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = pageablemem; p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = pageablemem / 3; p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = pageablemem; p->p_cpulimit = RLIM_INFINITY; PROC_LOCK(p); thread_cow_get_proc(td, p); PROC_UNLOCK(p); /* Initialize resource accounting structures. */ racct_create(&p->p_racct); p->p_stats = pstats_alloc(); /* Allocate a prototype map so we have something to fork. */ p->p_vmspace = &vmspace0; vmspace0.vm_refcnt = 1; pmap_pinit0(vmspace_pmap(&vmspace0)); /* * proc0 is not expected to enter usermode, so there is no special * handling for sv_minuser here, like is done for exec_new_vmspace(). */ vm_map_init(&vmspace0.vm_map, vmspace_pmap(&vmspace0), p->p_sysent->sv_minuser, p->p_sysent->sv_maxuser); /* * Call the init and ctor for the new thread and proc. We wait * to do this until all other structures are fairly sane. */ EVENTHANDLER_DIRECT_INVOKE(process_init, p); EVENTHANDLER_DIRECT_INVOKE(thread_init, td); EVENTHANDLER_DIRECT_INVOKE(process_ctor, p); EVENTHANDLER_DIRECT_INVOKE(thread_ctor, td); /* * Charge root for one process. */ (void)chgproccnt(p->p_ucred->cr_ruidinfo, 1, 0); PROC_LOCK(p); racct_add_force(p, RACCT_NPROC, 1); PROC_UNLOCK(p); } SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL); /* ARGSUSED*/ static void proc0_post(void *dummy __unused) { struct timespec ts; struct proc *p; struct rusage ru; struct thread *td; /* * Now we can look at the time, having had a chance to verify the * time from the filesystem. Pretend that proc0 started now. */ sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NEW) { PROC_UNLOCK(p); continue; } microuptime(&p->p_stats->p_start); PROC_STATLOCK(p); rufetch(p, &ru); /* Clears thread stats */ p->p_rux.rux_runtime = 0; p->p_rux.rux_uticks = 0; p->p_rux.rux_sticks = 0; p->p_rux.rux_iticks = 0; PROC_STATUNLOCK(p); FOREACH_THREAD_IN_PROC(p, td) { td->td_runtime = 0; } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); PCPU_SET(switchtime, cpu_ticks()); PCPU_SET(switchticks, ticks); /* * Give the ``random'' number generator a thump. */ nanotime(&ts); srandom(ts.tv_sec ^ ts.tv_nsec); } SYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL); static void random_init(void *dummy __unused) { /* * After CPU has been started we have some randomness on most * platforms via get_cyclecount(). For platforms that don't * we will reseed random(9) in proc0_post() as well. */ srandom(get_cyclecount()); } SYSINIT(random, SI_SUB_RANDOM, SI_ORDER_FIRST, random_init, NULL); /* *************************************************************************** **** **** The following SYSINIT's and glue code should be moved to the **** respective files on a per subsystem basis. **** *************************************************************************** */ /* * List of paths to try when searching for "init". */ static char init_path[MAXPATHLEN] = #ifdef INIT_PATH __XSTRING(INIT_PATH); #else "/sbin/init:/sbin/oinit:/sbin/init.bak:/rescue/init"; #endif SYSCTL_STRING(_kern, OID_AUTO, init_path, CTLFLAG_RD, init_path, 0, "Path used to search the init process"); /* * Shutdown timeout of init(8). * Unused within kernel, but used to control init(8), hence do not remove. */ #ifndef INIT_SHUTDOWN_TIMEOUT #define INIT_SHUTDOWN_TIMEOUT 120 #endif static int init_shutdown_timeout = INIT_SHUTDOWN_TIMEOUT; SYSCTL_INT(_kern, OID_AUTO, init_shutdown_timeout, CTLFLAG_RW, &init_shutdown_timeout, 0, "Shutdown timeout of init(8). " "Unused within kernel, but used to control init(8)"); /* * Start the initial user process; try exec'ing each pathname in init_path. * The program is invoked with one argument containing the boot flags. */ static void start_init(void *dummy) { struct image_args args; int error; char *var, *path; char *free_init_path, *tmp_init_path; struct thread *td; struct proc *p; struct vmspace *oldvmspace; TSENTER(); /* Here so we don't overlap with mi_startup. */ td = curthread; p = td->td_proc; vfs_mountroot(); /* Wipe GELI passphrase from the environment. */ kern_unsetenv("kern.geom.eli.passphrase"); if ((var = kern_getenv("init_path")) != NULL) { strlcpy(init_path, var, sizeof(init_path)); freeenv(var); } free_init_path = tmp_init_path = strdup(init_path, M_TEMP); while ((path = strsep(&tmp_init_path, ":")) != NULL) { if (bootverbose) printf("start_init: trying %s\n", path); memset(&args, 0, sizeof(args)); error = exec_alloc_args(&args); if (error != 0) panic("%s: Can't allocate space for init arguments %d", __func__, error); error = exec_args_add_fname(&args, path, UIO_SYSSPACE); if (error != 0) panic("%s: Can't add fname %d", __func__, error); error = exec_args_add_arg(&args, path, UIO_SYSSPACE); if (error != 0) panic("%s: Can't add argv[0] %d", __func__, error); if (boothowto & RB_SINGLE) error = exec_args_add_arg(&args, "-s", UIO_SYSSPACE); if (error != 0) panic("%s: Can't add argv[0] %d", __func__, error); /* * Now try to exec the program. If can't for any reason * other than it doesn't exist, complain. * * Otherwise, return via fork_trampoline() all the way * to user mode as init! */ KASSERT((td->td_pflags & TDP_EXECVMSPC) == 0, ("nested execve")); oldvmspace = td->td_proc->p_vmspace; error = kern_execve(td, &args, NULL); KASSERT(error != 0, ("kern_execve returned success, not EJUSTRETURN")); if (error == EJUSTRETURN) { if ((td->td_pflags & TDP_EXECVMSPC) != 0) { KASSERT(p->p_vmspace != oldvmspace, ("oldvmspace still used")); vmspace_free(oldvmspace); td->td_pflags &= ~TDP_EXECVMSPC; } free(free_init_path, M_TEMP); TSEXIT(); return; } if (error != ENOENT) printf("exec %s: error %d\n", path, error); } free(free_init_path, M_TEMP); printf("init: not found in path %s\n", init_path); panic("no init"); } /* * Like kproc_create(), but runs in its own address space. * We do this early to reserve pid 1. * * Note special case - do not make it runnable yet. Other work * in progress will change this more. */ static void create_init(const void *udata __unused) { struct fork_req fr; struct ucred *newcred, *oldcred; struct thread *td; int error; bzero(&fr, sizeof(fr)); fr.fr_flags = RFFDG | RFPROC | RFSTOPPED; fr.fr_procp = &initproc; error = fork1(&thread0, &fr); if (error) panic("cannot fork init: %d\n", error); KASSERT(initproc->p_pid == 1, ("create_init: initproc->p_pid != 1")); /* divorce init's credentials from the kernel's */ newcred = crget(); sx_xlock(&proctree_lock); PROC_LOCK(initproc); initproc->p_flag |= P_SYSTEM | P_INMEM; initproc->p_treeflag |= P_TREE_REAPER; oldcred = initproc->p_ucred; crcopy(newcred, oldcred); #ifdef MAC mac_cred_create_init(newcred); #endif #ifdef AUDIT audit_cred_proc1(newcred); #endif proc_set_cred(initproc, newcred); td = FIRST_THREAD_IN_PROC(initproc); crfree(td->td_ucred); td->td_ucred = crhold(initproc->p_ucred); PROC_UNLOCK(initproc); sx_xunlock(&proctree_lock); crfree(oldcred); cpu_fork_kthread_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL); } SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL); /* * Make it runnable now. */ static void kick_init(const void *udata __unused) { struct thread *td; td = FIRST_THREAD_IN_PROC(initproc); thread_lock(td); TD_SET_CAN_RUN(td); sched_add(td, SRQ_BORING); thread_unlock(td); } SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_MIDDLE, kick_init, NULL); Index: head/sys/kern/makesyscalls.sh =================================================================== --- head/sys/kern/makesyscalls.sh (revision 342242) +++ head/sys/kern/makesyscalls.sh (revision 342243) @@ -1,770 +1,770 @@ #! /bin/sh - # @(#)makesyscalls.sh 8.1 (Berkeley) 6/10/93 # $FreeBSD$ set -e # name of compat options: compat=COMPAT_43 compat4=COMPAT_FREEBSD4 compat6=COMPAT_FREEBSD6 compat7=COMPAT_FREEBSD7 compat10=COMPAT_FREEBSD10 compat11=COMPAT_FREEBSD11 # output files: sysnames="syscalls.c" sysproto="../sys/sysproto.h" sysproto_h=_SYS_SYSPROTO_H_ syshdr="../sys/syscall.h" sysmk="../sys/syscall.mk" syssw="init_sysent.c" syscallprefix="SYS_" switchname="sysent" namesname="syscallnames" systrace="systrace_args.c" # tmp files: sysaue="sysent.aue.$$" sysdcl="sysent.dcl.$$" syscompat="sysent.compat.$$" syscompatdcl="sysent.compatdcl.$$" syscompat4="sysent.compat4.$$" syscompat4dcl="sysent.compat4dcl.$$" syscompat6="sysent.compat6.$$" syscompat6dcl="sysent.compat6dcl.$$" syscompat7="sysent.compat7.$$" syscompat7dcl="sysent.compat7dcl.$$" syscompat10="sysent.compat10.$$" syscompat10dcl="sysent.compat10dcl.$$" syscompat11="sysent.compat11.$$" syscompat11dcl="sysent.compat11dcl.$$" sysent="sysent.switch.$$" sysinc="sysinc.switch.$$" sysarg="sysarg.switch.$$" sysprotoend="sysprotoend.$$" systracetmp="systrace.$$" systraceret="systraceret.$$" capabilities_conf="capabilities.conf" trap "rm $sysaue $sysdcl $syscompat $syscompatdcl $syscompat4 $syscompat4dcl $syscompat6 $syscompat6dcl $syscompat7 $syscompat7dcl $syscompat10 $syscompat10dcl $syscompat11 $syscompat11dcl $sysent $sysinc $sysarg $sysprotoend $systracetmp $systraceret" 0 touch $sysaue $sysdcl $syscompat $syscompatdcl $syscompat4 $syscompat4dcl $syscompat6 $syscompat6dcl $syscompat7 $syscompat7dcl $syscompat10 $syscompat10dcl $syscompat11 $syscompat11dcl $sysent $sysinc $sysarg $sysprotoend $systracetmp $systraceret case $# in 0) echo "usage: $0 input-file " 1>&2 exit 1 ;; esac if [ -n "$2" ]; then . $2 fi if [ -r $capabilities_conf ]; then capenabled=`egrep -v '^#|^$' $capabilities_conf` capenabled=`echo $capenabled | sed 's/ /,/g'` else capenabled="" fi sed -e ' # FreeBSD ID, includes, comments, and blank lines /.*\$FreeBSD/b done_joining /^[#;]/b done_joining /^$/b done_joining # Join lines ending in backslash :joining /\\$/{a\ N s/\\\n// b joining } # OBSOL, etc lines without function signatures /^[0-9][^{]*$/b done_joining # Join incomplete signatures. The { must appear on the first line # and the } must appear on the last line (modulo lines joined by # backslashes). /^[^}]*$/{a\ N s/\n// b joining } :done_joining 2,${ /^#/!s/\([{}()*,]\)/ \1 /g } ' < $1 | awk " BEGIN { sysaue = \"$sysaue\" sysdcl = \"$sysdcl\" sysproto = \"$sysproto\" sysprotoend = \"$sysprotoend\" sysproto_h = \"$sysproto_h\" syscompat = \"$syscompat\" syscompatdcl = \"$syscompatdcl\" syscompat4 = \"$syscompat4\" syscompat4dcl = \"$syscompat4dcl\" syscompat6 = \"$syscompat6\" syscompat6dcl = \"$syscompat6dcl\" syscompat7 = \"$syscompat7\" syscompat7dcl = \"$syscompat7dcl\" syscompat10 = \"$syscompat10\" syscompat10dcl = \"$syscompat10dcl\" syscompat11 = \"$syscompat11\" syscompat11dcl = \"$syscompat11dcl\" sysent = \"$sysent\" syssw = \"$syssw\" sysinc = \"$sysinc\" sysarg = \"$sysarg\" sysnames = \"$sysnames\" syshdr = \"$syshdr\" sysmk = \"$sysmk\" systrace = \"$systrace\" systracetmp = \"$systracetmp\" systraceret = \"$systraceret\" compat = \"$compat\" compat4 = \"$compat4\" compat6 = \"$compat6\" compat7 = \"$compat7\" compat10 = \"$compat10\" compat11 = \"$compat11\" syscallprefix = \"$syscallprefix\" switchname = \"$switchname\" namesname = \"$namesname\" infile = \"$1\" abi_func_prefix = \"$abi_func_prefix\" capenabled_string = \"$capenabled\" "' split(capenabled_string, capenabled, ","); printf "\n/* The casts are bogus but will do for now. */\n" > sysent printf "struct sysent %s[] = {\n",switchname > sysent printf "/*\n * System call switch table.\n *\n" > syssw printf " * DO NOT EDIT-- this file is automatically generated.\n" > syssw printf " * $%s$\n", "FreeBSD" > syssw printf " */\n\n" > syssw printf "/*\n * System call prototypes.\n *\n" > sysarg printf " * DO NOT EDIT-- this file is automatically generated.\n" > sysarg printf " * $%s$\n", "FreeBSD" > sysarg printf " */\n\n" > sysarg printf "#ifndef %s\n", sysproto_h > sysarg printf "#define\t%s\n\n", sysproto_h > sysarg printf "#include \n" > sysarg printf "#include \n" > sysarg printf "#include \n" > sysarg printf "#include \n" > sysarg printf "#include \n" > sysarg printf "#include \n" > sysarg printf "#include \n" > sysarg printf "#include \n\n" > sysarg printf "#include \n\n" > sysarg printf "struct proc;\n\n" > sysarg printf "struct thread;\n\n" > sysarg printf "#define\tPAD_(t)\t(sizeof(register_t) <= sizeof(t) ? \\\n" > sysarg printf "\t\t0 : sizeof(register_t) - sizeof(t))\n\n" > sysarg printf "#if BYTE_ORDER == LITTLE_ENDIAN\n"> sysarg printf "#define\tPADL_(t)\t0\n" > sysarg printf "#define\tPADR_(t)\tPAD_(t)\n" > sysarg printf "#else\n" > sysarg printf "#define\tPADL_(t)\tPAD_(t)\n" > sysarg printf "#define\tPADR_(t)\t0\n" > sysarg printf "#endif\n\n" > sysarg printf "\n#ifdef %s\n\n", compat > syscompat printf "\n#ifdef %s\n\n", compat4 > syscompat4 printf "\n#ifdef %s\n\n", compat6 > syscompat6 printf "\n#ifdef %s\n\n", compat7 > syscompat7 printf "\n#ifdef %s\n\n", compat10 > syscompat10 printf "\n#ifdef %s\n\n", compat11 > syscompat11 printf "/*\n * System call names.\n *\n" > sysnames printf " * DO NOT EDIT-- this file is automatically generated.\n" > sysnames printf " * $%s$\n", "FreeBSD" > sysnames printf " */\n\n" > sysnames printf "const char *%s[] = {\n", namesname > sysnames printf "/*\n * System call numbers.\n *\n" > syshdr printf " * DO NOT EDIT-- this file is automatically generated.\n" > syshdr printf " * $%s$\n", "FreeBSD" > syshdr printf " */\n\n" > syshdr printf "# FreeBSD system call object files.\n" > sysmk printf "# DO NOT EDIT-- this file is automatically generated.\n" > sysmk printf "# $%s$\n", "FreeBSD" > sysmk printf "MIASM = " > sysmk printf "/*\n * System call argument to DTrace register array converstion.\n *\n" > systrace printf " * DO NOT EDIT-- this file is automatically generated.\n" > systrace printf " * $%s$\n", "FreeBSD" > systrace printf " * This file is part of the DTrace syscall provider.\n */\n\n" > systrace printf "static void\nsystrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)\n{\n" > systrace printf "\tint64_t *iarg = (int64_t *) uarg;\n" > systrace printf "\tswitch (sysnum) {\n" > systrace printf "static void\nsystrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)\n{\n\tconst char *p = NULL;\n" > systracetmp printf "\tswitch (sysnum) {\n" > systracetmp printf "static void\nsystrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)\n{\n\tconst char *p = NULL;\n" > systraceret printf "\tswitch (sysnum) {\n" > systraceret } NR == 1 { next } NF == 0 || $1 ~ /^;/ { next } $1 ~ /^#[ ]*include/ { print > sysinc next } $1 ~ /^#[ ]*if/ { print > sysent print > sysdcl print > sysarg print > syscompat print > syscompat4 print > syscompat6 print > syscompat7 print > syscompat10 print > syscompat11 print > sysnames print > systrace print > systracetmp print > systraceret savesyscall = syscall next } $1 ~ /^#[ ]*else/ { print > sysent print > sysdcl print > sysarg print > syscompat print > syscompat4 print > syscompat6 print > syscompat7 print > syscompat10 print > syscompat11 print > sysnames print > systrace print > systracetmp print > systraceret syscall = savesyscall next } $1 ~ /^#/ { print > sysent print > sysdcl print > sysarg print > syscompat print > syscompat4 print > syscompat6 print > syscompat7 print > syscompat10 print > syscompat11 print > sysnames print > systrace print > systracetmp print > systraceret next } # Returns true if the type "name" is the first flag in the type field function type(name, flags, n) { n = split($3, flags, /\|/) return (n > 0 && flags[1] == name) } # Returns true if the flag "name" is set in the type field function flag(name, flags, i, n) { n = split($3, flags, /\|/) for (i = 1; i <= n; i++) if (flags[i] == name) return 1 return 0 } { n = split($1, syscall_range, /-/) if (n == 1) { syscall_range[2] = syscall_range[1] } else if (n == 2) { if (!type("UNIMPL")) { printf "%s: line %d: range permitted only with UNIMPL\n", infile, NR exit 1 } } else { printf "%s: line %d: invalid syscall number or range %s\n", infile, NR, $1 exit 1 } } syscall != syscall_range[1] { printf "%s: line %d: syscall number out of sync at %d\n", infile, NR, syscall printf "line is:\n" print exit 1 } function align_sysent_comment(column) { printf("\t") > sysent column = column + 8 - column % 8 while (column < 56) { printf("\t") > sysent column = column + 8 } } function parserr(was, wanted) { printf "%s: line %d: unexpected %s (expected %s)\n", infile, NR, was, wanted exit 1 } function parseline() { f=4 # toss number, type, audit event ret_inc = 0 argc= 0; argssize = "0" thr_flag = "SY_THR_STATIC" if (flag("NOTSTATIC")) { thr_flag = "SY_THR_ABSENT" } if ($NF != "}") { funcalias=$(NF-2) argalias=$(NF-1) rettype=$NF end=NF-3 } else { funcalias="" argalias="" rettype="int" if ($(f+2) == "*") { ret_inc = 1 } end=NF } if (flag("NODEF")) { auditev="AUE_NULL" funcname=$(4 + ret_inc) argssize = "AS(" $(6 + ret_inc) ")" return } if ($f != "{") parserr($f, "{") f++ if ($end != "}") parserr($end, "}") end-- if ($end != ";") parserr($end, ";") end-- if ($end != ")") parserr($end, ")") end-- syscallret=$f f++ while (ret_inc > 0) { syscallret=syscallret " " $f f++ ret_inc-- } funcname=$f # # We now know the func name, so define a flags field for it. # Do this before any other processing as we may return early # from it. # for (cap in capenabled) { if (funcname == capenabled[cap] || funcname == abi_func_prefix capenabled[cap]) { flags = "SYF_CAPENABLED"; break; } } if (funcalias == "") funcalias = funcname if (argalias == "") { argalias = funcname "_args" if (flag("COMPAT")) argalias = "o" argalias if (flag("COMPAT4")) argalias = "freebsd4_" argalias if (flag("COMPAT6")) argalias = "freebsd6_" argalias if (flag("COMPAT7")) argalias = "freebsd7_" argalias if (flag("COMPAT10")) argalias = "freebsd10_" argalias if (flag("COMPAT11")) argalias = "freebsd11_" argalias } f++ if ($f != "(") parserr($f, ")") f++ if (f == end) { if ($f != "void") parserr($f, "argument definition") return } while (f <= end) { argc++ argtype[argc]="" oldf="" while (f < end && $(f+1) != ",") { if (argtype[argc] != "" && oldf != "*") argtype[argc] = argtype[argc]" "; argtype[argc] = argtype[argc]$f; oldf = $f; f++ } if (argtype[argc] == "") parserr($f, "argument definition") # The parser adds space around parens. # Remove it from annotations. gsub(/ \( /, "(", argtype[argc]); gsub(/ \)/, ")", argtype[argc]); #remove annotations gsub(/_In[^ ]*[_)] /, "", argtype[argc]); gsub(/_Out[^ ]*[_)] /, "", argtype[argc]); argname[argc]=$f; f += 2; # skip name, and any comma } if (argc != 0) argssize = "AS(" argalias ")" } { comment = $4 if (NF < 7) for (i = 5; i <= NF; i++) comment = comment " " $i } # # The AUE_ audit event identifier. # { auditev = $2; } # # The flags, if any. # { flags = "0"; } type("STD") || type("NODEF") || type("NOARGS") || type("NOPROTO") \ || type("NOSTD") { parseline() printf("\t/* %s */\n\tcase %d: {\n", funcname, syscall) > systrace printf("\t/* %s */\n\tcase %d:\n", funcname, syscall) > systracetmp printf("\t/* %s */\n\tcase %d:\n", funcname, syscall) > systraceret if (argc > 0) { printf("\t\tswitch(ndx) {\n") > systracetmp printf("\t\tstruct %s *p = params;\n", argalias) > systrace for (i = 1; i <= argc; i++) { arg = argtype[i] sub("__restrict$", "", arg) if (index(arg, "*") > 0) printf("\t\tcase %d:\n\t\t\tp = \"userland %s\";\n\t\t\tbreak;\n", i - 1, arg) > systracetmp else printf("\t\tcase %d:\n\t\t\tp = \"%s\";\n\t\t\tbreak;\n", i - 1, arg) > systracetmp if (index(arg, "*") > 0 || arg == "caddr_t") printf("\t\tuarg[%d] = (intptr_t) p->%s; /* %s */\n", \ i - 1, \ argname[i], arg) > systrace else if (arg == "union l_semun") printf("\t\tuarg[%d] = p->%s.buf; /* %s */\n", \ i - 1, \ argname[i], arg) > systrace else if (substr(arg, 1, 1) == "u" || arg == "size_t") printf("\t\tuarg[%d] = p->%s; /* %s */\n", \ i - 1, \ argname[i], arg) > systrace else printf("\t\tiarg[%d] = p->%s; /* %s */\n", \ i - 1, \ argname[i], arg) > systrace } printf("\t\tdefault:\n\t\t\tbreak;\n\t\t};\n") > systracetmp printf("\t\tif (ndx == 0 || ndx == 1)\n") > systraceret printf("\t\t\tp = \"%s\";\n", syscallret) > systraceret printf("\t\tbreak;\n") > systraceret } printf("\t\t*n_args = %d;\n\t\tbreak;\n\t}\n", argc) > systrace printf("\t\tbreak;\n") > systracetmp if (argc != 0 && !flag("NOARGS") && !flag("NOPROTO") && \ !flag("NODEF")) { printf("struct %s {\n", argalias) > sysarg for (i = 1; i <= argc; i++) printf("\tchar %s_l_[PADL_(%s)]; " \ "%s %s; char %s_r_[PADR_(%s)];\n", argname[i], argtype[i], argtype[i], argname[i], argname[i], argtype[i]) > sysarg printf("};\n") > sysarg } else if (!flag("NOARGS") && !flag("NOPROTO") && !flag("NODEF")) printf("struct %s {\n\tregister_t dummy;\n};\n", argalias) > sysarg if (!flag("NOPROTO") && !flag("NODEF")) { if (funcname == "nosys" || funcname == "lkmnosys" || funcname == "sysarch" || funcname ~ /^freebsd/ || - funcname ~ /^linux/ || funcname ~ /^ibcs2/ || - funcname ~ /^xenix/ || funcname ~ /^cloudabi/) { + funcname ~ /^linux/ || funcname ~ /^xenix/ || + funcname ~ /^cloudabi/) { printf("%s\t%s(struct thread *, struct %s *)", rettype, funcname, argalias) > sysdcl } else { printf("%s\tsys_%s(struct thread *, struct %s *)", rettype, funcname, argalias) > sysdcl } printf(";\n") > sysdcl printf("#define\t%sAUE_%s\t%s\n", syscallprefix, funcalias, auditev) > sysaue } printf("\t{ %s, (sy_call_t *)", argssize) > sysent column = 8 + 2 + length(argssize) + 15 if (flag("NOSTD")) { printf("lkmressys, AUE_NULL, NULL, 0, 0, %s, SY_THR_ABSENT },", flags) > sysent column = column + length("lkmressys") + length("AUE_NULL") + 3 } else { if (funcname == "nosys" || funcname == "sysarch" || funcname == "lkmnosys" || funcname ~ /^freebsd/ || - funcname ~ /^linux/ || funcname ~ /^ibcs2/ || - funcname ~ /^xenix/ || funcname ~ /^cloudabi/) { + funcname ~ /^linux/ || funcname ~ /^xenix/ || + funcname ~ /^cloudabi/) { printf("%s, %s, NULL, 0, 0, %s, %s },", funcname, auditev, flags, thr_flag) > sysent column = column + length(funcname) + length(auditev) + length(flags) + 3 } else { printf("sys_%s, %s, NULL, 0, 0, %s, %s },", funcname, auditev, flags, thr_flag) > sysent column = column + length(funcname) + length(auditev) + length(flags) + 3 + 4 } } align_sysent_comment(column) printf("/* %d = %s */\n", syscall, funcalias) > sysent printf("\t\"%s\",\t\t\t/* %d = %s */\n", funcalias, syscall, funcalias) > sysnames if (!flag("NODEF")) { printf("#define\t%s%s\t%d\n", syscallprefix, funcalias, syscall) > syshdr printf(" \\\n\t%s.o", funcalias) > sysmk } syscall++ next } type("COMPAT") || type("COMPAT4") || type("COMPAT6") || \ type("COMPAT7") || type("COMPAT10") || type("COMPAT11") { if (flag("COMPAT")) { ncompat++ out = syscompat outdcl = syscompatdcl wrap = "compat" prefix = "o" descr = "old" } else if (flag("COMPAT4")) { ncompat4++ out = syscompat4 outdcl = syscompat4dcl wrap = "compat4" prefix = "freebsd4_" descr = "freebsd4" } else if (flag("COMPAT6")) { ncompat6++ out = syscompat6 outdcl = syscompat6dcl wrap = "compat6" prefix = "freebsd6_" descr = "freebsd6" } else if (flag("COMPAT7")) { ncompat7++ out = syscompat7 outdcl = syscompat7dcl wrap = "compat7" prefix = "freebsd7_" descr = "freebsd7" } else if (flag("COMPAT10")) { ncompat10++ out = syscompat10 outdcl = syscompat10dcl wrap = "compat10" prefix = "freebsd10_" descr = "freebsd10" } else if (flag("COMPAT11")) { ncompat11++ out = syscompat11 outdcl = syscompat11dcl wrap = "compat11" prefix = "freebsd11_" descr = "freebsd11" } parseline() if (argc != 0 && !flag("NOARGS") && !flag("NOPROTO") && \ !flag("NODEF")) { printf("struct %s {\n", argalias) > out for (i = 1; i <= argc; i++) printf("\tchar %s_l_[PADL_(%s)]; %s %s; " \ "char %s_r_[PADR_(%s)];\n", argname[i], argtype[i], argtype[i], argname[i], argname[i], argtype[i]) > out printf("};\n") > out } else if (!flag("NOARGS") && !flag("NOPROTO") && !flag("NODEF")) printf("struct %s {\n\tregister_t dummy;\n};\n", argalias) > sysarg if (!flag("NOPROTO") && !flag("NODEF")) { printf("%s\t%s%s(struct thread *, struct %s *);\n", rettype, prefix, funcname, argalias) > outdcl printf("#define\t%sAUE_%s%s\t%s\n", syscallprefix, prefix, funcname, auditev) > sysaue } if (flag("NOSTD")) { printf("\t{ %s, (sy_call_t *)%s, %s, NULL, 0, 0, 0, SY_THR_ABSENT },", "0", "lkmressys", "AUE_NULL") > sysent align_sysent_comment(8 + 2 + length("0") + 15 + \ length("lkmressys") + length("AUE_NULL") + 3) } else { printf("\t{ %s(%s,%s), %s, NULL, 0, 0, %s, %s },", wrap, argssize, funcname, auditev, flags, thr_flag) > sysent align_sysent_comment(8 + 9 + length(argssize) + 1 + \ length(funcname) + length(auditev) + \ length(flags) + 4) } printf("/* %d = %s %s */\n", syscall, descr, funcalias) > sysent printf("\t\"%s.%s\",\t\t/* %d = %s %s */\n", wrap, funcalias, syscall, descr, funcalias) > sysnames # Do not provide freebsdN_* symbols in libc for < FreeBSD 7 if (flag("COMPAT") || flag("COMPAT4") || flag("COMPAT6")) { printf("\t\t\t\t/* %d is %s %s */\n", syscall, descr, funcalias) > syshdr } else if (!flag("NODEF")) { printf("#define\t%s%s%s\t%d\n", syscallprefix, prefix, funcalias, syscall) > syshdr printf(" \\\n\t%s%s.o", prefix, funcalias) > sysmk } syscall++ next } type("OBSOL") { printf("\t{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT },") > sysent align_sysent_comment(34) printf("/* %d = obsolete %s */\n", syscall, comment) > sysent printf("\t\"obs_%s\",\t\t\t/* %d = obsolete %s */\n", $4, syscall, comment) > sysnames printf("\t\t\t\t/* %d is obsolete %s */\n", syscall, comment) > syshdr syscall++ next } type("UNIMPL") { while (syscall <= syscall_range[2]) { printf("\t{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT },\t\t\t/* %d = %s */\n", syscall, comment) > sysent printf("\t\"#%d\",\t\t\t/* %d = %s */\n", syscall, syscall, comment) > sysnames syscall++ } next } { printf "%s: line %d: unrecognized keyword %s\n", infile, NR, $3 exit 1 } END { printf "\n#define AS(name) (sizeof(struct name) / sizeof(register_t))\n" > sysinc if (ncompat != 0) { printf "\n#ifdef %s\n", compat > sysinc printf "#define compat(n, name) n, (sy_call_t *)__CONCAT(o,name)\n" > sysinc printf "#else\n" > sysinc printf "#define compat(n, name) 0, (sy_call_t *)nosys\n" > sysinc printf "#endif\n" > sysinc } if (ncompat4 != 0) { printf "\n#ifdef %s\n", compat4 > sysinc printf "#define compat4(n, name) n, (sy_call_t *)__CONCAT(freebsd4_,name)\n" > sysinc printf "#else\n" > sysinc printf "#define compat4(n, name) 0, (sy_call_t *)nosys\n" > sysinc printf "#endif\n" > sysinc } if (ncompat6 != 0) { printf "\n#ifdef %s\n", compat6 > sysinc printf "#define compat6(n, name) n, (sy_call_t *)__CONCAT(freebsd6_,name)\n" > sysinc printf "#else\n" > sysinc printf "#define compat6(n, name) 0, (sy_call_t *)nosys\n" > sysinc printf "#endif\n" > sysinc } if (ncompat7 != 0) { printf "\n#ifdef %s\n", compat7 > sysinc printf "#define compat7(n, name) n, (sy_call_t *)__CONCAT(freebsd7_,name)\n" > sysinc printf "#else\n" > sysinc printf "#define compat7(n, name) 0, (sy_call_t *)nosys\n" > sysinc printf "#endif\n" > sysinc } if (ncompat10 != 0) { printf "\n#ifdef %s\n", compat10 > sysinc printf "#define compat10(n, name) n, (sy_call_t *)__CONCAT(freebsd10_,name)\n" > sysinc printf "#else\n" > sysinc printf "#define compat10(n, name) 0, (sy_call_t *)nosys\n" > sysinc printf "#endif\n" > sysinc } if (ncompat11 != 0) { printf "\n#ifdef %s\n", compat11 > sysinc printf "#define compat11(n, name) n, (sy_call_t *)__CONCAT(freebsd11_,name)\n" > sysinc printf "#else\n" > sysinc printf "#define compat11(n, name) 0, (sy_call_t *)nosys\n" > sysinc printf "#endif\n" > sysinc } printf("\n#endif /* %s */\n\n", compat) > syscompatdcl printf("\n#endif /* %s */\n\n", compat4) > syscompat4dcl printf("\n#endif /* %s */\n\n", compat6) > syscompat6dcl printf("\n#endif /* %s */\n\n", compat7) > syscompat7dcl printf("\n#endif /* %s */\n\n", compat10) > syscompat10dcl printf("\n#endif /* %s */\n\n", compat11) > syscompat11dcl printf("\n#undef PAD_\n") > sysprotoend printf("#undef PADL_\n") > sysprotoend printf("#undef PADR_\n") > sysprotoend printf("\n#endif /* !%s */\n", sysproto_h) > sysprotoend printf("\n") > sysmk printf("};\n") > sysent printf("};\n") > sysnames printf("#define\t%sMAXSYSCALL\t%d\n", syscallprefix, syscall) \ > syshdr printf "\tdefault:\n\t\t*n_args = 0;\n\t\tbreak;\n\t};\n}\n" > systrace printf "\tdefault:\n\t\tbreak;\n\t};\n\tif (p != NULL)\n\t\tstrlcpy(desc, p, descsz);\n}\n" > systracetmp printf "\tdefault:\n\t\tbreak;\n\t};\n\tif (p != NULL)\n\t\tstrlcpy(desc, p, descsz);\n}\n" > systraceret } ' cat $sysinc $sysent >> $syssw cat $sysarg $sysdcl \ $syscompat $syscompatdcl \ $syscompat4 $syscompat4dcl \ $syscompat6 $syscompat6dcl \ $syscompat7 $syscompat7dcl \ $syscompat10 $syscompat10dcl \ $syscompat11 $syscompat11dcl \ $sysaue $sysprotoend > $sysproto cat $systracetmp >> $systrace cat $systraceret >> $systrace Index: head/sys/mips/mips/elf_machdep.c =================================================================== --- head/sys/mips/mips/elf_machdep.c (revision 342242) +++ head/sys/mips/mips/elf_machdep.c (revision 342243) @@ -1,554 +1,552 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * from: src/sys/i386/i386/elf_machdep.c,v 1.20 2004/08/11 02:35:05 marcel */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __mips_n64 struct sysentvec elf64_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, - .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD ELF64", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_LP64, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, }; static Elf64_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_MIPS, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_info); void elf64_dump_thread(struct thread *td __unused, void *dst __unused, size_t *off __unused) { } #else struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, - .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD ELF32", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_ILP32, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, }; static Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_MIPS, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); void elf32_dump_thread(struct thread *td __unused, void *dst __unused, size_t *off __unused) { } #endif /* * The following MIPS relocation code for tracking multiple * consecutive HI32/LO32 entries is because of the following: * * https://dmz-portal.mips.com/wiki/MIPS_relocation_types * * === * * + R_MIPS_HI16 * * An R_MIPS_HI16 must be followed eventually by an associated R_MIPS_LO16 * relocation record in the same SHT_REL section. The contents of the two * fields to be relocated are combined to form a full 32-bit addend AHL. * An R_MIPS_LO16 entry which does not immediately follow a R_MIPS_HI16 is * combined with the most recent one encountered, i.e. multiple R_MIPS_LO16 * entries may be associated with a single R_MIPS_HI16. Use of these * relocation types in a SHT_REL section is discouraged and may be * forbidden to avoid this complication. * * A GNU extension allows multiple R_MIPS_HI16 records to share the same * R_MIPS_LO16 relocation record(s). The association works like this within * a single relocation section: * * + From the beginning of the section moving to the end of the section, * until R_MIPS_LO16 is not found each found R_MIPS_HI16 relocation will * be associated with the first R_MIPS_LO16. * * + Until another R_MIPS_HI16 record is found all found R_MIPS_LO16 * relocations found are associated with the last R_MIPS_HI16. * * === * * This is so gcc can do dead code detection/removal without having to * generate HI/LO pairs even if one of them would be deleted. * * So, the summary is: * * + A HI16 entry must occur before any LO16 entries; * + Multiple consecutive HI16 RELA entries need to be buffered until the * first LO16 RELA entry occurs - and then all HI16 RELA relocations use * the offset in the LOW16 RELA for calculating their offsets; * + The last HI16 RELA entry before a LO16 RELA entry is used (the AHL) * for the first subsequent LO16 calculation; * + If multiple consecutive LO16 RELA entries occur, only the first * LO16 RELA entry triggers an update of buffered HI16 RELA entries; * any subsequent LO16 RELA entry before another HI16 RELA entry will * not cause any further updates to the HI16 RELA entries. * * Additionally, flush out any outstanding HI16 entries that don't have * a LO16 entry in case some garbage entries are left in the file. */ struct mips_tmp_reloc; struct mips_tmp_reloc { struct mips_tmp_reloc *next; Elf_Addr ahl; Elf32_Addr *where_hi16; }; static struct mips_tmp_reloc *ml = NULL; /* * Add a temporary relocation (ie, a HI16 reloc type.) */ static int mips_tmp_reloc_add(Elf_Addr ahl, Elf32_Addr *where_hi16) { struct mips_tmp_reloc *r; r = malloc(sizeof(struct mips_tmp_reloc), M_TEMP, M_NOWAIT); if (r == NULL) { printf("%s: failed to malloc\n", __func__); return (0); } r->ahl = ahl; r->where_hi16 = where_hi16; r->next = ml; ml = r; return (1); } /* * Flush the temporary relocation list. * * This should be done after a file is completely loaded * so no stale relocations exist to confuse the next * load. */ static void mips_tmp_reloc_flush(void) { struct mips_tmp_reloc *r, *rn; r = ml; ml = NULL; while (r != NULL) { rn = r->next; free(r, M_TEMP); r = rn; } } /* * Get an entry from the reloc list; or NULL if we've run out. */ static struct mips_tmp_reloc * mips_tmp_reloc_get(void) { struct mips_tmp_reloc *r; r = ml; if (r == NULL) return (NULL); ml = ml->next; return (r); } /* * Free a relocation entry. */ static void mips_tmp_reloc_free(struct mips_tmp_reloc *r) { free(r, M_TEMP); } bool elf_is_ifunc_reloc(Elf_Size r_info __unused) { return (false); } /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, int local, elf_lookup_fn lookup) { Elf32_Addr *where = (Elf32_Addr *)NULL; Elf_Addr addr; Elf_Addr addend = (Elf_Addr)0; Elf_Word rtype = (Elf_Word)0, symidx; struct mips_tmp_reloc *r; const Elf_Rel *rel = NULL; const Elf_Rela *rela = NULL; int error; /* Store the last seen ahl from a HI16 for LO16 processing */ static Elf_Addr last_ahl; switch (type) { case ELF_RELOC_REL: rel = (const Elf_Rel *)data; where = (Elf32_Addr *) (relocbase + rel->r_offset); rtype = ELF_R_TYPE(rel->r_info); symidx = ELF_R_SYM(rel->r_info); switch (rtype) { case R_MIPS_64: addend = *(Elf64_Addr *)where; break; default: addend = *where; break; } break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf32_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("unknown reloc type %d\n", type); } switch (rtype) { case R_MIPS_NONE: /* none */ break; case R_MIPS_32: /* S + A */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); addr += addend; if (*where != addr) *where = (Elf32_Addr)addr; break; case R_MIPS_26: /* ((A << 2) | (P & 0xf0000000) + S) >> 2 */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); addend &= 0x03ffffff; /* * Addendum for .rela R_MIPS_26 is not shifted right */ if (rela == NULL) addend <<= 2; addr += ((Elf_Addr)where & 0xf0000000) | addend; addr >>= 2; *where &= ~0x03ffffff; *where |= addr & 0x03ffffff; break; case R_MIPS_64: /* S + A */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); addr += addend; if (*(Elf64_Addr*)where != addr) *(Elf64_Addr*)where = addr; break; /* * Handle the two GNU extension cases: * * + Multiple HI16s followed by a LO16, and * + A HI16 followed by multiple LO16s. * * The former is tricky - the HI16 relocations need * to be buffered until a LO16 occurs, at which point * each HI16 is replayed against the LO16 relocation entry * (with the relevant overflow information.) * * The latter should be easy to handle - when the * first LO16 is seen, write out and flush the * HI16 buffer. Any subsequent LO16 entries will * find a blank relocation buffer. * */ case R_MIPS_HI16: /* ((AHL + S) - ((short)(AHL + S)) >> 16 */ if (rela != NULL) { error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); addr += addend; *where &= 0xffff0000; *where |= ((((long long) addr + 0x8000LL) >> 16) & 0xffff); } else { /* * Add a temporary relocation to the list; * will pop it off / free the list when * we've found a suitable HI16. */ if (mips_tmp_reloc_add(addend << 16, where) == 0) return (-1); /* * Track the last seen HI16 AHL for use by * the first LO16 AHL calculation. * * The assumption is any intermediary deleted * LO16's were optimised out, so the last * HI16 before the LO16 is the "true" relocation * entry to use for that LO16 write. */ last_ahl = addend << 16; } break; case R_MIPS_LO16: /* AHL + S */ if (rela != NULL) { error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); addr += addend; *where &= 0xffff0000; *where |= addr & 0xffff; } else { Elf_Addr tmp_ahl; Elf_Addr tmp_addend; tmp_ahl = last_ahl + (int16_t) addend; error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); tmp_addend = addend & 0xffff0000; /* Use the last seen ahl for calculating addend */ tmp_addend |= (uint16_t)(tmp_ahl + addr); *where = tmp_addend; /* * This logic implements the "we saw multiple HI16 * before a LO16" assignment /and/ "we saw multiple * LO16s". * * Multiple LO16s will be handled as a blank * relocation list. * * Multple HI16's are iterated over here. */ while ((r = mips_tmp_reloc_get()) != NULL) { Elf_Addr rahl; /* * We have the ahl from the HI16 entry, so * offset it by the 16 bits of the low ahl. */ rahl = r->ahl; rahl += (int16_t) addend; tmp_addend = *(r->where_hi16); tmp_addend &= 0xffff0000; tmp_addend |= ((rahl + addr) - (int16_t)(rahl + addr)) >> 16; *(r->where_hi16) = tmp_addend; mips_tmp_reloc_free(r); } } break; case R_MIPS_HIGHER: /* %higher(A+S) */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); addr += addend; *where &= 0xffff0000; *where |= (((long long)addr + 0x80008000LL) >> 32) & 0xffff; break; case R_MIPS_HIGHEST: /* %highest(A+S) */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); addr += addend; *where &= 0xffff0000; *where |= (((long long)addr + 0x800080008000LL) >> 48) & 0xffff; break; default: printf("kldload: unexpected relocation type %d\n", rtype); return (-1); } return(0); } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup)); } int elf_cpu_load_file(linker_file_t lf __unused) { /* * Sync the I and D caches to make sure our relocations are visible. */ mips_icache_sync_all(); /* Flush outstanding relocations */ mips_tmp_reloc_flush(); return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } Index: head/sys/mips/mips/freebsd32_machdep.c =================================================================== --- head/sys/mips/mips/freebsd32_machdep.c (revision 342242) +++ head/sys/mips/mips/freebsd32_machdep.c (revision 342243) @@ -1,495 +1,494 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 Juli Mallett * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Based on nwhitehorn's COMPAT_FREEBSD32 support code for PowerPC64. */ #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void freebsd32_exec_setregs(struct thread *, struct image_params *, u_long); static int get_mcontext32(struct thread *, mcontext32_t *, int); static int set_mcontext32(struct thread *, mcontext32_t *); static void freebsd32_sendsig(sig_t, ksiginfo_t *, sigset_t *); extern const char *freebsd32_syscallnames[]; struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = freebsd32_sysent, - .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = freebsd32_sendsig, .sv_sigcode = sigcode32, .sv_szsigcode = &szsigcode32, .sv_name = "FreeBSD ELF32", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = ((vm_offset_t)0x80000000), .sv_usrstack = FREEBSD32_USRSTACK, .sv_psstrings = FREEBSD32_PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = freebsd32_copyout_strings, .sv_setregs = freebsd32_exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_ILP32, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = freebsd32_syscallnames, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); static Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_MIPS, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = "/libexec/ld-elf32.so.1", .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); static void freebsd32_exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { exec_setregs(td, imgp, stack); /* * See comment in exec_setregs about running 32-bit binaries with 64-bit * registers. */ td->td_frame->sp -= 65536; /* * Clear extended address space bit for userland. */ td->td_frame->sr &= ~MIPS_SR_UX; td->td_md.md_tls_tcb_offset = TLS_TP_OFFSET + TLS_TCB_SIZE32; } int set_regs32(struct thread *td, struct reg32 *regs) { struct reg r; unsigned i; for (i = 0; i < NUMSAVEREGS; i++) r.r_regs[i] = regs->r_regs[i]; return (set_regs(td, &r)); } int fill_regs32(struct thread *td, struct reg32 *regs) { struct reg r; unsigned i; int error; error = fill_regs(td, &r); if (error != 0) return (error); for (i = 0; i < NUMSAVEREGS; i++) regs->r_regs[i] = r.r_regs[i]; return (0); } int set_fpregs32(struct thread *td, struct fpreg32 *fpregs) { struct fpreg fp; unsigned i; for (i = 0; i < NUMFPREGS; i++) fp.r_regs[i] = fpregs->r_regs[i]; return (set_fpregs(td, &fp)); } int fill_fpregs32(struct thread *td, struct fpreg32 *fpregs) { struct fpreg fp; unsigned i; int error; error = fill_fpregs(td, &fp); if (error != 0) return (error); for (i = 0; i < NUMFPREGS; i++) fpregs->r_regs[i] = fp.r_regs[i]; return (0); } static int get_mcontext32(struct thread *td, mcontext32_t *mcp, int flags) { mcontext_t mcp64; unsigned i; int error; error = get_mcontext(td, &mcp64, flags); if (error != 0) return (error); mcp->mc_onstack = mcp64.mc_onstack; mcp->mc_pc = mcp64.mc_pc; for (i = 0; i < 32; i++) mcp->mc_regs[i] = mcp64.mc_regs[i]; mcp->sr = mcp64.sr; mcp->mullo = mcp64.mullo; mcp->mulhi = mcp64.mulhi; mcp->mc_fpused = mcp64.mc_fpused; for (i = 0; i < 33; i++) mcp->mc_fpregs[i] = mcp64.mc_fpregs[i]; mcp->mc_fpc_eir = mcp64.mc_fpc_eir; mcp->mc_tls = (int32_t)(intptr_t)mcp64.mc_tls; return (0); } static int set_mcontext32(struct thread *td, mcontext32_t *mcp) { mcontext_t mcp64; unsigned i; mcp64.mc_onstack = mcp->mc_onstack; mcp64.mc_pc = mcp->mc_pc; for (i = 0; i < 32; i++) mcp64.mc_regs[i] = mcp->mc_regs[i]; mcp64.sr = mcp->sr; mcp64.mullo = mcp->mullo; mcp64.mulhi = mcp->mulhi; mcp64.mc_fpused = mcp->mc_fpused; for (i = 0; i < 33; i++) mcp64.mc_fpregs[i] = mcp->mc_fpregs[i]; mcp64.mc_fpc_eir = mcp->mc_fpc_eir; mcp64.mc_tls = (void *)(intptr_t)mcp->mc_tls; return (set_mcontext(td, &mcp64)); } int freebsd32_sigreturn(struct thread *td, struct freebsd32_sigreturn_args *uap) { ucontext32_t uc; int error; CTR2(KTR_SIG, "sigreturn: td=%p ucp=%p", td, uap->sigcntxp); if (copyin(uap->sigcntxp, &uc, sizeof(uc)) != 0) { CTR1(KTR_SIG, "sigreturn: efault td=%p", td); return (EFAULT); } error = set_mcontext32(td, &uc.uc_mcontext); if (error != 0) return (error); kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); #if 0 CTR3(KTR_SIG, "sigreturn: return td=%p pc=%#x sp=%#x", td, uc.uc_mcontext.mc_srr0, uc.uc_mcontext.mc_gpr[1]); #endif return (EJUSTRETURN); } /* * The first two fields of a ucontext_t are the signal mask and the machine * context. The next field is uc_link; we want to avoid destroying the link * when copying out contexts. */ #define UC32_COPY_SIZE offsetof(ucontext32_t, uc_link) int freebsd32_getcontext(struct thread *td, struct freebsd32_getcontext_args *uap) { ucontext32_t uc; int ret; if (uap->ucp == NULL) ret = EINVAL; else { bzero(&uc, sizeof(uc)); get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET); PROC_LOCK(td->td_proc); uc.uc_sigmask = td->td_sigmask; PROC_UNLOCK(td->td_proc); ret = copyout(&uc, uap->ucp, UC32_COPY_SIZE); } return (ret); } int freebsd32_setcontext(struct thread *td, struct freebsd32_setcontext_args *uap) { ucontext32_t uc; int ret; if (uap->ucp == NULL) ret = EINVAL; else { ret = copyin(uap->ucp, &uc, UC32_COPY_SIZE); if (ret == 0) { ret = set_mcontext32(td, &uc.uc_mcontext); if (ret == 0) { kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); } } } return (ret == 0 ? EJUSTRETURN : ret); } int freebsd32_swapcontext(struct thread *td, struct freebsd32_swapcontext_args *uap) { ucontext32_t uc; int ret; if (uap->oucp == NULL || uap->ucp == NULL) ret = EINVAL; else { bzero(&uc, sizeof(uc)); get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET); PROC_LOCK(td->td_proc); uc.uc_sigmask = td->td_sigmask; PROC_UNLOCK(td->td_proc); ret = copyout(&uc, uap->oucp, UC32_COPY_SIZE); if (ret == 0) { ret = copyin(uap->ucp, &uc, UC32_COPY_SIZE); if (ret == 0) { ret = set_mcontext32(td, &uc.uc_mcontext); if (ret == 0) { kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); } } } } return (ret == 0 ? EJUSTRETURN : ret); } #define UCONTEXT_MAGIC 0xACEDBADE /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ static void freebsd32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct proc *p; struct thread *td; struct fpreg32 fpregs; struct reg32 regs; struct sigacts *psp; struct sigframe32 sf, *sfp; int sig; int oonstack; unsigned i; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); fill_regs32(td, ®s); oonstack = sigonstack(td->td_frame->sp); /* save user context */ bzero(&sf, sizeof sf); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack.ss_sp = (int32_t)(intptr_t)td->td_sigstk.ss_sp; sf.sf_uc.uc_stack.ss_size = td->td_sigstk.ss_size; sf.sf_uc.uc_stack.ss_flags = td->td_sigstk.ss_flags; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_pc = regs.r_regs[PC]; sf.sf_uc.uc_mcontext.mullo = regs.r_regs[MULLO]; sf.sf_uc.uc_mcontext.mulhi = regs.r_regs[MULHI]; sf.sf_uc.uc_mcontext.mc_tls = (int32_t)(intptr_t)td->td_md.md_tls; sf.sf_uc.uc_mcontext.mc_regs[0] = UCONTEXT_MAGIC; /* magic number */ for (i = 1; i < 32; i++) sf.sf_uc.uc_mcontext.mc_regs[i] = regs.r_regs[i]; sf.sf_uc.uc_mcontext.mc_fpused = td->td_md.md_flags & MDTD_FPUSED; if (sf.sf_uc.uc_mcontext.mc_fpused) { /* if FPU has current state, save it first */ if (td == PCPU_GET(fpcurthread)) MipsSaveCurFPState(td); fill_fpregs32(td, &fpregs); for (i = 0; i < 33; i++) sf.sf_uc.uc_mcontext.mc_fpregs[i] = fpregs.r_regs[i]; } /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe32 *)(((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe32)) & ~(sizeof(__int64_t) - 1)); } else sfp = (struct sigframe32 *)((vm_offset_t)(td->td_frame->sp - sizeof(struct sigframe32)) & ~(sizeof(__int64_t) - 1)); /* Build the argument list for the signal handler. */ td->td_frame->a0 = sig; td->td_frame->a2 = (register_t)(intptr_t)&sfp->sf_uc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ td->td_frame->a1 = (register_t)(intptr_t)&sfp->sf_si; /* sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; */ /* fill siginfo structure */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = ksi->ksi_code; sf.sf_si.si_addr = td->td_frame->badvaddr; } else { /* Old FreeBSD-style arguments. */ td->td_frame->a1 = ksi->ksi_code; td->td_frame->a3 = td->td_frame->badvaddr; /* sf.sf_ahu.sf_handler = catcher; */ } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(struct sigframe32)) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. */ PROC_LOCK(p); sigexit(td, SIGILL); } td->td_frame->pc = (register_t)(intptr_t)catcher; td->td_frame->t9 = (register_t)(intptr_t)catcher; td->td_frame->sp = (register_t)(intptr_t)sfp; /* * Signal trampoline code is at base of user stack. */ td->td_frame->ra = (register_t)(intptr_t)FREEBSD32_PS_STRINGS - *(p->p_sysent->sv_szsigcode); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } int freebsd32_sysarch(struct thread *td, struct freebsd32_sysarch_args *uap) { int error; int32_t tlsbase; switch (uap->op) { case MIPS_SET_TLS: td->td_md.md_tls = (void *)(intptr_t)uap->parms; return (0); case MIPS_GET_TLS: tlsbase = (int32_t)(intptr_t)td->td_md.md_tls; error = copyout(&tlsbase, uap->parms, sizeof(tlsbase)); return (error); default: break; } return (EINVAL); } void elf32_dump_thread(struct thread *td __unused, void *dst __unused, size_t *off __unused) { } Index: head/sys/mips/mips/trap.c =================================================================== --- head/sys/mips/mips/trap.c (revision 342242) +++ head/sys/mips/mips/trap.c (revision 342243) @@ -1,1713 +1,1711 @@ /* $OpenBSD: trap.c,v 1.19 1998/09/30 12:40:41 pefo Exp $ */ /* tracked to 1.23 */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1988 University of Utah. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and Ralph Campbell. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Utah Hdr: trap.c 1.32 91/04/06 * * from: @(#)trap.c 8.5 (Berkeley) 1/11/94 * JNPR: trap.c,v 1.13.2.2 2007/08/29 10:03:49 girish */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #include #include #include #endif #ifdef KDTRACE_HOOKS #include #endif #ifdef TRAP_DEBUG int trap_debug = 0; SYSCTL_INT(_machdep, OID_AUTO, trap_debug, CTLFLAG_RW, &trap_debug, 0, "Debug information on all traps"); #endif #define lbu_macro(data, addr) \ __asm __volatile ("lbu %0, 0x0(%1)" \ : "=r" (data) /* outputs */ \ : "r" (addr)); /* inputs */ #define lb_macro(data, addr) \ __asm __volatile ("lb %0, 0x0(%1)" \ : "=r" (data) /* outputs */ \ : "r" (addr)); /* inputs */ #define lwl_macro(data, addr) \ __asm __volatile ("lwl %0, 0x0(%1)" \ : "=r" (data) /* outputs */ \ : "r" (addr)); /* inputs */ #define lwr_macro(data, addr) \ __asm __volatile ("lwr %0, 0x0(%1)" \ : "=r" (data) /* outputs */ \ : "r" (addr)); /* inputs */ #define ldl_macro(data, addr) \ __asm __volatile ("ldl %0, 0x0(%1)" \ : "=r" (data) /* outputs */ \ : "r" (addr)); /* inputs */ #define ldr_macro(data, addr) \ __asm __volatile ("ldr %0, 0x0(%1)" \ : "=r" (data) /* outputs */ \ : "r" (addr)); /* inputs */ #define sb_macro(data, addr) \ __asm __volatile ("sb %0, 0x0(%1)" \ : /* outputs */ \ : "r" (data), "r" (addr)); /* inputs */ #define swl_macro(data, addr) \ __asm __volatile ("swl %0, 0x0(%1)" \ : /* outputs */ \ : "r" (data), "r" (addr)); /* inputs */ #define swr_macro(data, addr) \ __asm __volatile ("swr %0, 0x0(%1)" \ : /* outputs */ \ : "r" (data), "r" (addr)); /* inputs */ #define sdl_macro(data, addr) \ __asm __volatile ("sdl %0, 0x0(%1)" \ : /* outputs */ \ : "r" (data), "r" (addr)); /* inputs */ #define sdr_macro(data, addr) \ __asm __volatile ("sdr %0, 0x0(%1)" \ : /* outputs */ \ : "r" (data), "r" (addr)); /* inputs */ static void log_illegal_instruction(const char *, struct trapframe *); static void log_bad_page_fault(char *, struct trapframe *, int); static void log_frame_dump(struct trapframe *frame); static void get_mapping_info(vm_offset_t, pd_entry_t **, pt_entry_t **); int (*dtrace_invop_jump_addr)(struct trapframe *); #ifdef TRAP_DEBUG static void trap_frame_dump(struct trapframe *frame); #endif void (*machExceptionTable[]) (void)= { /* * The kernel exception handlers. */ MipsKernIntr, /* external interrupt */ MipsKernGenException, /* TLB modification */ MipsTLBInvalidException,/* TLB miss (load or instr. fetch) */ MipsTLBInvalidException,/* TLB miss (store) */ MipsKernGenException, /* address error (load or I-fetch) */ MipsKernGenException, /* address error (store) */ MipsKernGenException, /* bus error (I-fetch) */ MipsKernGenException, /* bus error (load or store) */ MipsKernGenException, /* system call */ MipsKernGenException, /* breakpoint */ MipsKernGenException, /* reserved instruction */ MipsKernGenException, /* coprocessor unusable */ MipsKernGenException, /* arithmetic overflow */ MipsKernGenException, /* trap exception */ MipsKernGenException, /* virtual coherence exception inst */ MipsKernGenException, /* floating point exception */ MipsKernGenException, /* reserved */ MipsKernGenException, /* reserved */ MipsKernGenException, /* reserved */ MipsKernGenException, /* reserved */ MipsKernGenException, /* reserved */ MipsKernGenException, /* reserved */ MipsKernGenException, /* reserved */ MipsKernGenException, /* watch exception */ MipsKernGenException, /* reserved */ MipsKernGenException, /* reserved */ MipsKernGenException, /* reserved */ MipsKernGenException, /* reserved */ MipsKernGenException, /* reserved */ MipsKernGenException, /* reserved */ MipsKernGenException, /* reserved */ MipsKernGenException, /* virtual coherence exception data */ /* * The user exception handlers. */ MipsUserIntr, /* 0 */ MipsUserGenException, /* 1 */ MipsTLBInvalidException,/* 2 */ MipsTLBInvalidException,/* 3 */ MipsUserGenException, /* 4 */ MipsUserGenException, /* 5 */ MipsUserGenException, /* 6 */ MipsUserGenException, /* 7 */ MipsUserGenException, /* 8 */ MipsUserGenException, /* 9 */ MipsUserGenException, /* 10 */ MipsUserGenException, /* 11 */ MipsUserGenException, /* 12 */ MipsUserGenException, /* 13 */ MipsUserGenException, /* 14 */ MipsUserGenException, /* 15 */ MipsUserGenException, /* 16 */ MipsUserGenException, /* 17 */ MipsUserGenException, /* 18 */ MipsUserGenException, /* 19 */ MipsUserGenException, /* 20 */ MipsUserGenException, /* 21 */ MipsUserGenException, /* 22 */ MipsUserGenException, /* 23 */ MipsUserGenException, /* 24 */ MipsUserGenException, /* 25 */ MipsUserGenException, /* 26 */ MipsUserGenException, /* 27 */ MipsUserGenException, /* 28 */ MipsUserGenException, /* 29 */ MipsUserGenException, /* 20 */ MipsUserGenException, /* 31 */ }; char *trap_type[] = { "external interrupt", "TLB modification", "TLB miss (load or instr. fetch)", "TLB miss (store)", "address error (load or I-fetch)", "address error (store)", "bus error (I-fetch)", "bus error (load or store)", "system call", "breakpoint", "reserved instruction", "coprocessor unusable", "arithmetic overflow", "trap", "virtual coherency instruction", "floating point", "reserved 16", "reserved 17", "reserved 18", "reserved 19", "reserved 20", "reserved 21", "reserved 22", "watch", "reserved 24", "reserved 25", "reserved 26", "reserved 27", "reserved 28", "reserved 29", "reserved 30", "virtual coherency data", }; #if !defined(SMP) && (defined(DDB) || defined(DEBUG)) struct trapdebug trapdebug[TRAPSIZE], *trp = trapdebug; #endif #define KERNLAND(x) ((vm_offset_t)(x) >= VM_MIN_KERNEL_ADDRESS && (vm_offset_t)(x) < VM_MAX_KERNEL_ADDRESS) #define DELAYBRANCH(x) ((x) & MIPS_CR_BR_DELAY) /* * MIPS load/store access type */ enum { MIPS_LHU_ACCESS = 1, MIPS_LH_ACCESS, MIPS_LWU_ACCESS, MIPS_LW_ACCESS, MIPS_LD_ACCESS, MIPS_SH_ACCESS, MIPS_SW_ACCESS, MIPS_SD_ACCESS }; char *access_name[] = { "Load Halfword Unsigned", "Load Halfword", "Load Word Unsigned", "Load Word", "Load Doubleword", "Store Halfword", "Store Word", "Store Doubleword" }; #ifdef CPU_CNMIPS #include #endif static int allow_unaligned_acc = 1; SYSCTL_INT(_vm, OID_AUTO, allow_unaligned_acc, CTLFLAG_RW, &allow_unaligned_acc, 0, "Allow unaligned accesses"); /* * FP emulation is assumed to work on O32, but the code is outdated and crufty * enough that it's a more sensible default to have it disabled when using * other ABIs. At the very least, it needs a lot of help in using * type-semantic ABI-oblivious macros for everything it does. */ #if defined(__mips_o32) static int emulate_fp = 1; #else static int emulate_fp = 0; #endif SYSCTL_INT(_machdep, OID_AUTO, emulate_fp, CTLFLAG_RW, &emulate_fp, 0, "Emulate unimplemented FPU instructions"); static int emulate_unaligned_access(struct trapframe *frame, int mode); extern void fswintrberr(void); /* XXX */ int cpu_fetch_syscall_args(struct thread *td) { struct trapframe *locr0; struct sysentvec *se; struct syscall_args *sa; int error, nsaved; locr0 = td->td_frame; sa = &td->td_sa; bzero(sa->args, sizeof(sa->args)); /* compute next PC after syscall instruction */ td->td_pcb->pcb_tpc = sa->trapframe->pc; /* Remember if restart */ if (DELAYBRANCH(sa->trapframe->cause)) /* Check BD bit */ locr0->pc = MipsEmulateBranch(locr0, sa->trapframe->pc, 0, 0); else locr0->pc += sizeof(int); sa->code = locr0->v0; switch (sa->code) { case SYS___syscall: case SYS_syscall: /* * This is an indirect syscall, in which the code is the first argument. */ #if (!defined(__mips_n32) && !defined(__mips_n64)) || defined(COMPAT_FREEBSD32) if (sa->code == SYS___syscall && SV_PROC_FLAG(td->td_proc, SV_ILP32)) { /* * Like syscall, but code is a quad, so as to maintain alignment * for the rest of the arguments. */ if (_QUAD_LOWWORD == 0) sa->code = locr0->a0; else sa->code = locr0->a1; sa->args[0] = locr0->a2; sa->args[1] = locr0->a3; nsaved = 2; break; } #endif /* * This is either not a quad syscall, or is a quad syscall with a * new ABI in which quads fit in a single register. */ sa->code = locr0->a0; sa->args[0] = locr0->a1; sa->args[1] = locr0->a2; sa->args[2] = locr0->a3; nsaved = 3; #if defined(__mips_n32) || defined(__mips_n64) #ifdef COMPAT_FREEBSD32 if (!SV_PROC_FLAG(td->td_proc, SV_ILP32)) { #endif /* * Non-o32 ABIs support more arguments in registers. */ sa->args[3] = locr0->a4; sa->args[4] = locr0->a5; sa->args[5] = locr0->a6; sa->args[6] = locr0->a7; nsaved += 4; #ifdef COMPAT_FREEBSD32 } #endif #endif break; default: /* * A direct syscall, arguments are just parameters to the syscall. */ sa->args[0] = locr0->a0; sa->args[1] = locr0->a1; sa->args[2] = locr0->a2; sa->args[3] = locr0->a3; nsaved = 4; #if defined (__mips_n32) || defined(__mips_n64) #ifdef COMPAT_FREEBSD32 if (!SV_PROC_FLAG(td->td_proc, SV_ILP32)) { #endif /* * Non-o32 ABIs support more arguments in registers. */ sa->args[4] = locr0->a4; sa->args[5] = locr0->a5; sa->args[6] = locr0->a6; sa->args[7] = locr0->a7; nsaved += 4; #ifdef COMPAT_FREEBSD32 } #endif #endif break; } #ifdef TRAP_DEBUG if (trap_debug) printf("SYSCALL #%d pid:%u\n", sa->code, td->td_proc->p_pid); #endif se = td->td_proc->p_sysent; /* * XXX * Shouldn't this go before switching on the code? */ - if (se->sv_mask) - sa->code &= se->sv_mask; if (sa->code >= se->sv_size) sa->callp = &se->sv_table[0]; else sa->callp = &se->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; if (sa->narg > nsaved) { #if defined(__mips_n32) || defined(__mips_n64) /* * XXX * Is this right for new ABIs? I think the 4 there * should be 8, size there are 8 registers to skip, * not 4, but I'm not certain. */ #ifdef COMPAT_FREEBSD32 if (!SV_PROC_FLAG(td->td_proc, SV_ILP32)) #endif printf("SYSCALL #%u pid:%u, narg (%u) > nsaved (%u).\n", sa->code, td->td_proc->p_pid, sa->narg, nsaved); #endif #if (defined(__mips_n32) || defined(__mips_n64)) && defined(COMPAT_FREEBSD32) if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { unsigned i; int32_t arg; error = 0; /* XXX GCC is awful. */ for (i = nsaved; i < sa->narg; i++) { error = copyin((caddr_t)(intptr_t)(locr0->sp + (4 + (i - nsaved)) * sizeof(int32_t)), (caddr_t)&arg, sizeof arg); if (error != 0) break; sa->args[i] = arg; } } else #endif error = copyin((caddr_t)(intptr_t)(locr0->sp + 4 * sizeof(register_t)), (caddr_t)&sa->args[nsaved], (u_int)(sa->narg - nsaved) * sizeof(register_t)); if (error != 0) { locr0->v0 = error; locr0->a3 = 1; } } else error = 0; if (error == 0) { td->td_retval[0] = 0; td->td_retval[1] = locr0->v1; } return (error); } #undef __FBSDID #define __FBSDID(x) #include "../../kern/subr_syscall.c" /* * Handle an exception. * Called from MipsKernGenException() or MipsUserGenException() * when a processor trap occurs. * In the case of a kernel trap, we return the pc where to resume if * p->p_addr->u_pcb.pcb_onfault is set, otherwise, return old pc. */ register_t trap(struct trapframe *trapframe) { int type, usermode; int i = 0; unsigned ucode = 0; struct thread *td = curthread; struct proc *p = curproc; vm_prot_t ftype; pmap_t pmap; int access_type; ksiginfo_t ksi; char *msg = NULL; intptr_t addr = 0; register_t pc; int cop, error; register_t *frame_regs; trapdebug_enter(trapframe, 0); #ifdef KDB if (kdb_active) { kdb_reenter(); return (0); } #endif type = (trapframe->cause & MIPS_CR_EXC_CODE) >> MIPS_CR_EXC_CODE_SHIFT; if (TRAPF_USERMODE(trapframe)) { type |= T_USER; usermode = 1; } else { usermode = 0; } /* * Enable hardware interrupts if they were on before the trap. If it * was off disable all so we don't accidently enable it when doing a * return to userland. */ if (trapframe->sr & MIPS_SR_INT_IE) { set_intr_mask(trapframe->sr & MIPS_SR_INT_MASK); intr_enable(); } else { intr_disable(); } #ifdef TRAP_DEBUG if (trap_debug) { static vm_offset_t last_badvaddr = 0; static vm_offset_t this_badvaddr = 0; static int count = 0; u_int32_t pid; printf("trap type %x (%s - ", type, trap_type[type & (~T_USER)]); if (type & T_USER) printf("user mode)\n"); else printf("kernel mode)\n"); #ifdef SMP printf("cpuid = %d\n", PCPU_GET(cpuid)); #endif pid = mips_rd_entryhi() & TLBHI_ASID_MASK; printf("badaddr = %#jx, pc = %#jx, ra = %#jx, sp = %#jx, sr = %jx, pid = %d, ASID = %u\n", (intmax_t)trapframe->badvaddr, (intmax_t)trapframe->pc, (intmax_t)trapframe->ra, (intmax_t)trapframe->sp, (intmax_t)trapframe->sr, (curproc ? curproc->p_pid : -1), pid); switch (type & ~T_USER) { case T_TLB_MOD: case T_TLB_LD_MISS: case T_TLB_ST_MISS: case T_ADDR_ERR_LD: case T_ADDR_ERR_ST: this_badvaddr = trapframe->badvaddr; break; case T_SYSCALL: this_badvaddr = trapframe->ra; break; default: this_badvaddr = trapframe->pc; break; } if ((last_badvaddr == this_badvaddr) && ((type & ~T_USER) != T_SYSCALL) && ((type & ~T_USER) != T_COP_UNUSABLE)) { if (++count == 3) { trap_frame_dump(trapframe); panic("too many faults at %p\n", (void *)last_badvaddr); } } else { last_badvaddr = this_badvaddr; count = 0; } } #endif #ifdef KDTRACE_HOOKS /* * A trap can occur while DTrace executes a probe. Before * executing the probe, DTrace blocks re-scheduling and sets * a flag in its per-cpu flags to indicate that it doesn't * want to fault. On returning from the probe, the no-fault * flag is cleared and finally re-scheduling is enabled. * * If the DTrace kernel module has registered a trap handler, * call it and if it returns non-zero, assume that it has * handled the trap and modified the trap frame so that this * function can return normally. */ /* * XXXDTRACE: add pid probe handler here (if ever) */ if (!usermode) { if (dtrace_trap_func != NULL && (*dtrace_trap_func)(trapframe, type) != 0) return (trapframe->pc); } #endif switch (type) { case T_MCHECK: #ifdef DDB kdb_trap(type, 0, trapframe); #endif panic("MCHECK\n"); break; case T_TLB_MOD: /* check for kernel address */ if (KERNLAND(trapframe->badvaddr)) { if (pmap_emulate_modified(kernel_pmap, trapframe->badvaddr) != 0) { ftype = VM_PROT_WRITE; goto kernel_fault; } return (trapframe->pc); } /* FALLTHROUGH */ case T_TLB_MOD + T_USER: pmap = &p->p_vmspace->vm_pmap; if (pmap_emulate_modified(pmap, trapframe->badvaddr) != 0) { ftype = VM_PROT_WRITE; goto dofault; } if (!usermode) return (trapframe->pc); goto out; case T_TLB_LD_MISS: case T_TLB_ST_MISS: ftype = (type == T_TLB_ST_MISS) ? VM_PROT_WRITE : VM_PROT_READ; /* check for kernel address */ if (KERNLAND(trapframe->badvaddr)) { vm_offset_t va; int rv; kernel_fault: va = trunc_page((vm_offset_t)trapframe->badvaddr); rv = vm_fault(kernel_map, va, ftype, VM_FAULT_NORMAL); if (rv == KERN_SUCCESS) return (trapframe->pc); if (td->td_pcb->pcb_onfault != NULL) { pc = (register_t)(intptr_t)td->td_pcb->pcb_onfault; td->td_pcb->pcb_onfault = NULL; return (pc); } goto err; } /* * It is an error for the kernel to access user space except * through the copyin/copyout routines. */ if (td->td_pcb->pcb_onfault == NULL) goto err; goto dofault; case T_TLB_LD_MISS + T_USER: ftype = VM_PROT_READ; goto dofault; case T_TLB_ST_MISS + T_USER: ftype = VM_PROT_WRITE; dofault: { vm_offset_t va; struct vmspace *vm; vm_map_t map; int rv = 0; vm = p->p_vmspace; map = &vm->vm_map; va = trunc_page((vm_offset_t)trapframe->badvaddr); if (KERNLAND(trapframe->badvaddr)) { /* * Don't allow user-mode faults in kernel * address space. */ goto nogo; } rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); /* * XXXDTRACE: add dtrace_doubletrap_func here? */ #ifdef VMFAULT_TRACE printf("vm_fault(%p (pmap %p), %p (%p), %x, %d) -> %x at pc %p\n", map, &vm->vm_pmap, (void *)va, (void *)(intptr_t)trapframe->badvaddr, ftype, VM_FAULT_NORMAL, rv, (void *)(intptr_t)trapframe->pc); #endif if (rv == KERN_SUCCESS) { if (!usermode) { return (trapframe->pc); } goto out; } nogo: if (!usermode) { if (td->td_pcb->pcb_onfault != NULL) { pc = (register_t)(intptr_t)td->td_pcb->pcb_onfault; td->td_pcb->pcb_onfault = NULL; return (pc); } goto err; } i = SIGSEGV; if (rv == KERN_PROTECTION_FAILURE) ucode = SEGV_ACCERR; else ucode = SEGV_MAPERR; addr = trapframe->pc; msg = "BAD_PAGE_FAULT"; log_bad_page_fault(msg, trapframe, type); break; } case T_ADDR_ERR_LD + T_USER: /* misaligned or kseg access */ case T_ADDR_ERR_ST + T_USER: /* misaligned or kseg access */ if (trapframe->badvaddr < 0 || trapframe->badvaddr >= VM_MAXUSER_ADDRESS) { msg = "ADDRESS_SPACE_ERR"; } else if (allow_unaligned_acc) { int mode; if (type == (T_ADDR_ERR_LD + T_USER)) mode = VM_PROT_READ; else mode = VM_PROT_WRITE; access_type = emulate_unaligned_access(trapframe, mode); if (access_type != 0) goto out; msg = "ALIGNMENT_FIX_ERR"; } else { msg = "ADDRESS_ERR"; } /* FALL THROUGH */ case T_BUS_ERR_IFETCH + T_USER: /* BERR asserted to cpu */ case T_BUS_ERR_LD_ST + T_USER: /* BERR asserted to cpu */ ucode = 0; /* XXX should be VM_PROT_something */ i = SIGBUS; addr = trapframe->pc; if (!msg) msg = "BUS_ERR"; log_bad_page_fault(msg, trapframe, type); break; case T_SYSCALL + T_USER: { int error; td->td_sa.trapframe = trapframe; error = syscallenter(td); #if !defined(SMP) && (defined(DDB) || defined(DEBUG)) if (trp == trapdebug) trapdebug[TRAPSIZE - 1].code = td->td_sa.code; else trp[-1].code = td->td_sa.code; #endif trapdebug_enter(td->td_frame, -td->td_sa.code); /* * The sync'ing of I & D caches for SYS_ptrace() is * done by procfs_domem() through procfs_rwmem() * instead of being done here under a special check * for SYS_ptrace(). */ syscallret(td, error); return (trapframe->pc); } #if defined(KDTRACE_HOOKS) || defined(DDB) case T_BREAK: #ifdef KDTRACE_HOOKS if (!usermode && dtrace_invop_jump_addr != 0) { dtrace_invop_jump_addr(trapframe); return (trapframe->pc); } #endif #ifdef DDB kdb_trap(type, 0, trapframe); return (trapframe->pc); #endif #endif case T_BREAK + T_USER: { intptr_t va; uint32_t instr; i = SIGTRAP; ucode = TRAP_BRKPT; addr = trapframe->pc; /* compute address of break instruction */ va = trapframe->pc; if (DELAYBRANCH(trapframe->cause)) va += sizeof(int); if (td->td_md.md_ss_addr != va) break; /* read break instruction */ instr = fuword32((caddr_t)va); if (instr != MIPS_BREAK_SSTEP) break; CTR3(KTR_PTRACE, "trap: tid %d, single step at %#lx: %#08x", td->td_tid, va, instr); PROC_LOCK(p); _PHOLD(p); error = ptrace_clear_single_step(td); _PRELE(p); PROC_UNLOCK(p); if (error == 0) ucode = TRAP_TRACE; break; } case T_IWATCH + T_USER: case T_DWATCH + T_USER: { intptr_t va; /* compute address of trapped instruction */ va = trapframe->pc; if (DELAYBRANCH(trapframe->cause)) va += sizeof(int); printf("watch exception @ %p\n", (void *)va); i = SIGTRAP; ucode = TRAP_BRKPT; addr = va; break; } case T_TRAP + T_USER: { intptr_t va; struct trapframe *locr0 = td->td_frame; /* compute address of trap instruction */ va = trapframe->pc; if (DELAYBRANCH(trapframe->cause)) va += sizeof(int); if (DELAYBRANCH(trapframe->cause)) { /* Check BD bit */ locr0->pc = MipsEmulateBranch(locr0, trapframe->pc, 0, 0); } else { locr0->pc += sizeof(int); } addr = va; i = SIGEMT; /* Stuff it with something for now */ break; } case T_RES_INST + T_USER: { InstFmt inst; inst = *(InstFmt *)(intptr_t)trapframe->pc; switch (inst.RType.op) { case OP_SPECIAL3: switch (inst.RType.func) { case OP_RDHWR: /* Register 29 used for TLS */ if (inst.RType.rd == 29) { frame_regs = &(trapframe->zero); frame_regs[inst.RType.rt] = (register_t)(intptr_t)td->td_md.md_tls; frame_regs[inst.RType.rt] += td->td_md.md_tls_tcb_offset; trapframe->pc += sizeof(int); goto out; } break; } break; } log_illegal_instruction("RES_INST", trapframe); i = SIGILL; addr = trapframe->pc; } break; case T_C2E: case T_C2E + T_USER: goto err; break; case T_COP_UNUSABLE: #ifdef CPU_CNMIPS cop = (trapframe->cause & MIPS_CR_COP_ERR) >> MIPS_CR_COP_ERR_SHIFT; /* Handle only COP2 exception */ if (cop != 2) goto err; addr = trapframe->pc; /* save userland cop2 context if it has been touched */ if ((td->td_md.md_flags & MDTD_COP2USED) && (td->td_md.md_cop2owner == COP2_OWNER_USERLAND)) { if (td->td_md.md_ucop2) octeon_cop2_save(td->td_md.md_ucop2); else panic("COP2 was used in user mode but md_ucop2 is NULL"); } if (td->td_md.md_cop2 == NULL) { td->td_md.md_cop2 = octeon_cop2_alloc_ctx(); if (td->td_md.md_cop2 == NULL) panic("Failed to allocate COP2 context"); memset(td->td_md.md_cop2, 0, sizeof(*td->td_md.md_cop2)); } octeon_cop2_restore(td->td_md.md_cop2); /* Make userland re-request its context */ td->td_frame->sr &= ~MIPS_SR_COP_2_BIT; td->td_md.md_flags |= MDTD_COP2USED; td->td_md.md_cop2owner = COP2_OWNER_KERNEL; /* Enable COP2, it will be disabled in cpu_switch */ mips_wr_status(mips_rd_status() | MIPS_SR_COP_2_BIT); return (trapframe->pc); #else goto err; break; #endif case T_COP_UNUSABLE + T_USER: cop = (trapframe->cause & MIPS_CR_COP_ERR) >> MIPS_CR_COP_ERR_SHIFT; if (cop == 1) { /* FP (COP1) instruction */ if (cpuinfo.fpu_id == 0) { log_illegal_instruction("COP1_UNUSABLE", trapframe); i = SIGILL; break; } addr = trapframe->pc; MipsSwitchFPState(PCPU_GET(fpcurthread), td->td_frame); PCPU_SET(fpcurthread, td); #if defined(__mips_n32) || defined(__mips_n64) td->td_frame->sr |= MIPS_SR_COP_1_BIT | MIPS_SR_FR; #else td->td_frame->sr |= MIPS_SR_COP_1_BIT; #endif td->td_md.md_flags |= MDTD_FPUSED; goto out; } #ifdef CPU_CNMIPS else if (cop == 2) { addr = trapframe->pc; if ((td->td_md.md_flags & MDTD_COP2USED) && (td->td_md.md_cop2owner == COP2_OWNER_KERNEL)) { if (td->td_md.md_cop2) octeon_cop2_save(td->td_md.md_cop2); else panic("COP2 was used in kernel mode but md_cop2 is NULL"); } if (td->td_md.md_ucop2 == NULL) { td->td_md.md_ucop2 = octeon_cop2_alloc_ctx(); if (td->td_md.md_ucop2 == NULL) panic("Failed to allocate userland COP2 context"); memset(td->td_md.md_ucop2, 0, sizeof(*td->td_md.md_ucop2)); } octeon_cop2_restore(td->td_md.md_ucop2); td->td_frame->sr |= MIPS_SR_COP_2_BIT; td->td_md.md_flags |= MDTD_COP2USED; td->td_md.md_cop2owner = COP2_OWNER_USERLAND; goto out; } #endif else { log_illegal_instruction("COPn_UNUSABLE", trapframe); i = SIGILL; /* only FPU instructions allowed */ break; } case T_FPE: #if !defined(SMP) && (defined(DDB) || defined(DEBUG)) trapDump("fpintr"); #else printf("FPU Trap: PC %#jx CR %x SR %x\n", (intmax_t)trapframe->pc, (unsigned)trapframe->cause, (unsigned)trapframe->sr); goto err; #endif case T_FPE + T_USER: if (!emulate_fp) { i = SIGFPE; addr = trapframe->pc; break; } MipsFPTrap(trapframe->sr, trapframe->cause, trapframe->pc); goto out; case T_OVFLOW + T_USER: i = SIGFPE; addr = trapframe->pc; break; case T_ADDR_ERR_LD: /* misaligned access */ case T_ADDR_ERR_ST: /* misaligned access */ #ifdef TRAP_DEBUG if (trap_debug) { printf("+++ ADDR_ERR: type = %d, badvaddr = %#jx\n", type, (intmax_t)trapframe->badvaddr); } #endif /* Only allow emulation on a user address */ if (allow_unaligned_acc && ((vm_offset_t)trapframe->badvaddr < VM_MAXUSER_ADDRESS)) { int mode; if (type == T_ADDR_ERR_LD) mode = VM_PROT_READ; else mode = VM_PROT_WRITE; access_type = emulate_unaligned_access(trapframe, mode); if (access_type != 0) return (trapframe->pc); } /* FALLTHROUGH */ case T_BUS_ERR_LD_ST: /* BERR asserted to cpu */ if (td->td_pcb->pcb_onfault != NULL) { pc = (register_t)(intptr_t)td->td_pcb->pcb_onfault; td->td_pcb->pcb_onfault = NULL; return (pc); } /* FALLTHROUGH */ default: err: #if !defined(SMP) && defined(DEBUG) trapDump("trap"); #endif #ifdef SMP printf("cpu:%d-", PCPU_GET(cpuid)); #endif printf("Trap cause = %d (%s - ", type, trap_type[type & (~T_USER)]); if (type & T_USER) printf("user mode)\n"); else printf("kernel mode)\n"); #ifdef TRAP_DEBUG if (trap_debug) printf("badvaddr = %#jx, pc = %#jx, ra = %#jx, sr = %#jxx\n", (intmax_t)trapframe->badvaddr, (intmax_t)trapframe->pc, (intmax_t)trapframe->ra, (intmax_t)trapframe->sr); #endif #ifdef KDB if (debugger_on_trap) { kdb_why = KDB_WHY_TRAP; kdb_trap(type, 0, trapframe); kdb_why = KDB_WHY_UNSET; } #endif panic("trap"); } td->td_frame->pc = trapframe->pc; td->td_frame->cause = trapframe->cause; td->td_frame->badvaddr = trapframe->badvaddr; ksiginfo_init_trap(&ksi); ksi.ksi_signo = i; ksi.ksi_code = ucode; ksi.ksi_addr = (void *)addr; ksi.ksi_trapno = type; trapsignal(td, &ksi); out: /* * Note: we should only get here if returning to user mode. */ userret(td, trapframe); return (trapframe->pc); } #if !defined(SMP) && (defined(DDB) || defined(DEBUG)) void trapDump(char *msg) { register_t s; int i; s = intr_disable(); printf("trapDump(%s)\n", msg); for (i = 0; i < TRAPSIZE; i++) { if (trp == trapdebug) { trp = &trapdebug[TRAPSIZE - 1]; } else { trp--; } if (trp->cause == 0) break; printf("%s: ADR %jx PC %jx CR %jx SR %jx\n", trap_type[(trp->cause & MIPS_CR_EXC_CODE) >> MIPS_CR_EXC_CODE_SHIFT], (intmax_t)trp->vadr, (intmax_t)trp->pc, (intmax_t)trp->cause, (intmax_t)trp->status); printf(" RA %jx SP %jx code %d\n", (intmax_t)trp->ra, (intmax_t)trp->sp, (int)trp->code); } intr_restore(s); } #endif /* * Return the resulting PC as if the branch was executed. */ uintptr_t MipsEmulateBranch(struct trapframe *framePtr, uintptr_t instPC, int fpcCSR, uintptr_t instptr) { InstFmt inst; register_t *regsPtr = (register_t *) framePtr; uintptr_t retAddr = 0; int condition; #define GetBranchDest(InstPtr, inst) \ (InstPtr + 4 + ((short)inst.IType.imm << 2)) if (instptr) { if (instptr < MIPS_KSEG0_START) inst.word = fuword32((void *)instptr); else inst = *(InstFmt *) instptr; } else { if ((vm_offset_t)instPC < MIPS_KSEG0_START) inst.word = fuword32((void *)instPC); else inst = *(InstFmt *) instPC; } switch ((int)inst.JType.op) { case OP_SPECIAL: switch ((int)inst.RType.func) { case OP_JR: case OP_JALR: retAddr = regsPtr[inst.RType.rs]; break; default: retAddr = instPC + 4; break; } break; case OP_BCOND: switch ((int)inst.IType.rt) { case OP_BLTZ: case OP_BLTZL: case OP_BLTZAL: case OP_BLTZALL: if ((int)(regsPtr[inst.RType.rs]) < 0) retAddr = GetBranchDest(instPC, inst); else retAddr = instPC + 8; break; case OP_BGEZ: case OP_BGEZL: case OP_BGEZAL: case OP_BGEZALL: if ((int)(regsPtr[inst.RType.rs]) >= 0) retAddr = GetBranchDest(instPC, inst); else retAddr = instPC + 8; break; case OP_TGEI: case OP_TGEIU: case OP_TLTI: case OP_TLTIU: case OP_TEQI: case OP_TNEI: retAddr = instPC + 4; /* Like syscall... */ break; default: panic("MipsEmulateBranch: Bad branch cond"); } break; case OP_J: case OP_JAL: retAddr = (inst.JType.target << 2) | ((unsigned)(instPC + 4) & 0xF0000000); break; case OP_BEQ: case OP_BEQL: if (regsPtr[inst.RType.rs] == regsPtr[inst.RType.rt]) retAddr = GetBranchDest(instPC, inst); else retAddr = instPC + 8; break; case OP_BNE: case OP_BNEL: if (regsPtr[inst.RType.rs] != regsPtr[inst.RType.rt]) retAddr = GetBranchDest(instPC, inst); else retAddr = instPC + 8; break; case OP_BLEZ: case OP_BLEZL: if ((int)(regsPtr[inst.RType.rs]) <= 0) retAddr = GetBranchDest(instPC, inst); else retAddr = instPC + 8; break; case OP_BGTZ: case OP_BGTZL: if ((int)(regsPtr[inst.RType.rs]) > 0) retAddr = GetBranchDest(instPC, inst); else retAddr = instPC + 8; break; case OP_COP1: switch (inst.RType.rs) { case OP_BCx: case OP_BCy: if ((inst.RType.rt & COPz_BC_TF_MASK) == COPz_BC_TRUE) condition = fpcCSR & MIPS_FPU_COND_BIT; else condition = !(fpcCSR & MIPS_FPU_COND_BIT); if (condition) retAddr = GetBranchDest(instPC, inst); else retAddr = instPC + 8; break; default: retAddr = instPC + 4; } break; default: retAddr = instPC + 4; } return (retAddr); } static void log_frame_dump(struct trapframe *frame) { log(LOG_ERR, "Trapframe Register Dump:\n"); log(LOG_ERR, "\tzero: %#jx\tat: %#jx\tv0: %#jx\tv1: %#jx\n", (intmax_t)0, (intmax_t)frame->ast, (intmax_t)frame->v0, (intmax_t)frame->v1); log(LOG_ERR, "\ta0: %#jx\ta1: %#jx\ta2: %#jx\ta3: %#jx\n", (intmax_t)frame->a0, (intmax_t)frame->a1, (intmax_t)frame->a2, (intmax_t)frame->a3); #if defined(__mips_n32) || defined(__mips_n64) log(LOG_ERR, "\ta4: %#jx\ta5: %#jx\ta6: %#jx\ta6: %#jx\n", (intmax_t)frame->a4, (intmax_t)frame->a5, (intmax_t)frame->a6, (intmax_t)frame->a7); log(LOG_ERR, "\tt0: %#jx\tt1: %#jx\tt2: %#jx\tt3: %#jx\n", (intmax_t)frame->t0, (intmax_t)frame->t1, (intmax_t)frame->t2, (intmax_t)frame->t3); #else log(LOG_ERR, "\tt0: %#jx\tt1: %#jx\tt2: %#jx\tt3: %#jx\n", (intmax_t)frame->t0, (intmax_t)frame->t1, (intmax_t)frame->t2, (intmax_t)frame->t3); log(LOG_ERR, "\tt4: %#jx\tt5: %#jx\tt6: %#jx\tt7: %#jx\n", (intmax_t)frame->t4, (intmax_t)frame->t5, (intmax_t)frame->t6, (intmax_t)frame->t7); #endif log(LOG_ERR, "\tt8: %#jx\tt9: %#jx\ts0: %#jx\ts1: %#jx\n", (intmax_t)frame->t8, (intmax_t)frame->t9, (intmax_t)frame->s0, (intmax_t)frame->s1); log(LOG_ERR, "\ts2: %#jx\ts3: %#jx\ts4: %#jx\ts5: %#jx\n", (intmax_t)frame->s2, (intmax_t)frame->s3, (intmax_t)frame->s4, (intmax_t)frame->s5); log(LOG_ERR, "\ts6: %#jx\ts7: %#jx\tk0: %#jx\tk1: %#jx\n", (intmax_t)frame->s6, (intmax_t)frame->s7, (intmax_t)frame->k0, (intmax_t)frame->k1); log(LOG_ERR, "\tgp: %#jx\tsp: %#jx\ts8: %#jx\tra: %#jx\n", (intmax_t)frame->gp, (intmax_t)frame->sp, (intmax_t)frame->s8, (intmax_t)frame->ra); log(LOG_ERR, "\tsr: %#jx\tmullo: %#jx\tmulhi: %#jx\tbadvaddr: %#jx\n", (intmax_t)frame->sr, (intmax_t)frame->mullo, (intmax_t)frame->mulhi, (intmax_t)frame->badvaddr); log(LOG_ERR, "\tcause: %#jx\tpc: %#jx\n", (intmax_t)frame->cause, (intmax_t)frame->pc); } #ifdef TRAP_DEBUG static void trap_frame_dump(struct trapframe *frame) { printf("Trapframe Register Dump:\n"); printf("\tzero: %#jx\tat: %#jx\tv0: %#jx\tv1: %#jx\n", (intmax_t)0, (intmax_t)frame->ast, (intmax_t)frame->v0, (intmax_t)frame->v1); printf("\ta0: %#jx\ta1: %#jx\ta2: %#jx\ta3: %#jx\n", (intmax_t)frame->a0, (intmax_t)frame->a1, (intmax_t)frame->a2, (intmax_t)frame->a3); #if defined(__mips_n32) || defined(__mips_n64) printf("\ta4: %#jx\ta5: %#jx\ta6: %#jx\ta7: %#jx\n", (intmax_t)frame->a4, (intmax_t)frame->a5, (intmax_t)frame->a6, (intmax_t)frame->a7); printf("\tt0: %#jx\tt1: %#jx\tt2: %#jx\tt3: %#jx\n", (intmax_t)frame->t0, (intmax_t)frame->t1, (intmax_t)frame->t2, (intmax_t)frame->t3); #else printf("\tt0: %#jx\tt1: %#jx\tt2: %#jx\tt3: %#jx\n", (intmax_t)frame->t0, (intmax_t)frame->t1, (intmax_t)frame->t2, (intmax_t)frame->t3); printf("\tt4: %#jx\tt5: %#jx\tt6: %#jx\tt7: %#jx\n", (intmax_t)frame->t4, (intmax_t)frame->t5, (intmax_t)frame->t6, (intmax_t)frame->t7); #endif printf("\tt8: %#jx\tt9: %#jx\ts0: %#jx\ts1: %#jx\n", (intmax_t)frame->t8, (intmax_t)frame->t9, (intmax_t)frame->s0, (intmax_t)frame->s1); printf("\ts2: %#jx\ts3: %#jx\ts4: %#jx\ts5: %#jx\n", (intmax_t)frame->s2, (intmax_t)frame->s3, (intmax_t)frame->s4, (intmax_t)frame->s5); printf("\ts6: %#jx\ts7: %#jx\tk0: %#jx\tk1: %#jx\n", (intmax_t)frame->s6, (intmax_t)frame->s7, (intmax_t)frame->k0, (intmax_t)frame->k1); printf("\tgp: %#jx\tsp: %#jx\ts8: %#jx\tra: %#jx\n", (intmax_t)frame->gp, (intmax_t)frame->sp, (intmax_t)frame->s8, (intmax_t)frame->ra); printf("\tsr: %#jx\tmullo: %#jx\tmulhi: %#jx\tbadvaddr: %#jx\n", (intmax_t)frame->sr, (intmax_t)frame->mullo, (intmax_t)frame->mulhi, (intmax_t)frame->badvaddr); printf("\tcause: %#jx\tpc: %#jx\n", (intmax_t)frame->cause, (intmax_t)frame->pc); } #endif static void get_mapping_info(vm_offset_t va, pd_entry_t **pdepp, pt_entry_t **ptepp) { pt_entry_t *ptep; pd_entry_t *pdep; struct proc *p = curproc; pdep = (&(p->p_vmspace->vm_pmap.pm_segtab[(va >> SEGSHIFT) & (NPDEPG - 1)])); if (*pdep) ptep = pmap_pte(&p->p_vmspace->vm_pmap, va); else ptep = (pt_entry_t *)0; *pdepp = pdep; *ptepp = ptep; } static void log_illegal_instruction(const char *msg, struct trapframe *frame) { pt_entry_t *ptep; pd_entry_t *pdep; unsigned int *addr; struct thread *td; struct proc *p; register_t pc; td = curthread; p = td->td_proc; #ifdef SMP printf("cpuid = %d\n", PCPU_GET(cpuid)); #endif pc = frame->pc + (DELAYBRANCH(frame->cause) ? 4 : 0); log(LOG_ERR, "%s: pid %d tid %ld (%s), uid %d: pc %#jx ra %#jx\n", msg, p->p_pid, (long)td->td_tid, p->p_comm, p->p_ucred ? p->p_ucred->cr_uid : -1, (intmax_t)pc, (intmax_t)frame->ra); /* log registers in trap frame */ log_frame_dump(frame); get_mapping_info((vm_offset_t)pc, &pdep, &ptep); /* * Dump a few words around faulting instruction, if the addres is * valid. */ if (!(pc & 3) && useracc((caddr_t)(intptr_t)pc, sizeof(int) * 4, VM_PROT_READ)) { /* dump page table entry for faulting instruction */ log(LOG_ERR, "Page table info for pc address %#jx: pde = %p, pte = %#jx\n", (intmax_t)pc, (void *)(intptr_t)*pdep, (uintmax_t)(ptep ? *ptep : 0)); addr = (unsigned int *)(intptr_t)pc; log(LOG_ERR, "Dumping 4 words starting at pc address %p: \n", addr); log(LOG_ERR, "%08x %08x %08x %08x\n", addr[0], addr[1], addr[2], addr[3]); } else { log(LOG_ERR, "pc address %#jx is inaccessible, pde = %p, pte = %#jx\n", (intmax_t)pc, (void *)(intptr_t)*pdep, (uintmax_t)(ptep ? *ptep : 0)); } } static void log_bad_page_fault(char *msg, struct trapframe *frame, int trap_type) { pt_entry_t *ptep; pd_entry_t *pdep; unsigned int *addr; struct thread *td; struct proc *p; char *read_or_write; register_t pc; trap_type &= ~T_USER; td = curthread; p = td->td_proc; #ifdef SMP printf("cpuid = %d\n", PCPU_GET(cpuid)); #endif switch (trap_type) { case T_TLB_MOD: case T_TLB_ST_MISS: case T_ADDR_ERR_ST: read_or_write = "write"; break; case T_TLB_LD_MISS: case T_ADDR_ERR_LD: case T_BUS_ERR_IFETCH: read_or_write = "read"; break; default: read_or_write = "unknown"; } pc = frame->pc + (DELAYBRANCH(frame->cause) ? 4 : 0); log(LOG_ERR, "%s: pid %d tid %ld (%s), uid %d: pc %#jx got a %s fault " "(type %#x) at %#jx\n", msg, p->p_pid, (long)td->td_tid, p->p_comm, p->p_ucred ? p->p_ucred->cr_uid : -1, (intmax_t)pc, read_or_write, trap_type, (intmax_t)frame->badvaddr); /* log registers in trap frame */ log_frame_dump(frame); get_mapping_info((vm_offset_t)pc, &pdep, &ptep); /* * Dump a few words around faulting instruction, if the addres is * valid. */ if (!(pc & 3) && (pc != frame->badvaddr) && (trap_type != T_BUS_ERR_IFETCH) && useracc((caddr_t)(intptr_t)pc, sizeof(int) * 4, VM_PROT_READ)) { /* dump page table entry for faulting instruction */ log(LOG_ERR, "Page table info for pc address %#jx: pde = %p, pte = %#jx\n", (intmax_t)pc, (void *)(intptr_t)*pdep, (uintmax_t)(ptep ? *ptep : 0)); addr = (unsigned int *)(intptr_t)pc; log(LOG_ERR, "Dumping 4 words starting at pc address %p: \n", addr); log(LOG_ERR, "%08x %08x %08x %08x\n", addr[0], addr[1], addr[2], addr[3]); } else { log(LOG_ERR, "pc address %#jx is inaccessible, pde = %p, pte = %#jx\n", (intmax_t)pc, (void *)(intptr_t)*pdep, (uintmax_t)(ptep ? *ptep : 0)); } get_mapping_info((vm_offset_t)frame->badvaddr, &pdep, &ptep); log(LOG_ERR, "Page table info for bad address %#jx: pde = %p, pte = %#jx\n", (intmax_t)frame->badvaddr, (void *)(intptr_t)*pdep, (uintmax_t)(ptep ? *ptep : 0)); } /* * Unaligned load/store emulation */ static int mips_unaligned_load_store(struct trapframe *frame, int mode, register_t addr, register_t pc) { register_t *reg = (register_t *) frame; u_int32_t inst = *((u_int32_t *)(intptr_t)pc); register_t value_msb, value; unsigned size; /* * ADDR_ERR faults have higher priority than TLB * Miss faults. Therefore, it is necessary to * verify that the faulting address is a valid * virtual address within the process' address space * before trying to emulate the unaligned access. */ switch (MIPS_INST_OPCODE(inst)) { case OP_LHU: case OP_LH: case OP_SH: size = 2; break; case OP_LWU: case OP_LW: case OP_SW: size = 4; break; case OP_LD: case OP_SD: size = 8; break; default: printf("%s: unhandled opcode in address error: %#x\n", __func__, MIPS_INST_OPCODE(inst)); return (0); } if (!useracc((void *)rounddown2((vm_offset_t)addr, size), size * 2, mode)) return (0); /* * XXX * Handle LL/SC LLD/SCD. */ switch (MIPS_INST_OPCODE(inst)) { case OP_LHU: KASSERT(mode == VM_PROT_READ, ("access mode must be read for load instruction.")); lbu_macro(value_msb, addr); addr += 1; lbu_macro(value, addr); value |= value_msb << 8; reg[MIPS_INST_RT(inst)] = value; return (MIPS_LHU_ACCESS); case OP_LH: KASSERT(mode == VM_PROT_READ, ("access mode must be read for load instruction.")); lb_macro(value_msb, addr); addr += 1; lbu_macro(value, addr); value |= value_msb << 8; reg[MIPS_INST_RT(inst)] = value; return (MIPS_LH_ACCESS); case OP_LWU: KASSERT(mode == VM_PROT_READ, ("access mode must be read for load instruction.")); lwl_macro(value, addr); addr += 3; lwr_macro(value, addr); value &= 0xffffffff; reg[MIPS_INST_RT(inst)] = value; return (MIPS_LWU_ACCESS); case OP_LW: KASSERT(mode == VM_PROT_READ, ("access mode must be read for load instruction.")); lwl_macro(value, addr); addr += 3; lwr_macro(value, addr); reg[MIPS_INST_RT(inst)] = value; return (MIPS_LW_ACCESS); #if defined(__mips_n32) || defined(__mips_n64) case OP_LD: KASSERT(mode == VM_PROT_READ, ("access mode must be read for load instruction.")); ldl_macro(value, addr); addr += 7; ldr_macro(value, addr); reg[MIPS_INST_RT(inst)] = value; return (MIPS_LD_ACCESS); #endif case OP_SH: KASSERT(mode == VM_PROT_WRITE, ("access mode must be write for store instruction.")); value = reg[MIPS_INST_RT(inst)]; value_msb = value >> 8; sb_macro(value_msb, addr); addr += 1; sb_macro(value, addr); return (MIPS_SH_ACCESS); case OP_SW: KASSERT(mode == VM_PROT_WRITE, ("access mode must be write for store instruction.")); value = reg[MIPS_INST_RT(inst)]; swl_macro(value, addr); addr += 3; swr_macro(value, addr); return (MIPS_SW_ACCESS); #if defined(__mips_n32) || defined(__mips_n64) case OP_SD: KASSERT(mode == VM_PROT_WRITE, ("access mode must be write for store instruction.")); value = reg[MIPS_INST_RT(inst)]; sdl_macro(value, addr); addr += 7; sdr_macro(value, addr); return (MIPS_SD_ACCESS); #endif } panic("%s: should not be reached.", __func__); } /* * XXX TODO: SMP? */ static struct timeval unaligned_lasterr; static int unaligned_curerr; static int unaligned_pps_log_limit = 4; SYSCTL_INT(_machdep, OID_AUTO, unaligned_log_pps_limit, CTLFLAG_RWTUN, &unaligned_pps_log_limit, 0, "limit number of userland unaligned log messages per second"); static int emulate_unaligned_access(struct trapframe *frame, int mode) { register_t pc; int access_type = 0; struct thread *td = curthread; struct proc *p = curproc; pc = frame->pc + (DELAYBRANCH(frame->cause) ? 4 : 0); /* * Fall through if it's instruction fetch exception */ if (!((pc & 3) || (pc == frame->badvaddr))) { /* * Handle unaligned load and store */ /* * Return access type if the instruction was emulated. * Otherwise restore pc and fall through. */ access_type = mips_unaligned_load_store(frame, mode, frame->badvaddr, pc); if (access_type) { if (DELAYBRANCH(frame->cause)) frame->pc = MipsEmulateBranch(frame, frame->pc, 0, 0); else frame->pc += 4; if (ppsratecheck(&unaligned_lasterr, &unaligned_curerr, unaligned_pps_log_limit)) { /* XXX TODO: keep global/tid/pid counters? */ log(LOG_INFO, "Unaligned %s: pid=%ld (%s), tid=%ld, " "pc=%#jx, badvaddr=%#jx\n", access_name[access_type - 1], (long) p->p_pid, p->p_comm, (long) td->td_tid, (intmax_t)pc, (intmax_t)frame->badvaddr); } } } return access_type; } Index: head/sys/powerpc/powerpc/elf32_machdep.c =================================================================== --- head/sys/powerpc/powerpc/elf32_machdep.c (revision 342242) +++ head/sys/powerpc/powerpc/elf32_machdep.c (revision 342243) @@ -1,397 +1,396 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __powerpc64__ #include #include extern const char *freebsd32_syscallnames[]; static void ppc32_fixlimit(struct rlimit *rl, int which); static SYSCTL_NODE(_compat, OID_AUTO, ppc32, CTLFLAG_RW, 0, "32-bit mode"); #define PPC32_MAXDSIZ (1024*1024*1024) static u_long ppc32_maxdsiz = PPC32_MAXDSIZ; SYSCTL_ULONG(_compat_ppc32, OID_AUTO, maxdsiz, CTLFLAG_RWTUN, &ppc32_maxdsiz, 0, ""); #define PPC32_MAXSSIZ (64*1024*1024) u_long ppc32_maxssiz = PPC32_MAXSSIZ; SYSCTL_ULONG(_compat_ppc32, OID_AUTO, maxssiz, CTLFLAG_RWTUN, &ppc32_maxssiz, 0, ""); #endif struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, #ifdef __powerpc64__ .sv_table = freebsd32_sysent, #else .sv_table = sysent, #endif - .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode32, .sv_szsigcode = &szsigcode32, .sv_name = "FreeBSD ELF32", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_stackprot = VM_PROT_ALL, #ifdef __powerpc64__ .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = FREEBSD32_USRSTACK, .sv_psstrings = FREEBSD32_PS_STRINGS, .sv_copyout_strings = freebsd32_copyout_strings, .sv_setregs = ppc32_setregs, .sv_syscallnames = freebsd32_syscallnames, .sv_fixlimit = ppc32_fixlimit, #else .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_syscallnames = syscallnames, .sv_fixlimit = NULL, #endif .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_ILP32 | SV_SHP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_shared_page_base = FREEBSD32_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_hwcap = &cpu_features, .sv_hwcap2 = &cpu_features2, }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); static Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, #ifdef __powerpc64__ .interp_newpath = "/libexec/ld-elf32.so.1", #else .interp_newpath = NULL, #endif .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); static Elf32_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(oelf32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_oinfo); void elf_reloc_self(Elf_Dyn *dynp, Elf_Addr relocbase); void elf32_dump_thread(struct thread *td, void *dst, size_t *off) { size_t len; struct pcb *pcb; uint64_t vshr[32]; uint64_t *vsr_dw1; int vsr_idx; len = 0; pcb = td->td_pcb; if (pcb->pcb_flags & PCB_VEC) { save_vec_nodrop(td); if (dst != NULL) { len += elf32_populate_note(NT_PPC_VMX, &pcb->pcb_vec, (char *)dst + len, sizeof(pcb->pcb_vec), NULL); } else len += elf32_populate_note(NT_PPC_VMX, NULL, NULL, sizeof(pcb->pcb_vec), NULL); } if (pcb->pcb_flags & PCB_VSX) { save_fpu_nodrop(td); if (dst != NULL) { /* * Doubleword 0 of VSR0-VSR31 overlap with FPR0-FPR31 and * VSR32-VSR63 overlap with VR0-VR31, so we only copy * the non-overlapping data, which is doubleword 1 of VSR0-VSR31. */ for (vsr_idx = 0; vsr_idx < nitems(vshr); vsr_idx++) { vsr_dw1 = (uint64_t *)&pcb->pcb_fpu.fpr[vsr_idx].vsr[2]; vshr[vsr_idx] = *vsr_dw1; } len += elf32_populate_note(NT_PPC_VSX, vshr, (char *)dst + len, sizeof(vshr), NULL); } else len += elf32_populate_note(NT_PPC_VSX, NULL, NULL, sizeof(vshr), NULL); } *off = len; } #ifndef __powerpc64__ bool elf_is_ifunc_reloc(Elf_Size r_info __unused) { return (false); } /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, int local, elf_lookup_fn lookup) { Elf_Addr *where; Elf_Half *hwhere; Elf_Addr addr; Elf_Addr addend; Elf_Word rtype, symidx; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: panic("PPC only supports RELA relocations"); break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) ((uintptr_t)relocbase + rela->r_offset); hwhere = (Elf_Half *) ((uintptr_t)relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("elf_reloc: unknown relocation mode %d\n", type); } switch (rtype) { case R_PPC_NONE: break; case R_PPC_ADDR32: /* word32 S + A */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; *where = elf_relocaddr(lf, addr + addend); break; case R_PPC_ADDR16_LO: /* #lo(S) */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; /* * addend values are sometimes relative to sections * (i.e. .rodata) in rela, where in reality they * are relative to relocbase. Detect this condition. */ if (addr > relocbase && addr <= (relocbase + addend)) addr = relocbase; addr = elf_relocaddr(lf, addr + addend); *hwhere = addr & 0xffff; break; case R_PPC_ADDR16_HA: /* #ha(S) */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; /* * addend values are sometimes relative to sections * (i.e. .rodata) in rela, where in reality they * are relative to relocbase. Detect this condition. */ if (addr > relocbase && addr <= (relocbase + addend)) addr = relocbase; addr = elf_relocaddr(lf, addr + addend); *hwhere = ((addr >> 16) + ((addr & 0x8000) ? 1 : 0)) & 0xffff; break; case R_PPC_RELATIVE: /* word32 B + A */ *where = elf_relocaddr(lf, relocbase + addend); break; default: printf("kldload: unexpected relocation type %d\n", (int) rtype); return -1; } return(0); } void elf_reloc_self(Elf_Dyn *dynp, Elf_Addr relocbase) { Elf_Rela *rela = NULL, *relalim; Elf_Addr relasz = 0; Elf_Addr *where; /* * Extract the rela/relasz values from the dynamic section */ for (; dynp->d_tag != DT_NULL; dynp++) { switch (dynp->d_tag) { case DT_RELA: rela = (Elf_Rela *)(relocbase+dynp->d_un.d_ptr); break; case DT_RELASZ: relasz = dynp->d_un.d_val; break; } } /* * Relocate these values */ relalim = (Elf_Rela *)((caddr_t)rela + relasz); for (; rela < relalim; rela++) { if (ELF_R_TYPE(rela->r_info) != R_PPC_RELATIVE) continue; where = (Elf_Addr *)(relocbase + rela->r_offset); *where = (Elf_Addr)(relocbase + rela->r_addend); } } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup)); } int elf_cpu_load_file(linker_file_t lf) { /* Only sync the cache for non-kernel modules */ if (lf->id != 1) __syncicache(lf->address, lf->size); return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } #endif #ifdef __powerpc64__ static void ppc32_fixlimit(struct rlimit *rl, int which) { switch (which) { case RLIMIT_DATA: if (ppc32_maxdsiz != 0) { if (rl->rlim_cur > ppc32_maxdsiz) rl->rlim_cur = ppc32_maxdsiz; if (rl->rlim_max > ppc32_maxdsiz) rl->rlim_max = ppc32_maxdsiz; } break; case RLIMIT_STACK: if (ppc32_maxssiz != 0) { if (rl->rlim_cur > ppc32_maxssiz) rl->rlim_cur = ppc32_maxssiz; if (rl->rlim_max > ppc32_maxssiz) rl->rlim_max = ppc32_maxssiz; } break; } } #endif Index: head/sys/powerpc/powerpc/elf64_machdep.c =================================================================== --- head/sys/powerpc/powerpc/elf64_machdep.c (revision 342242) +++ head/sys/powerpc/powerpc/elf64_machdep.c (revision 342243) @@ -1,411 +1,409 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void exec_setregs_funcdesc(struct thread *td, struct image_params *imgp, u_long stack); struct sysentvec elf64_freebsd_sysvec_v1 = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, - .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode64, .sv_szsigcode = &szsigcode64, .sv_name = "FreeBSD ELF64", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs_funcdesc, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_hwcap = &cpu_features, .sv_hwcap2 = &cpu_features2, }; INIT_SYSENTVEC(elf64_sysvec_v1, &elf64_freebsd_sysvec_v1); struct sysentvec elf64_freebsd_sysvec_v2 = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, - .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode64_elfv2, .sv_szsigcode = &szsigcode64_elfv2, .sv_name = "FreeBSD ELF64 V2", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, }; INIT_SYSENTVEC(elf64_sysvec_v2, &elf64_freebsd_sysvec_v2); static boolean_t ppc64_elfv1_header_match(struct image_params *params); static boolean_t ppc64_elfv2_header_match(struct image_params *params); static Elf64_Brandinfo freebsd_brand_info_elfv1 = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec_v1, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, .header_supported = &ppc64_elfv1_header_match }; SYSINIT(elf64v1, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_info_elfv1); static Elf64_Brandinfo freebsd_brand_info_elfv2 = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec_v2, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, .header_supported = &ppc64_elfv2_header_match }; SYSINIT(elf64v2, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_info_elfv2); static Elf64_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec_v1, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, .header_supported = &ppc64_elfv1_header_match }; SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_oinfo); void elf_reloc_self(Elf_Dyn *dynp, Elf_Addr relocbase); static boolean_t ppc64_elfv1_header_match(struct image_params *params) { const Elf64_Ehdr *hdr = (const Elf64_Ehdr *)params->image_header; int abi = (hdr->e_flags & 3); return (abi == 0 || abi == 1); } static boolean_t ppc64_elfv2_header_match(struct image_params *params) { const Elf64_Ehdr *hdr = (const Elf64_Ehdr *)params->image_header; int abi = (hdr->e_flags & 3); return (abi == 2); } static void exec_setregs_funcdesc(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *tf; register_t entry_desc[3]; tf = trapframe(td); exec_setregs(td, imgp, stack); /* * For 64-bit ELFv1, we need to disentangle the function * descriptor * * 0. entry point * 1. TOC value (r2) * 2. Environment pointer (r11) */ (void)copyin((void *)imgp->entry_addr, entry_desc, sizeof(entry_desc)); tf->srr0 = entry_desc[0] + imgp->reloc_base; tf->fixreg[2] = entry_desc[1] + imgp->reloc_base; tf->fixreg[11] = entry_desc[2] + imgp->reloc_base; } void elf64_dump_thread(struct thread *td, void *dst, size_t *off) { size_t len; struct pcb *pcb; uint64_t vshr[32]; uint64_t *vsr_dw1; int vsr_idx; len = 0; pcb = td->td_pcb; if (pcb->pcb_flags & PCB_VEC) { save_vec_nodrop(td); if (dst != NULL) { len += elf64_populate_note(NT_PPC_VMX, &pcb->pcb_vec, (char *)dst + len, sizeof(pcb->pcb_vec), NULL); } else len += elf64_populate_note(NT_PPC_VMX, NULL, NULL, sizeof(pcb->pcb_vec), NULL); } if (pcb->pcb_flags & PCB_VSX) { save_fpu_nodrop(td); if (dst != NULL) { /* * Doubleword 0 of VSR0-VSR31 overlap with FPR0-FPR31 and * VSR32-VSR63 overlap with VR0-VR31, so we only copy * the non-overlapping data, which is doubleword 1 of VSR0-VSR31. */ for (vsr_idx = 0; vsr_idx < nitems(vshr); vsr_idx++) { vsr_dw1 = (uint64_t *)&pcb->pcb_fpu.fpr[vsr_idx].vsr[2]; vshr[vsr_idx] = *vsr_dw1; } len += elf64_populate_note(NT_PPC_VSX, vshr, (char *)dst + len, sizeof(vshr), NULL); } else len += elf64_populate_note(NT_PPC_VSX, NULL, NULL, sizeof(vshr), NULL); } *off = len; } bool elf_is_ifunc_reloc(Elf_Size r_info __unused) { return (false); } /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, int local, elf_lookup_fn lookup) { Elf_Addr *where; Elf_Addr addr; Elf_Addr addend; Elf_Word rtype, symidx; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: panic("PPC only supports RELA relocations"); break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("elf_reloc: unknown relocation mode %d\n", type); } switch (rtype) { case R_PPC_NONE: break; case R_PPC64_ADDR64: /* doubleword64 S + A */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; addr += addend; *where = addr; break; case R_PPC_RELATIVE: /* doubleword64 B + A */ *where = elf_relocaddr(lf, relocbase + addend); break; case R_PPC_JMP_SLOT: /* function descriptor copy */ lookup(lf, symidx, 1, &addr); #if !defined(_CALL_ELF) || _CALL_ELF == 1 memcpy(where, (Elf_Addr *)addr, 3*sizeof(Elf_Addr)); #else *where = addr; #endif __asm __volatile("dcbst 0,%0; sync" :: "r"(where) : "memory"); break; default: printf("kldload: unexpected relocation type %d\n", (int) rtype); return -1; } return(0); } void elf_reloc_self(Elf_Dyn *dynp, Elf_Addr relocbase) { Elf_Rela *rela = NULL, *relalim; Elf_Addr relasz = 0; Elf_Addr *where; /* * Extract the rela/relasz values from the dynamic section */ for (; dynp->d_tag != DT_NULL; dynp++) { switch (dynp->d_tag) { case DT_RELA: rela = (Elf_Rela *)(relocbase+dynp->d_un.d_ptr); break; case DT_RELASZ: relasz = dynp->d_un.d_val; break; } } /* * Relocate these values */ relalim = (Elf_Rela *)((caddr_t)rela + relasz); for (; rela < relalim; rela++) { if (ELF_R_TYPE(rela->r_info) != R_PPC_RELATIVE) continue; where = (Elf_Addr *)(relocbase + rela->r_offset); *where = (Elf_Addr)(relocbase + rela->r_addend); } } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup)); } int elf_cpu_load_file(linker_file_t lf) { /* Only sync the cache for non-kernel modules */ if (lf->id != 1) __syncicache(lf->address, lf->size); return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } Index: head/sys/powerpc/powerpc/exec_machdep.c =================================================================== --- head/sys/powerpc/powerpc/exec_machdep.c (revision 342242) +++ head/sys/powerpc/powerpc/exec_machdep.c (revision 342243) @@ -1,1128 +1,1126 @@ /*- * SPDX-License-Identifier: BSD-4-Clause AND BSD-2-Clause-FreeBSD * * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 2001 Benno Rice * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * $NetBSD: machdep.c,v 1.74.2.1 2000/11/01 16:13:48 tv Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_fpu_emu.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef FPU_EMU #include #endif #ifdef COMPAT_FREEBSD32 #include #include #include typedef struct __ucontext32 { sigset_t uc_sigmask; mcontext32_t uc_mcontext; uint32_t uc_link; struct sigaltstack32 uc_stack; uint32_t uc_flags; uint32_t __spare__[4]; } ucontext32_t; struct sigframe32 { ucontext32_t sf_uc; struct siginfo32 sf_si; }; static int grab_mcontext32(struct thread *td, mcontext32_t *, int flags); #endif static int grab_mcontext(struct thread *, mcontext_t *, int); #ifdef __powerpc64__ extern struct sysentvec elf64_freebsd_sysvec_v2; #endif void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct trapframe *tf; struct sigacts *psp; struct sigframe sf; struct thread *td; struct proc *p; #ifdef COMPAT_FREEBSD32 struct siginfo32 siginfo32; struct sigframe32 sf32; #endif size_t sfpsize; caddr_t sfp, usfp; int oonstack, rndfsize; int sig; int code; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; oonstack = sigonstack(tf->fixreg[1]); /* * Fill siginfo structure. */ ksi->ksi_info.si_signo = ksi->ksi_signo; ksi->ksi_info.si_addr = (void *)((tf->exc == EXC_DSI || tf->exc == EXC_DSE) ? tf->dar : tf->srr0); #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(p, SV_ILP32)) { siginfo_to_siginfo32(&ksi->ksi_info, &siginfo32); sig = siginfo32.si_signo; code = siginfo32.si_code; sfp = (caddr_t)&sf32; sfpsize = sizeof(sf32); rndfsize = roundup(sizeof(sf32), 16); /* * Save user context */ memset(&sf32, 0, sizeof(sf32)); grab_mcontext32(td, &sf32.sf_uc.uc_mcontext, 0); sf32.sf_uc.uc_sigmask = *mask; sf32.sf_uc.uc_stack.ss_sp = (uintptr_t)td->td_sigstk.ss_sp; sf32.sf_uc.uc_stack.ss_size = (uint32_t)td->td_sigstk.ss_size; sf32.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf32.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; } else { #endif sig = ksi->ksi_signo; code = ksi->ksi_code; sfp = (caddr_t)&sf; sfpsize = sizeof(sf); #ifdef __powerpc64__ /* * 64-bit PPC defines a 288 byte scratch region * below the stack. */ rndfsize = 288 + roundup(sizeof(sf), 48); #else rndfsize = roundup(sizeof(sf), 16); #endif /* * Save user context */ memset(&sf, 0, sizeof(sf)); grab_mcontext(td, &sf.sf_uc.uc_mcontext, 0); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; #ifdef COMPAT_FREEBSD32 } #endif CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* * Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { usfp = (void *)(((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - rndfsize) & ~0xFul); } else { usfp = (void *)((tf->fixreg[1] - rndfsize) & ~0xFul); } /* * Save the floating-point state, if necessary, then copy it. */ /* XXX */ /* * Set up the registers to return to sigcode. * * r1/sp - sigframe ptr * lr - sig function, dispatched to by blrl in trampoline * r3 - sig number * r4 - SIGINFO ? &siginfo : exception code * r5 - user context * srr0 - trampoline function addr */ tf->lr = (register_t)catcher; tf->fixreg[1] = (register_t)usfp; tf->fixreg[FIRSTARG] = sig; #ifdef COMPAT_FREEBSD32 tf->fixreg[FIRSTARG+2] = (register_t)usfp + ((SV_PROC_FLAG(p, SV_ILP32)) ? offsetof(struct sigframe32, sf_uc) : offsetof(struct sigframe, sf_uc)); #else tf->fixreg[FIRSTARG+2] = (register_t)usfp + offsetof(struct sigframe, sf_uc); #endif if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* * Signal handler installed with SA_SIGINFO. */ #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(p, SV_ILP32)) { sf32.sf_si = siginfo32; tf->fixreg[FIRSTARG+1] = (register_t)usfp + offsetof(struct sigframe32, sf_si); sf32.sf_si = siginfo32; } else { #endif tf->fixreg[FIRSTARG+1] = (register_t)usfp + offsetof(struct sigframe, sf_si); sf.sf_si = ksi->ksi_info; #ifdef COMPAT_FREEBSD32 } #endif } else { /* Old FreeBSD-style arguments. */ tf->fixreg[FIRSTARG+1] = code; tf->fixreg[FIRSTARG+3] = (tf->exc == EXC_DSI) ? tf->dar : tf->srr0; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); tf->srr0 = (register_t)p->p_sysent->sv_sigcode_base; /* * copy the frame out to userland. */ if (copyout(sfp, usfp, sfpsize) != 0) { /* * Process has trashed its stack. Kill it. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p sfp=%p", td, sfp); PROC_LOCK(p); sigexit(td, SIGILL); } CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->srr0, tf->fixreg[1]); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } int sys_sigreturn(struct thread *td, struct sigreturn_args *uap) { ucontext_t uc; int error; CTR2(KTR_SIG, "sigreturn: td=%p ucp=%p", td, uap->sigcntxp); if (copyin(uap->sigcntxp, &uc, sizeof(uc)) != 0) { CTR1(KTR_SIG, "sigreturn: efault td=%p", td); return (EFAULT); } error = set_mcontext(td, &uc.uc_mcontext); if (error != 0) return (error); kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); CTR3(KTR_SIG, "sigreturn: return td=%p pc=%#x sp=%#x", td, uc.uc_mcontext.mc_srr0, uc.uc_mcontext.mc_gpr[1]); return (EJUSTRETURN); } #ifdef COMPAT_FREEBSD4 int freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap) { return sys_sigreturn(td, (struct sigreturn_args *)uap); } #endif /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_lr = tf->srr0; pcb->pcb_sp = tf->fixreg[1]; } /* * get_mcontext/sendsig helper routine that doesn't touch the * proc lock */ static int grab_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct pcb *pcb; int i; pcb = td->td_pcb; memset(mcp, 0, sizeof(mcontext_t)); mcp->mc_vers = _MC_VERSION; mcp->mc_flags = 0; memcpy(&mcp->mc_frame, td->td_frame, sizeof(struct trapframe)); if (flags & GET_MC_CLEAR_RET) { mcp->mc_gpr[3] = 0; mcp->mc_gpr[4] = 0; } /* * This assumes that floating-point context is *not* lazy, * so if the thread has used FP there would have been a * FP-unavailable exception that would have set things up * correctly. */ if (pcb->pcb_flags & PCB_FPREGS) { if (pcb->pcb_flags & PCB_FPU) { KASSERT(td == curthread, ("get_mcontext: fp save not curthread")); critical_enter(); save_fpu(td); critical_exit(); } mcp->mc_flags |= _MC_FP_VALID; memcpy(&mcp->mc_fpscr, &pcb->pcb_fpu.fpscr, sizeof(double)); for (i = 0; i < 32; i++) memcpy(&mcp->mc_fpreg[i], &pcb->pcb_fpu.fpr[i].fpr, sizeof(double)); } if (pcb->pcb_flags & PCB_VSX) { for (i = 0; i < 32; i++) memcpy(&mcp->mc_vsxfpreg[i], &pcb->pcb_fpu.fpr[i].vsr[2], sizeof(double)); } /* * Repeat for Altivec context */ if (pcb->pcb_flags & PCB_VEC) { KASSERT(td == curthread, ("get_mcontext: fp save not curthread")); critical_enter(); save_vec(td); critical_exit(); mcp->mc_flags |= _MC_AV_VALID; mcp->mc_vscr = pcb->pcb_vec.vscr; mcp->mc_vrsave = pcb->pcb_vec.vrsave; memcpy(mcp->mc_avec, pcb->pcb_vec.vr, sizeof(mcp->mc_avec)); } mcp->mc_len = sizeof(*mcp); return (0); } int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { int error; error = grab_mcontext(td, mcp, flags); if (error == 0) { PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(td->td_frame->fixreg[1]); PROC_UNLOCK(curthread->td_proc); } return (error); } int set_mcontext(struct thread *td, mcontext_t *mcp) { struct pcb *pcb; struct trapframe *tf; register_t tls; int i; pcb = td->td_pcb; tf = td->td_frame; if (mcp->mc_vers != _MC_VERSION || mcp->mc_len != sizeof(*mcp)) return (EINVAL); /* * Don't let the user set privileged MSR bits */ if ((mcp->mc_srr1 & psl_userstatic) != (tf->srr1 & psl_userstatic)) { return (EINVAL); } /* Copy trapframe, preserving TLS pointer across context change */ if (SV_PROC_FLAG(td->td_proc, SV_LP64)) tls = tf->fixreg[13]; else tls = tf->fixreg[2]; memcpy(tf, mcp->mc_frame, sizeof(mcp->mc_frame)); if (SV_PROC_FLAG(td->td_proc, SV_LP64)) tf->fixreg[13] = tls; else tf->fixreg[2] = tls; if (mcp->mc_flags & _MC_FP_VALID) { /* enable_fpu() will happen lazily on a fault */ pcb->pcb_flags |= PCB_FPREGS; memcpy(&pcb->pcb_fpu.fpscr, &mcp->mc_fpscr, sizeof(double)); bzero(pcb->pcb_fpu.fpr, sizeof(pcb->pcb_fpu.fpr)); for (i = 0; i < 32; i++) { memcpy(&pcb->pcb_fpu.fpr[i].fpr, &mcp->mc_fpreg[i], sizeof(double)); memcpy(&pcb->pcb_fpu.fpr[i].vsr[2], &mcp->mc_vsxfpreg[i], sizeof(double)); } } if (mcp->mc_flags & _MC_AV_VALID) { if ((pcb->pcb_flags & PCB_VEC) != PCB_VEC) { critical_enter(); enable_vec(td); critical_exit(); } pcb->pcb_vec.vscr = mcp->mc_vscr; pcb->pcb_vec.vrsave = mcp->mc_vrsave; memcpy(pcb->pcb_vec.vr, mcp->mc_avec, sizeof(mcp->mc_avec)); } return (0); } /* * Set set up registers on exec. */ void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *tf; register_t argc; tf = trapframe(td); bzero(tf, sizeof *tf); #ifdef __powerpc64__ tf->fixreg[1] = -roundup(-stack + 48, 16); #else tf->fixreg[1] = -roundup(-stack + 8, 16); #endif /* * Set up arguments for _start(): * _start(argc, argv, envp, obj, cleanup, ps_strings); * * Notes: * - obj and cleanup are the auxilliary and termination * vectors. They are fixed up by ld.elf_so. * - ps_strings is a NetBSD extention, and will be * ignored by executables which are strictly * compliant with the SVR4 ABI. */ /* Collect argc from the user stack */ argc = fuword((void *)stack); tf->fixreg[3] = argc; tf->fixreg[4] = stack + sizeof(register_t); tf->fixreg[5] = stack + (2 + argc)*sizeof(register_t); tf->fixreg[6] = 0; /* auxillary vector */ tf->fixreg[7] = 0; /* termination vector */ tf->fixreg[8] = (register_t)imgp->ps_strings; /* NetBSD extension */ tf->srr0 = imgp->entry_addr; #ifdef __powerpc64__ tf->fixreg[12] = imgp->entry_addr; #endif tf->srr1 = psl_userset | PSL_FE_DFLT; td->td_pcb->pcb_flags = 0; } #ifdef COMPAT_FREEBSD32 void ppc32_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *tf; uint32_t argc; tf = trapframe(td); bzero(tf, sizeof *tf); tf->fixreg[1] = -roundup(-stack + 8, 16); argc = fuword32((void *)stack); tf->fixreg[3] = argc; tf->fixreg[4] = stack + sizeof(uint32_t); tf->fixreg[5] = stack + (2 + argc)*sizeof(uint32_t); tf->fixreg[6] = 0; /* auxillary vector */ tf->fixreg[7] = 0; /* termination vector */ tf->fixreg[8] = (register_t)imgp->ps_strings; /* NetBSD extension */ tf->srr0 = imgp->entry_addr; tf->srr1 = psl_userset32 | PSL_FE_DFLT; td->td_pcb->pcb_flags = 0; } #endif int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *tf; tf = td->td_frame; memcpy(regs, tf, sizeof(struct reg)); return (0); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { /* No debug registers on PowerPC */ return (ENOSYS); } int fill_fpregs(struct thread *td, struct fpreg *fpregs) { struct pcb *pcb; int i; pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_FPREGS) == 0) memset(fpregs, 0, sizeof(struct fpreg)); else { memcpy(&fpregs->fpscr, &pcb->pcb_fpu.fpscr, sizeof(double)); for (i = 0; i < 32; i++) memcpy(&fpregs->fpreg[i], &pcb->pcb_fpu.fpr[i].fpr, sizeof(double)); } return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tf; tf = td->td_frame; memcpy(tf, regs, sizeof(struct reg)); return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { /* No debug registers on PowerPC */ return (ENOSYS); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { struct pcb *pcb; int i; pcb = td->td_pcb; pcb->pcb_flags |= PCB_FPREGS; memcpy(&pcb->pcb_fpu.fpscr, &fpregs->fpscr, sizeof(double)); for (i = 0; i < 32; i++) { memcpy(&pcb->pcb_fpu.fpr[i].fpr, &fpregs->fpreg[i], sizeof(double)); } return (0); } #ifdef COMPAT_FREEBSD32 int set_regs32(struct thread *td, struct reg32 *regs) { struct trapframe *tf; int i; tf = td->td_frame; for (i = 0; i < 32; i++) tf->fixreg[i] = regs->fixreg[i]; tf->lr = regs->lr; tf->cr = regs->cr; tf->xer = regs->xer; tf->ctr = regs->ctr; tf->srr0 = regs->pc; return (0); } int fill_regs32(struct thread *td, struct reg32 *regs) { struct trapframe *tf; int i; tf = td->td_frame; for (i = 0; i < 32; i++) regs->fixreg[i] = tf->fixreg[i]; regs->lr = tf->lr; regs->cr = tf->cr; regs->xer = tf->xer; regs->ctr = tf->ctr; regs->pc = tf->srr0; return (0); } static int grab_mcontext32(struct thread *td, mcontext32_t *mcp, int flags) { mcontext_t mcp64; int i, error; error = grab_mcontext(td, &mcp64, flags); if (error != 0) return (error); mcp->mc_vers = mcp64.mc_vers; mcp->mc_flags = mcp64.mc_flags; mcp->mc_onstack = mcp64.mc_onstack; mcp->mc_len = mcp64.mc_len; memcpy(mcp->mc_avec,mcp64.mc_avec,sizeof(mcp64.mc_avec)); memcpy(mcp->mc_av,mcp64.mc_av,sizeof(mcp64.mc_av)); for (i = 0; i < 42; i++) mcp->mc_frame[i] = mcp64.mc_frame[i]; memcpy(mcp->mc_fpreg,mcp64.mc_fpreg,sizeof(mcp64.mc_fpreg)); memcpy(mcp->mc_vsxfpreg,mcp64.mc_vsxfpreg,sizeof(mcp64.mc_vsxfpreg)); return (0); } static int get_mcontext32(struct thread *td, mcontext32_t *mcp, int flags) { int error; error = grab_mcontext32(td, mcp, flags); if (error == 0) { PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(td->td_frame->fixreg[1]); PROC_UNLOCK(curthread->td_proc); } return (error); } static int set_mcontext32(struct thread *td, mcontext32_t *mcp) { mcontext_t mcp64; int i, error; mcp64.mc_vers = mcp->mc_vers; mcp64.mc_flags = mcp->mc_flags; mcp64.mc_onstack = mcp->mc_onstack; mcp64.mc_len = mcp->mc_len; memcpy(mcp64.mc_avec,mcp->mc_avec,sizeof(mcp64.mc_avec)); memcpy(mcp64.mc_av,mcp->mc_av,sizeof(mcp64.mc_av)); for (i = 0; i < 42; i++) mcp64.mc_frame[i] = mcp->mc_frame[i]; mcp64.mc_srr1 |= (td->td_frame->srr1 & 0xFFFFFFFF00000000ULL); memcpy(mcp64.mc_fpreg,mcp->mc_fpreg,sizeof(mcp64.mc_fpreg)); memcpy(mcp64.mc_vsxfpreg,mcp->mc_vsxfpreg,sizeof(mcp64.mc_vsxfpreg)); error = set_mcontext(td, &mcp64); return (error); } #endif #ifdef COMPAT_FREEBSD32 int freebsd32_sigreturn(struct thread *td, struct freebsd32_sigreturn_args *uap) { ucontext32_t uc; int error; CTR2(KTR_SIG, "sigreturn: td=%p ucp=%p", td, uap->sigcntxp); if (copyin(uap->sigcntxp, &uc, sizeof(uc)) != 0) { CTR1(KTR_SIG, "sigreturn: efault td=%p", td); return (EFAULT); } error = set_mcontext32(td, &uc.uc_mcontext); if (error != 0) return (error); kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); CTR3(KTR_SIG, "sigreturn: return td=%p pc=%#x sp=%#x", td, uc.uc_mcontext.mc_srr0, uc.uc_mcontext.mc_gpr[1]); return (EJUSTRETURN); } /* * The first two fields of a ucontext_t are the signal mask and the machine * context. The next field is uc_link; we want to avoid destroying the link * when copying out contexts. */ #define UC32_COPY_SIZE offsetof(ucontext32_t, uc_link) int freebsd32_getcontext(struct thread *td, struct freebsd32_getcontext_args *uap) { ucontext32_t uc; int ret; if (uap->ucp == NULL) ret = EINVAL; else { bzero(&uc, sizeof(uc)); get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET); PROC_LOCK(td->td_proc); uc.uc_sigmask = td->td_sigmask; PROC_UNLOCK(td->td_proc); ret = copyout(&uc, uap->ucp, UC32_COPY_SIZE); } return (ret); } int freebsd32_setcontext(struct thread *td, struct freebsd32_setcontext_args *uap) { ucontext32_t uc; int ret; if (uap->ucp == NULL) ret = EINVAL; else { ret = copyin(uap->ucp, &uc, UC32_COPY_SIZE); if (ret == 0) { ret = set_mcontext32(td, &uc.uc_mcontext); if (ret == 0) { kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); } } } return (ret == 0 ? EJUSTRETURN : ret); } int freebsd32_swapcontext(struct thread *td, struct freebsd32_swapcontext_args *uap) { ucontext32_t uc; int ret; if (uap->oucp == NULL || uap->ucp == NULL) ret = EINVAL; else { bzero(&uc, sizeof(uc)); get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET); PROC_LOCK(td->td_proc); uc.uc_sigmask = td->td_sigmask; PROC_UNLOCK(td->td_proc); ret = copyout(&uc, uap->oucp, UC32_COPY_SIZE); if (ret == 0) { ret = copyin(uap->ucp, &uc, UC32_COPY_SIZE); if (ret == 0) { ret = set_mcontext32(td, &uc.uc_mcontext); if (ret == 0) { kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); } } } } return (ret == 0 ? EJUSTRETURN : ret); } #endif void cpu_set_syscall_retval(struct thread *td, int error) { struct proc *p; struct trapframe *tf; int fixup; if (error == EJUSTRETURN) return; p = td->td_proc; tf = td->td_frame; if (tf->fixreg[0] == SYS___syscall && (SV_PROC_FLAG(p, SV_ILP32))) { int code = tf->fixreg[FIRSTARG + 1]; - if (p->p_sysent->sv_mask) - code &= p->p_sysent->sv_mask; fixup = ( #if defined(COMPAT_FREEBSD6) && defined(SYS_freebsd6_lseek) code != SYS_freebsd6_lseek && #endif code != SYS_lseek) ? 1 : 0; } else fixup = 0; switch (error) { case 0: if (fixup) { /* * 64-bit return, 32-bit syscall. Fixup byte order */ tf->fixreg[FIRSTARG] = 0; tf->fixreg[FIRSTARG + 1] = td->td_retval[0]; } else { tf->fixreg[FIRSTARG] = td->td_retval[0]; tf->fixreg[FIRSTARG + 1] = td->td_retval[1]; } tf->cr &= ~0x10000000; /* Unset summary overflow */ break; case ERESTART: /* * Set user's pc back to redo the system call. */ tf->srr0 -= 4; break; default: tf->fixreg[FIRSTARG] = SV_ABI_ERRNO(p, error); tf->cr |= 0x10000000; /* Set summary overflow */ break; } } /* * Threading functions */ void cpu_thread_exit(struct thread *td) { } void cpu_thread_clean(struct thread *td) { } void cpu_thread_alloc(struct thread *td) { struct pcb *pcb; pcb = (struct pcb *)((td->td_kstack + td->td_kstack_pages * PAGE_SIZE - sizeof(struct pcb)) & ~0x2fUL); td->td_pcb = pcb; td->td_frame = (struct trapframe *)pcb - 1; } void cpu_thread_free(struct thread *td) { } int cpu_set_user_tls(struct thread *td, void *tls_base) { if (SV_PROC_FLAG(td->td_proc, SV_LP64)) td->td_frame->fixreg[13] = (register_t)tls_base + 0x7010; else td->td_frame->fixreg[2] = (register_t)tls_base + 0x7008; return (0); } void cpu_copy_thread(struct thread *td, struct thread *td0) { struct pcb *pcb2; struct trapframe *tf; struct callframe *cf; pcb2 = td->td_pcb; /* Copy the upcall pcb */ bcopy(td0->td_pcb, pcb2, sizeof(*pcb2)); /* Create a stack for the new thread */ tf = td->td_frame; bcopy(td0->td_frame, tf, sizeof(struct trapframe)); tf->fixreg[FIRSTARG] = 0; tf->fixreg[FIRSTARG + 1] = 0; tf->cr &= ~0x10000000; /* Set registers for trampoline to user mode. */ cf = (struct callframe *)tf - 1; memset(cf, 0, sizeof(struct callframe)); cf->cf_func = (register_t)fork_return; cf->cf_arg0 = (register_t)td; cf->cf_arg1 = (register_t)tf; pcb2->pcb_sp = (register_t)cf; #if defined(__powerpc64__) && (!defined(_CALL_ELF) || _CALL_ELF == 1) pcb2->pcb_lr = ((register_t *)fork_trampoline)[0]; pcb2->pcb_toc = ((register_t *)fork_trampoline)[1]; #else pcb2->pcb_lr = (register_t)fork_trampoline; pcb2->pcb_context[0] = pcb2->pcb_lr; #endif pcb2->pcb_cpu.aim.usr_vsid = 0; #ifdef __SPE__ pcb2->pcb_vec.vscr = SPEFSCR_FINVE | SPEFSCR_FDBZE | SPEFSCR_FUNFE | SPEFSCR_FOVFE; #endif /* Setup to release spin count in fork_exit(). */ td->td_md.md_spinlock_count = 1; td->td_md.md_saved_msr = psl_kernset; } void cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg, stack_t *stack) { struct trapframe *tf; uintptr_t sp; tf = td->td_frame; /* align stack and alloc space for frame ptr and saved LR */ #ifdef __powerpc64__ sp = ((uintptr_t)stack->ss_sp + stack->ss_size - 48) & ~0x1f; #else sp = ((uintptr_t)stack->ss_sp + stack->ss_size - 8) & ~0x1f; #endif bzero(tf, sizeof(struct trapframe)); tf->fixreg[1] = (register_t)sp; tf->fixreg[3] = (register_t)arg; if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { tf->srr0 = (register_t)entry; #ifdef __powerpc64__ tf->srr1 = psl_userset32 | PSL_FE_DFLT; #else tf->srr1 = psl_userset | PSL_FE_DFLT; #endif } else { #ifdef __powerpc64__ if (td->td_proc->p_sysent == &elf64_freebsd_sysvec_v2) { tf->srr0 = (register_t)entry; /* ELFv2 ABI requires that the global entry point be in r12. */ tf->fixreg[12] = (register_t)entry; } else { register_t entry_desc[3]; (void)copyin((void *)entry, entry_desc, sizeof(entry_desc)); tf->srr0 = entry_desc[0]; tf->fixreg[2] = entry_desc[1]; tf->fixreg[11] = entry_desc[2]; } tf->srr1 = psl_userset | PSL_FE_DFLT; #endif } td->td_pcb->pcb_flags = 0; #ifdef __SPE__ td->td_pcb->pcb_vec.vscr = SPEFSCR_FINVE | SPEFSCR_FDBZE | SPEFSCR_FUNFE | SPEFSCR_FOVFE; #endif td->td_retval[0] = (register_t)entry; td->td_retval[1] = 0; } static int emulate_mfspr(int spr, int reg, struct trapframe *frame){ struct thread *td; td = curthread; if (spr == SPR_DSCR) { // If DSCR was never set, get the default DSCR if ((td->td_pcb->pcb_flags & PCB_CDSCR) == 0) td->td_pcb->pcb_dscr = mfspr(SPR_DSCR); frame->fixreg[reg] = td->td_pcb->pcb_dscr; frame->srr0 += 4; return 0; } else return SIGILL; } static int emulate_mtspr(int spr, int reg, struct trapframe *frame){ struct thread *td; td = curthread; if (spr == SPR_DSCR) { td->td_pcb->pcb_flags |= PCB_CDSCR; td->td_pcb->pcb_dscr = frame->fixreg[reg]; frame->srr0 += 4; return 0; } else return SIGILL; } #define XFX 0xFC0007FF int ppc_instr_emulate(struct trapframe *frame, struct pcb *pcb) { uint32_t instr; int reg, sig; int rs, spr; instr = fuword32((void *)frame->srr0); sig = SIGILL; if ((instr & 0xfc1fffff) == 0x7c1f42a6) { /* mfpvr */ reg = (instr & ~0xfc1fffff) >> 21; frame->fixreg[reg] = mfpvr(); frame->srr0 += 4; return (0); } else if ((instr & XFX) == 0x7c0002a6) { /* mfspr */ rs = (instr & 0x3e00000) >> 21; spr = (instr & 0x1ff800) >> 16; return emulate_mfspr(spr, rs, frame); } else if ((instr & XFX) == 0x7c0003a6) { /* mtspr */ rs = (instr & 0x3e00000) >> 21; spr = (instr & 0x1ff800) >> 16; return emulate_mtspr(spr, rs, frame); } else if ((instr & 0xfc000ffe) == 0x7c0004ac) { /* various sync */ powerpc_sync(); /* Do a heavy-weight sync */ frame->srr0 += 4; return (0); } #ifdef FPU_EMU if (!(pcb->pcb_flags & PCB_FPREGS)) { bzero(&pcb->pcb_fpu, sizeof(pcb->pcb_fpu)); pcb->pcb_flags |= PCB_FPREGS; } sig = fpu_emulate(frame, &pcb->pcb_fpu); #endif if (sig == SIGILL) { if (pcb->pcb_lastill != frame->srr0) { /* Allow a second chance, in case of cache sync issues. */ sig = 0; pmap_sync_icache(PCPU_GET(curpmap), frame->srr0, 4); pcb->pcb_lastill = frame->srr0; } } return (sig); } Index: head/sys/powerpc/powerpc/trap.c =================================================================== --- head/sys/powerpc/powerpc/trap.c (revision 342242) +++ head/sys/powerpc/powerpc/trap.c (revision 342243) @@ -1,992 +1,990 @@ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: trap.c,v 1.58 2002/03/04 04:07:35 dbj Exp $ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Below matches setjmp.S */ #define FAULTBUF_LR 21 #define FAULTBUF_R1 1 #define FAULTBUF_R2 2 #define FAULTBUF_CR 22 #define FAULTBUF_R14 3 #define MOREARGS(sp) ((caddr_t)((uintptr_t)(sp) + \ sizeof(struct callframe) - 3*sizeof(register_t))) /* more args go here */ static void trap_fatal(struct trapframe *frame); static void printtrap(u_int vector, struct trapframe *frame, int isfatal, int user); static int trap_pfault(struct trapframe *frame, int user); static int fix_unaligned(struct thread *td, struct trapframe *frame); static int handle_onfault(struct trapframe *frame); static void syscall(struct trapframe *frame); #if defined(__powerpc64__) && defined(AIM) void handle_kernel_slb_spill(int, register_t, register_t); static int handle_user_slb_spill(pmap_t pm, vm_offset_t addr); extern int n_slbs; static void normalize_inputs(void); #endif extern vm_offset_t __startkernel; #ifdef KDB int db_trap_glue(struct trapframe *); /* Called from trap_subr.S */ #endif struct powerpc_exception { u_int vector; char *name; }; #ifdef KDTRACE_HOOKS #include int (*dtrace_invop_jump_addr)(struct trapframe *); #endif static struct powerpc_exception powerpc_exceptions[] = { { EXC_CRIT, "critical input" }, { EXC_RST, "system reset" }, { EXC_MCHK, "machine check" }, { EXC_DSI, "data storage interrupt" }, { EXC_DSE, "data segment exception" }, { EXC_ISI, "instruction storage interrupt" }, { EXC_ISE, "instruction segment exception" }, { EXC_EXI, "external interrupt" }, { EXC_ALI, "alignment" }, { EXC_PGM, "program" }, { EXC_HEA, "hypervisor emulation assistance" }, { EXC_FPU, "floating-point unavailable" }, { EXC_APU, "auxiliary proc unavailable" }, { EXC_DECR, "decrementer" }, { EXC_FIT, "fixed-interval timer" }, { EXC_WDOG, "watchdog timer" }, { EXC_SC, "system call" }, { EXC_TRC, "trace" }, { EXC_FPA, "floating-point assist" }, { EXC_DEBUG, "debug" }, { EXC_PERF, "performance monitoring" }, { EXC_VEC, "altivec unavailable" }, { EXC_VSX, "vsx unavailable" }, { EXC_FAC, "facility unavailable" }, { EXC_ITMISS, "instruction tlb miss" }, { EXC_DLMISS, "data load tlb miss" }, { EXC_DSMISS, "data store tlb miss" }, { EXC_BPT, "instruction breakpoint" }, { EXC_SMI, "system management" }, { EXC_VECAST_G4, "altivec assist" }, { EXC_THRM, "thermal management" }, { EXC_RUNMODETRC, "run mode/trace" }, { EXC_SOFT_PATCH, "soft patch exception" }, { EXC_LAST, NULL } }; #define ESR_BITMASK \ "\20" \ "\040b0\037b1\036b2\035b3\034PIL\033PRR\032PTR\031FP" \ "\030ST\027b9\026DLK\025ILK\024b12\023b13\022BO\021PIE" \ "\020b16\017b17\016b18\015b19\014b20\013b21\012b22\011b23" \ "\010SPE\007EPID\006b26\005b27\004b28\003b29\002b30\001b31" #define MCSR_BITMASK \ "\20" \ "\040MCP\037ICERR\036DCERR\035TLBPERR\034L2MMU_MHIT\033b5\032b6\031b7" \ "\030b8\027b9\026b10\025NMI\024MAV\023MEA\022b14\021IF" \ "\020LD\017ST\016LDG\015b19\014b20\013b21\012b22\011b23" \ "\010b24\007b25\006b26\005b27\004b28\003b29\002TLBSYNC\001BSL2_ERR" #define MSSSR_BITMASK \ "\20" \ "\040b0\037b1\036b2\035b3\034b4\033b5\032b6\031b7" \ "\030b8\027b9\026b10\025b11\024b12\023L2TAG\022L2DAT\021L3TAG" \ "\020L3DAT\017APE\016DPE\015TEA\014b20\013b21\012b22\011b23" \ "\010b24\007b25\006b26\005b27\004b28\003b29\002b30\001b31" static const char * trapname(u_int vector) { struct powerpc_exception *pe; for (pe = powerpc_exceptions; pe->vector != EXC_LAST; pe++) { if (pe->vector == vector) return (pe->name); } return ("unknown"); } static inline bool frame_is_trap_inst(struct trapframe *frame) { #ifdef AIM return (frame->exc == EXC_PGM && frame->srr1 & EXC_PGM_TRAP); #else return ((frame->cpu.booke.esr & ESR_PTR) != 0); #endif } void trap(struct trapframe *frame) { struct thread *td; struct proc *p; #ifdef KDTRACE_HOOKS uint32_t inst; #endif int sig, type, user; u_int ucode; ksiginfo_t ksi; register_t fscr; VM_CNT_INC(v_trap); #ifdef KDB if (kdb_active) { kdb_reenter(); return; } #endif td = curthread; p = td->td_proc; type = ucode = frame->exc; sig = 0; user = frame->srr1 & PSL_PR; CTR3(KTR_TRAP, "trap: %s type=%s (%s)", td->td_name, trapname(type), user ? "user" : "kernel"); #ifdef KDTRACE_HOOKS /* * A trap can occur while DTrace executes a probe. Before * executing the probe, DTrace blocks re-scheduling and sets * a flag in its per-cpu flags to indicate that it doesn't * want to fault. On returning from the probe, the no-fault * flag is cleared and finally re-scheduling is enabled. * * If the DTrace kernel module has registered a trap handler, * call it and if it returns non-zero, assume that it has * handled the trap and modified the trap frame so that this * function can return normally. */ if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type) != 0) return; #endif if (user) { td->td_pticks = 0; td->td_frame = frame; if (td->td_cowgen != p->p_cowgen) thread_cow_update(td); /* User Mode Traps */ switch (type) { case EXC_RUNMODETRC: case EXC_TRC: frame->srr1 &= ~PSL_SE; sig = SIGTRAP; ucode = TRAP_TRACE; break; #if defined(__powerpc64__) && defined(AIM) case EXC_ISE: case EXC_DSE: if (handle_user_slb_spill(&p->p_vmspace->vm_pmap, (type == EXC_ISE) ? frame->srr0 : frame->dar) != 0){ sig = SIGSEGV; ucode = SEGV_MAPERR; } break; #endif case EXC_DSI: case EXC_ISI: sig = trap_pfault(frame, 1); if (sig == SIGSEGV) ucode = SEGV_MAPERR; break; case EXC_SC: syscall(frame); break; case EXC_FPU: KASSERT((td->td_pcb->pcb_flags & PCB_FPU) != PCB_FPU, ("FPU already enabled for thread")); enable_fpu(td); break; case EXC_VEC: KASSERT((td->td_pcb->pcb_flags & PCB_VEC) != PCB_VEC, ("Altivec already enabled for thread")); enable_vec(td); break; case EXC_VSX: KASSERT((td->td_pcb->pcb_flags & PCB_VSX) != PCB_VSX, ("VSX already enabled for thread")); if (!(td->td_pcb->pcb_flags & PCB_VEC)) enable_vec(td); if (!(td->td_pcb->pcb_flags & PCB_FPU)) save_fpu(td); td->td_pcb->pcb_flags |= PCB_VSX; enable_fpu(td); break; case EXC_FAC: fscr = mfspr(SPR_FSCR); if ((fscr & FSCR_IC_MASK) == FSCR_IC_HTM) { CTR0(KTR_TRAP, "Hardware Transactional Memory subsystem disabled"); } sig = SIGILL; ucode = ILL_ILLOPC; break; case EXC_HEA: sig = SIGILL; ucode = ILL_ILLOPC; break; case EXC_VECAST_E: case EXC_VECAST_G4: case EXC_VECAST_G5: /* * We get a VPU assist exception for IEEE mode * vector operations on denormalized floats. * Emulating this is a giant pain, so for now, * just switch off IEEE mode and treat them as * zero. */ save_vec(td); td->td_pcb->pcb_vec.vscr |= ALTIVEC_VSCR_NJ; enable_vec(td); break; case EXC_ALI: if (fix_unaligned(td, frame) != 0) { sig = SIGBUS; ucode = BUS_ADRALN; } else frame->srr0 += 4; break; case EXC_DEBUG: /* Single stepping */ mtspr(SPR_DBSR, mfspr(SPR_DBSR)); frame->srr1 &= ~PSL_DE; frame->cpu.booke.dbcr0 &= ~(DBCR0_IDM | DBCR0_IC); sig = SIGTRAP; ucode = TRAP_TRACE; break; case EXC_PGM: /* Identify the trap reason */ if (frame_is_trap_inst(frame)) { #ifdef KDTRACE_HOOKS inst = fuword32((const void *)frame->srr0); if (inst == 0x0FFFDDDD && dtrace_pid_probe_ptr != NULL) { (*dtrace_pid_probe_ptr)(frame); break; } #endif sig = SIGTRAP; ucode = TRAP_BRKPT; } else { sig = ppc_instr_emulate(frame, td->td_pcb); if (sig == SIGILL) { if (frame->srr1 & EXC_PGM_PRIV) ucode = ILL_PRVOPC; else if (frame->srr1 & EXC_PGM_ILLEGAL) ucode = ILL_ILLOPC; } else if (sig == SIGFPE) ucode = FPE_FLTINV; /* Punt for now, invalid operation. */ } break; case EXC_MCHK: /* * Note that this may not be recoverable for the user * process, depending on the type of machine check, * but it at least prevents the kernel from dying. */ sig = SIGBUS; ucode = BUS_OBJERR; break; #if defined(__powerpc64__) && defined(AIM) case EXC_SOFT_PATCH: /* * Point to the instruction that generated the exception to execute it again, * and normalize the register values. */ frame->srr0 -= 4; normalize_inputs(); break; #endif default: trap_fatal(frame); } } else { /* Kernel Mode Traps */ KASSERT(cold || td->td_ucred != NULL, ("kernel trap doesn't have ucred")); switch (type) { case EXC_PGM: #ifdef KDTRACE_HOOKS if (frame_is_trap_inst(frame)) { if (*(uint32_t *)frame->srr0 == EXC_DTRACE) { if (dtrace_invop_jump_addr != NULL) { dtrace_invop_jump_addr(frame); return; } } } #endif #ifdef KDB if (db_trap_glue(frame)) return; #endif break; #if defined(__powerpc64__) && defined(AIM) case EXC_DSE: if (td->td_pcb->pcb_cpu.aim.usr_vsid != 0 && (frame->dar & SEGMENT_MASK) == USER_ADDR) { __asm __volatile ("slbmte %0, %1" :: "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), "r"(USER_SLB_SLBE)); return; } break; #endif case EXC_DSI: if (trap_pfault(frame, 0) == 0) return; break; case EXC_MCHK: if (handle_onfault(frame)) return; break; default: break; } trap_fatal(frame); } if (sig != 0) { if (p->p_sysent->sv_transtrap != NULL) sig = (p->p_sysent->sv_transtrap)(sig, type); ksiginfo_init_trap(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = (int) ucode; /* XXX, not POSIX */ ksi.ksi_addr = (void *)frame->srr0; ksi.ksi_trapno = type; trapsignal(td, &ksi); } userret(td, frame); } static void trap_fatal(struct trapframe *frame) { #ifdef KDB bool handled; #endif printtrap(frame->exc, frame, 1, (frame->srr1 & PSL_PR)); #ifdef KDB if (debugger_on_trap) { kdb_why = KDB_WHY_TRAP; handled = kdb_trap(frame->exc, 0, frame); kdb_why = KDB_WHY_UNSET; if (handled) return; } #endif panic("%s trap", trapname(frame->exc)); } static void cpu_printtrap(u_int vector, struct trapframe *frame, int isfatal, int user) { #ifdef AIM uint16_t ver; switch (vector) { case EXC_DSE: case EXC_DSI: case EXC_DTMISS: printf(" dsisr = 0x%lx\n", (u_long)frame->cpu.aim.dsisr); break; case EXC_MCHK: ver = mfpvr() >> 16; if (MPC745X_P(ver)) printf(" msssr0 = 0x%b\n", (int)mfspr(SPR_MSSSR0), MSSSR_BITMASK); break; } #elif defined(BOOKE) vm_paddr_t pa; switch (vector) { case EXC_MCHK: pa = mfspr(SPR_MCARU); pa = (pa << 32) | (u_register_t)mfspr(SPR_MCAR); printf(" mcsr = 0x%b\n", (int)mfspr(SPR_MCSR), MCSR_BITMASK); printf(" mcar = 0x%jx\n", (uintmax_t)pa); } printf(" esr = 0x%b\n", (int)frame->cpu.booke.esr, ESR_BITMASK); #endif } static void printtrap(u_int vector, struct trapframe *frame, int isfatal, int user) { printf("\n"); printf("%s %s trap:\n", isfatal ? "fatal" : "handled", user ? "user" : "kernel"); printf("\n"); printf(" exception = 0x%x (%s)\n", vector, trapname(vector)); switch (vector) { case EXC_DSE: case EXC_DSI: case EXC_DTMISS: printf(" virtual address = 0x%" PRIxPTR "\n", frame->dar); break; case EXC_ISE: case EXC_ISI: case EXC_ITMISS: printf(" virtual address = 0x%" PRIxPTR "\n", frame->srr0); break; case EXC_MCHK: break; } cpu_printtrap(vector, frame, isfatal, user); printf(" srr0 = 0x%" PRIxPTR " (0x%" PRIxPTR ")\n", frame->srr0, frame->srr0 - (register_t)(__startkernel - KERNBASE)); printf(" srr1 = 0x%lx\n", (u_long)frame->srr1); printf(" current msr = 0x%" PRIxPTR "\n", mfmsr()); printf(" lr = 0x%" PRIxPTR " (0x%" PRIxPTR ")\n", frame->lr, frame->lr - (register_t)(__startkernel - KERNBASE)); printf(" curthread = %p\n", curthread); if (curthread != NULL) printf(" pid = %d, comm = %s\n", curthread->td_proc->p_pid, curthread->td_name); printf("\n"); } /* * Handles a fatal fault when we have onfault state to recover. Returns * non-zero if there was onfault recovery state available. */ static int handle_onfault(struct trapframe *frame) { struct thread *td; jmp_buf *fb; td = curthread; fb = td->td_pcb->pcb_onfault; if (fb != NULL) { frame->srr0 = (*fb)->_jb[FAULTBUF_LR]; frame->fixreg[1] = (*fb)->_jb[FAULTBUF_R1]; frame->fixreg[2] = (*fb)->_jb[FAULTBUF_R2]; frame->fixreg[3] = 1; frame->cr = (*fb)->_jb[FAULTBUF_CR]; bcopy(&(*fb)->_jb[FAULTBUF_R14], &frame->fixreg[14], 18 * sizeof(register_t)); td->td_pcb->pcb_onfault = NULL; /* Returns twice, not thrice */ return (1); } return (0); } int cpu_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; struct syscall_args *sa; caddr_t params; size_t argsz; int error, n, i; p = td->td_proc; frame = td->td_frame; sa = &td->td_sa; sa->code = frame->fixreg[0]; params = (caddr_t)(frame->fixreg + FIRSTARG); n = NARGREG; if (sa->code == SYS_syscall) { /* * code is first argument, * followed by actual args. */ sa->code = *(register_t *) params; params += sizeof(register_t); n -= 1; } else if (sa->code == SYS___syscall) { /* * Like syscall, but code is a quad, * so as to maintain quad alignment * for the rest of the args. */ if (SV_PROC_FLAG(p, SV_ILP32)) { params += sizeof(register_t); sa->code = *(register_t *) params; params += sizeof(register_t); n -= 2; } else { sa->code = *(register_t *) params; params += sizeof(register_t); n -= 1; } } - if (p->p_sysent->sv_mask) - sa->code &= p->p_sysent->sv_mask; if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; if (SV_PROC_FLAG(p, SV_ILP32)) { argsz = sizeof(uint32_t); for (i = 0; i < n; i++) sa->args[i] = ((u_register_t *)(params))[i] & 0xffffffff; } else { argsz = sizeof(uint64_t); for (i = 0; i < n; i++) sa->args[i] = ((u_register_t *)(params))[i]; } if (sa->narg > n) error = copyin(MOREARGS(frame->fixreg[1]), sa->args + n, (sa->narg - n) * argsz); else error = 0; #ifdef __powerpc64__ if (SV_PROC_FLAG(p, SV_ILP32) && sa->narg > n) { /* Expand the size of arguments copied from the stack */ for (i = sa->narg; i >= n; i--) sa->args[i] = ((uint32_t *)(&sa->args[n]))[i-n]; } #endif if (error == 0) { td->td_retval[0] = 0; td->td_retval[1] = frame->fixreg[FIRSTARG + 1]; } return (error); } #include "../../kern/subr_syscall.c" void syscall(struct trapframe *frame) { struct thread *td; int error; td = curthread; td->td_frame = frame; #if defined(__powerpc64__) && defined(AIM) /* * Speculatively restore last user SLB segment, which we know is * invalid already, since we are likely to do copyin()/copyout(). */ if (td->td_pcb->pcb_cpu.aim.usr_vsid != 0) __asm __volatile ("slbmte %0, %1; isync" :: "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), "r"(USER_SLB_SLBE)); #endif error = syscallenter(td); syscallret(td, error); } #if defined(__powerpc64__) && defined(AIM) /* Handle kernel SLB faults -- runs in real mode, all seat belts off */ void handle_kernel_slb_spill(int type, register_t dar, register_t srr0) { struct slb *slbcache; uint64_t slbe, slbv; uint64_t esid, addr; int i; addr = (type == EXC_ISE) ? srr0 : dar; slbcache = PCPU_GET(aim.slb); esid = (uintptr_t)addr >> ADDR_SR_SHFT; slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID; /* See if the hardware flushed this somehow (can happen in LPARs) */ for (i = 0; i < n_slbs; i++) if (slbcache[i].slbe == (slbe | (uint64_t)i)) return; /* Not in the map, needs to actually be added */ slbv = kernel_va_to_slbv(addr); if (slbcache[USER_SLB_SLOT].slbe == 0) { for (i = 0; i < n_slbs; i++) { if (i == USER_SLB_SLOT) continue; if (!(slbcache[i].slbe & SLBE_VALID)) goto fillkernslb; } if (i == n_slbs) slbcache[USER_SLB_SLOT].slbe = 1; } /* Sacrifice a random SLB entry that is not the user entry */ i = mftb() % n_slbs; if (i == USER_SLB_SLOT) i = (i+1) % n_slbs; fillkernslb: /* Write new entry */ slbcache[i].slbv = slbv; slbcache[i].slbe = slbe | (uint64_t)i; /* Trap handler will restore from cache on exit */ } static int handle_user_slb_spill(pmap_t pm, vm_offset_t addr) { struct slb *user_entry; uint64_t esid; int i; if (pm->pm_slb == NULL) return (-1); esid = (uintptr_t)addr >> ADDR_SR_SHFT; PMAP_LOCK(pm); user_entry = user_va_to_slb_entry(pm, addr); if (user_entry == NULL) { /* allocate_vsid auto-spills it */ (void)allocate_user_vsid(pm, esid, 0); } else { /* * Check that another CPU has not already mapped this. * XXX: Per-thread SLB caches would be better. */ for (i = 0; i < pm->pm_slb_len; i++) if (pm->pm_slb[i] == user_entry) break; if (i == pm->pm_slb_len) slb_insert_user(pm, user_entry); } PMAP_UNLOCK(pm); return (0); } #endif static int trap_pfault(struct trapframe *frame, int user) { vm_offset_t eva, va; struct thread *td; struct proc *p; vm_map_t map; vm_prot_t ftype; int rv, is_user; td = curthread; p = td->td_proc; if (frame->exc == EXC_ISI) { eva = frame->srr0; ftype = VM_PROT_EXECUTE; if (frame->srr1 & SRR1_ISI_PFAULT) ftype |= VM_PROT_READ; } else { eva = frame->dar; #ifdef BOOKE if (frame->cpu.booke.esr & ESR_ST) #else if (frame->cpu.aim.dsisr & DSISR_STORE) #endif ftype = VM_PROT_WRITE; else ftype = VM_PROT_READ; } if (user) { KASSERT(p->p_vmspace != NULL, ("trap_pfault: vmspace NULL")); map = &p->p_vmspace->vm_map; } else { rv = pmap_decode_kernel_ptr(eva, &is_user, &eva); if (rv != 0) return (SIGSEGV); if (is_user) map = &p->p_vmspace->vm_map; else map = kernel_map; } va = trunc_page(eva); /* Fault in the page. */ rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); /* * XXXDTRACE: add dtrace_doubletrap_func here? */ if (rv == KERN_SUCCESS) return (0); if (!user && handle_onfault(frame)) return (0); return (SIGSEGV); } /* * For now, this only deals with the particular unaligned access case * that gcc tends to generate. Eventually it should handle all of the * possibilities that can happen on a 32-bit PowerPC in big-endian mode. */ static int fix_unaligned(struct thread *td, struct trapframe *frame) { struct thread *fputhread; #ifdef __SPE__ uint32_t inst; #endif int indicator, reg; double *fpr; #ifdef __SPE__ indicator = (frame->cpu.booke.esr & (ESR_ST|ESR_SPE)); if (indicator & ESR_SPE) { if (copyin((void *)frame->srr0, &inst, sizeof(inst)) != 0) return (-1); reg = EXC_ALI_SPE_REG(inst); fpr = (double *)td->td_pcb->pcb_vec.vr[reg]; fputhread = PCPU_GET(vecthread); /* Juggle the SPE to ensure that we've initialized * the registers, and that their current state is in * the PCB. */ if (fputhread != td) { if (fputhread) save_vec(fputhread); enable_vec(td); } save_vec(td); if (!(indicator & ESR_ST)) { if (copyin((void *)frame->dar, fpr, sizeof(double)) != 0) return (-1); frame->fixreg[reg] = td->td_pcb->pcb_vec.vr[reg][1]; enable_vec(td); } else { td->td_pcb->pcb_vec.vr[reg][1] = frame->fixreg[reg]; if (copyout(fpr, (void *)frame->dar, sizeof(double)) != 0) return (-1); } return (0); } #else indicator = EXC_ALI_OPCODE_INDICATOR(frame->cpu.aim.dsisr); switch (indicator) { case EXC_ALI_LFD: case EXC_ALI_STFD: reg = EXC_ALI_RST(frame->cpu.aim.dsisr); fpr = &td->td_pcb->pcb_fpu.fpr[reg].fpr; fputhread = PCPU_GET(fputhread); /* Juggle the FPU to ensure that we've initialized * the FPRs, and that their current state is in * the PCB. */ if (fputhread != td) { if (fputhread) save_fpu(fputhread); enable_fpu(td); } save_fpu(td); if (indicator == EXC_ALI_LFD) { if (copyin((void *)frame->dar, fpr, sizeof(double)) != 0) return (-1); enable_fpu(td); } else { if (copyout(fpr, (void *)frame->dar, sizeof(double)) != 0) return (-1); } return (0); break; } #endif return (-1); } #if defined(__powerpc64__) && defined(AIM) #define MSKNSHL(x, m, n) "(((" #x ") & " #m ") << " #n ")" #define MSKNSHR(x, m, n) "(((" #x ") & " #m ") >> " #n ")" /* xvcpsgndp instruction, built in opcode format. * This can be changed to use mnemonic after a toolchain update. */ #define XVCPSGNDP(xt, xa, xb) \ __asm __volatile(".long (" \ MSKNSHL(60, 0x3f, 26) " | " \ MSKNSHL(xt, 0x1f, 21) " | " \ MSKNSHL(xa, 0x1f, 16) " | " \ MSKNSHL(xb, 0x1f, 11) " | " \ MSKNSHL(240, 0xff, 3) " | " \ MSKNSHR(xa, 0x20, 3) " | " \ MSKNSHR(xa, 0x20, 4) " | " \ MSKNSHR(xa, 0x20, 5) ")") /* Macros to normalize 1 or 10 VSX registers */ #define NORM(x) XVCPSGNDP(x, x, x) #define NORM10(x) \ NORM(x ## 0); NORM(x ## 1); NORM(x ## 2); NORM(x ## 3); NORM(x ## 4); \ NORM(x ## 5); NORM(x ## 6); NORM(x ## 7); NORM(x ## 8); NORM(x ## 9) static void normalize_inputs(void) { unsigned long msr; /* enable VSX */ msr = mfmsr(); mtmsr(msr | PSL_VSX); NORM(0); NORM(1); NORM(2); NORM(3); NORM(4); NORM(5); NORM(6); NORM(7); NORM(8); NORM(9); NORM10(1); NORM10(2); NORM10(3); NORM10(4); NORM10(5); NORM(60); NORM(61); NORM(62); NORM(63); /* restore MSR */ mtmsr(msr); } #endif #ifdef KDB int db_trap_glue(struct trapframe *frame) { if (!(frame->srr1 & PSL_PR) && (frame->exc == EXC_TRC || frame->exc == EXC_RUNMODETRC || frame_is_trap_inst(frame) || frame->exc == EXC_BPT || frame->exc == EXC_DEBUG || frame->exc == EXC_DSI)) { int type = frame->exc; /* Ignore DTrace traps. */ if (*(uint32_t *)frame->srr0 == EXC_DTRACE) return (0); if (frame_is_trap_inst(frame)) { type = T_BREAKPOINT; } return (kdb_trap(type, 0, frame)); } return (0); } #endif Index: head/sys/riscv/riscv/elf_machdep.c =================================================================== --- head/sys/riscv/riscv/elf_machdep.c (revision 342242) +++ head/sys/riscv/riscv/elf_machdep.c (revision 342243) @@ -1,523 +1,522 @@ /*- * Copyright 1996-1998 John D. Polstra. * Copyright (c) 2015 Ruslan Bukin * Copyright (c) 2016 Yukishige Shibata * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct sysentvec elf64_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, - .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD ELF64", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, }; INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec); static Elf64_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_RISCV, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_info); static int debug_kld; SYSCTL_INT(_kern, OID_AUTO, debug_kld, CTLFLAG_RW, &debug_kld, 0, "Activate debug prints in elf_reloc_internal()"); struct type2str_ent { int type; const char *str; }; void elf64_dump_thread(struct thread *td, void *dst, size_t *off) { } /* * Following 4 functions are used to manupilate bits on 32bit interger value. * FIXME: I implemetend for ease-to-understand rather than for well-optimized. */ static uint32_t gen_bitmask(int msb, int lsb) { uint32_t mask; if (msb == sizeof(mask) * 8 - 1) mask = ~0; else mask = (1U << (msb + 1)) - 1; if (lsb > 0) mask &= ~((1U << lsb) - 1); return (mask); } static uint32_t extract_bits(uint32_t x, int msb, int lsb) { uint32_t mask; mask = gen_bitmask(msb, lsb); x &= mask; x >>= lsb; return (x); } static uint32_t insert_bits(uint32_t d, uint32_t s, int msb, int lsb) { uint32_t mask; mask = gen_bitmask(msb, lsb); d &= ~mask; s <<= lsb; s &= mask; return (d | s); } static uint32_t insert_imm(uint32_t insn, uint32_t imm, int imm_msb, int imm_lsb, int insn_lsb) { int insn_msb; uint32_t v; v = extract_bits(imm, imm_msb, imm_lsb); insn_msb = (imm_msb - imm_lsb) + insn_lsb; return (insert_bits(insn, v, insn_msb, insn_lsb)); } /* * The RISC-V ISA is designed so that all of immediate values are * sign-extended. * An immediate value is sometimes generated at runtime by adding * 12bit sign integer and 20bit signed integer. This requests 20bit * immediate value to be ajusted if the MSB of the 12bit immediate * value is asserted (sign-extended value is treated as negative value). * * For example, 0x123800 can be calculated by adding upper 20 bit of * 0x124000 and sign-extended 12bit immediate whose bit pattern is * 0x800 as follows: * 0x123800 * = 0x123000 + 0x800 * = (0x123000 + 0x1000) + (-0x1000 + 0x800) * = (0x123000 + 0x1000) + (0xff...ff800) * = 0x124000 + sign-extention(0x800) */ static uint32_t calc_hi20_imm(uint32_t value) { /* * There is the arithmetical hack that can remove conditional * statement. But I implement it in straightforward way. */ if ((value & 0x800) != 0) value += 0x1000; return (value & ~0xfff); } static const struct type2str_ent t2s[] = { { R_RISCV_NONE, "R_RISCV_NONE" }, { R_RISCV_64, "R_RISCV_64" }, { R_RISCV_JUMP_SLOT, "R_RISCV_JUMP_SLOT" }, { R_RISCV_RELATIVE, "R_RISCV_RELATIVE" }, { R_RISCV_JAL, "R_RISCV_JAL" }, { R_RISCV_CALL, "R_RISCV_CALL" }, { R_RISCV_PCREL_HI20, "R_RISCV_PCREL_HI20" }, { R_RISCV_PCREL_LO12_I, "R_RISCV_PCREL_LO12_I" }, { R_RISCV_PCREL_LO12_S, "R_RISCV_PCREL_LO12_S" }, { R_RISCV_HI20, "R_RISCV_HI20" }, { R_RISCV_LO12_I, "R_RISCV_LO12_I" }, { R_RISCV_LO12_S, "R_RISCV_LO12_S" }, }; static const char * reloctype_to_str(int type) { int i; for (i = 0; i < sizeof(t2s) / sizeof(t2s[0]); ++i) { if (type == t2s[i].type) return t2s[i].str; } return "*unknown*"; } bool elf_is_ifunc_reloc(Elf_Size r_info __unused) { return (false); } /* * Currently kernel loadable module for RISCV is compiled with -fPIC option. * (see also additional CFLAGS definition for RISCV in sys/conf/kmod.mk) * Only R_RISCV_64, R_RISCV_JUMP_SLOT and RISCV_RELATIVE are emitted in * the module. Other relocations will be processed when kernel loadable * modules are built in non-PIC. * * FIXME: only RISCV64 is supported. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, int local, elf_lookup_fn lookup) { Elf_Size rtype, symidx; const Elf_Rela *rela; Elf_Addr val, addr; Elf64_Addr *where; Elf_Addr addend; uint32_t before32_1; uint32_t before32; uint64_t before64; uint32_t* insn32p; uint32_t imm20; int error; switch (type) { case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *)(relocbase + rela->r_offset); insn32p = (uint32_t*)where; addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: printf("%s:%d unknown reloc type %d\n", __FUNCTION__, __LINE__, type); return -1; } switch (rtype) { case R_RISCV_NONE: break; case R_RISCV_64: case R_RISCV_JUMP_SLOT: error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; val = addr; before64 = *where; if (*where != val) *where = val; if (debug_kld) printf("%p %c %-24s %016lx -> %016lx\n", where, (local? 'l': 'g'), reloctype_to_str(rtype), before64, *where); break; case R_RISCV_RELATIVE: before64 = *where; *where = elf_relocaddr(lf, relocbase + addend); if (debug_kld) printf("%p %c %-24s %016lx -> %016lx\n", where, (local? 'l': 'g'), reloctype_to_str(rtype), before64, *where); break; case R_RISCV_JAL: error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; val = addr - (Elf_Addr)where; if ((val <= -(1UL << 20) || (1UL << 20) <= val)) { printf("kldload: huge offset against R_RISCV_JAL\n"); return -1; } before32 = *insn32p; *insn32p = insert_imm(*insn32p, val, 20, 20, 31); *insn32p = insert_imm(*insn32p, val, 10, 1, 21); *insn32p = insert_imm(*insn32p, val, 11, 11, 20); *insn32p = insert_imm(*insn32p, val, 19, 12, 12); if (debug_kld) printf("%p %c %-24s %08x -> %08x\n", where, (local? 'l': 'g'), reloctype_to_str(rtype), before32, *insn32p); break; case R_RISCV_CALL: /* * R_RISCV_CALL relocates 8-byte region that consists * of the sequence of AUIPC and JALR. */ /* calculate and check the pc relative offset. */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; val = addr - (Elf_Addr)where; if ((val <= -(1UL << 32) || (1UL << 32) <= val)) { printf("kldload: huge offset against R_RISCV_CALL\n"); return -1; } /* Relocate AUIPC. */ before32 = insn32p[0]; imm20 = calc_hi20_imm(val); insn32p[0] = insert_imm(insn32p[0], imm20, 31, 12, 12); /* Relocate JALR. */ before32_1 = insn32p[1]; insn32p[1] = insert_imm(insn32p[1], val, 11, 0, 20); if (debug_kld) printf("%p %c %-24s %08x %08x -> %08x %08x\n", where, (local? 'l': 'g'), reloctype_to_str(rtype), before32, insn32p[0], before32_1, insn32p[1]); break; case R_RISCV_PCREL_HI20: val = addr - (Elf_Addr)where; insn32p = (uint32_t*)where; before32 = *insn32p; imm20 = calc_hi20_imm(val); *insn32p = insert_imm(*insn32p, imm20, 31, 12, 12); if (debug_kld) printf("%p %c %-24s %08x -> %08x\n", where, (local? 'l': 'g'), reloctype_to_str(rtype), before32, *insn32p); break; case R_RISCV_PCREL_LO12_I: val = addr - (Elf_Addr)where; insn32p = (uint32_t*)where; before32 = *insn32p; *insn32p = insert_imm(*insn32p, addr, 11, 0, 20); if (debug_kld) printf("%p %c %-24s %08x -> %08x\n", where, (local? 'l': 'g'), reloctype_to_str(rtype), before32, *insn32p); break; case R_RISCV_PCREL_LO12_S: val = addr - (Elf_Addr)where; insn32p = (uint32_t*)where; before32 = *insn32p; *insn32p = insert_imm(*insn32p, addr, 11, 5, 25); *insn32p = insert_imm(*insn32p, addr, 4, 0, 7); if (debug_kld) printf("%p %c %-24s %08x -> %08x\n", where, (local? 'l': 'g'), reloctype_to_str(rtype), before32, *insn32p); break; case R_RISCV_HI20: error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; insn32p = (uint32_t*)where; before32 = *insn32p; imm20 = calc_hi20_imm(val); *insn32p = insert_imm(*insn32p, imm20, 31, 12, 12); if (debug_kld) printf("%p %c %-24s %08x -> %08x\n", where, (local? 'l': 'g'), reloctype_to_str(rtype), before32, *insn32p); break; case R_RISCV_LO12_I: error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; val = addr; insn32p = (uint32_t*)where; before32 = *insn32p; *insn32p = insert_imm(*insn32p, addr, 11, 0, 20); if (debug_kld) printf("%p %c %-24s %08x -> %08x\n", where, (local? 'l': 'g'), reloctype_to_str(rtype), before32, *insn32p); break; case R_RISCV_LO12_S: error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; val = addr; insn32p = (uint32_t*)where; before32 = *insn32p; *insn32p = insert_imm(*insn32p, addr, 11, 5, 25); *insn32p = insert_imm(*insn32p, addr, 4, 0, 7); if (debug_kld) printf("%p %c %-24s %08x -> %08x\n", where, (local? 'l': 'g'), reloctype_to_str(rtype), before32, *insn32p); break; default: printf("kldload: unexpected relocation type %ld\n", rtype); return (-1); } return (0); } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup)); } int elf_cpu_load_file(linker_file_t lf __unused) { return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } Index: head/sys/riscv/riscv/trap.c =================================================================== --- head/sys/riscv/riscv/trap.c (revision 342242) +++ head/sys/riscv/riscv/trap.c (revision 342243) @@ -1,398 +1,396 @@ /*- * Copyright (c) 2015-2018 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #ifdef KDB #include #endif #include #include #include #include #include #include #ifdef FPE #include #endif #include #include #include #include #include #ifdef KDTRACE_HOOKS #include #endif int (*dtrace_invop_jump_addr)(struct trapframe *); extern register_t fsu_intr_fault; /* Called from exception.S */ void do_trap_supervisor(struct trapframe *); void do_trap_user(struct trapframe *); static __inline void call_trapsignal(struct thread *td, int sig, int code, void *addr) { ksiginfo_t ksi; ksiginfo_init_trap(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = code; ksi.ksi_addr = addr; trapsignal(td, &ksi); } int cpu_fetch_syscall_args(struct thread *td) { struct proc *p; register_t *ap; struct syscall_args *sa; int nap; nap = NARGREG; p = td->td_proc; sa = &td->td_sa; ap = &td->td_frame->tf_a[0]; sa->code = td->td_frame->tf_t[0]; if (sa->code == SYS_syscall || sa->code == SYS___syscall) { sa->code = *ap++; nap--; } - if (p->p_sysent->sv_mask) - sa->code &= p->p_sysent->sv_mask; if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; memcpy(sa->args, ap, nap * sizeof(register_t)); if (sa->narg > nap) panic("TODO: Could we have more then %d args?", NARGREG); td->td_retval[0] = 0; td->td_retval[1] = 0; return (0); } #include "../../kern/subr_syscall.c" static void dump_regs(struct trapframe *frame) { int n; int i; n = (sizeof(frame->tf_t) / sizeof(frame->tf_t[0])); for (i = 0; i < n; i++) printf("t[%d] == 0x%016lx\n", i, frame->tf_t[i]); n = (sizeof(frame->tf_s) / sizeof(frame->tf_s[0])); for (i = 0; i < n; i++) printf("s[%d] == 0x%016lx\n", i, frame->tf_s[i]); n = (sizeof(frame->tf_a) / sizeof(frame->tf_a[0])); for (i = 0; i < n; i++) printf("a[%d] == 0x%016lx\n", i, frame->tf_a[i]); printf("sepc == 0x%016lx\n", frame->tf_sepc); printf("sstatus == 0x%016lx\n", frame->tf_sstatus); } static void svc_handler(struct trapframe *frame) { struct thread *td; int error; td = curthread; td->td_frame = frame; error = syscallenter(td); syscallret(td, error); } static void data_abort(struct trapframe *frame, int usermode) { struct vm_map *map; uint64_t stval; struct thread *td; struct pcb *pcb; vm_prot_t ftype; vm_offset_t va; struct proc *p; int error, sig, ucode; #ifdef KDB if (kdb_active) { kdb_reenter(); return; } #endif td = curthread; p = td->td_proc; pcb = td->td_pcb; stval = frame->tf_stval; if (td->td_critnest != 0 || td->td_intr_nesting_level != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) goto fatal; if (usermode) map = &td->td_proc->p_vmspace->vm_map; else if (stval >= VM_MAX_USER_ADDRESS) map = kernel_map; else { if (pcb->pcb_onfault == 0) goto fatal; map = &td->td_proc->p_vmspace->vm_map; } va = trunc_page(stval); if ((frame->tf_scause == EXCP_FAULT_STORE) || (frame->tf_scause == EXCP_STORE_PAGE_FAULT)) { ftype = VM_PROT_WRITE; } else if (frame->tf_scause == EXCP_INST_PAGE_FAULT) { ftype = VM_PROT_EXECUTE; } else { ftype = VM_PROT_READ; } if (pmap_fault_fixup(map->pmap, va, ftype)) goto done; if (map != kernel_map) { /* * Keep swapout from messing with us during this * critical time. */ PROC_LOCK(p); ++p->p_lock; PROC_UNLOCK(p); /* Fault in the user page: */ error = vm_fault(map, va, ftype, VM_FAULT_NORMAL); PROC_LOCK(p); --p->p_lock; PROC_UNLOCK(p); } else { /* * Don't have to worry about process locking or stacks in the * kernel. */ error = vm_fault(map, va, ftype, VM_FAULT_NORMAL); } if (error != KERN_SUCCESS) { if (usermode) { sig = SIGSEGV; if (error == KERN_PROTECTION_FAILURE) ucode = SEGV_ACCERR; else ucode = SEGV_MAPERR; call_trapsignal(td, sig, ucode, (void *)stval); } else { if (pcb->pcb_onfault != 0) { frame->tf_a[0] = error; frame->tf_sepc = pcb->pcb_onfault; return; } goto fatal; } } done: if (usermode) userret(td, frame); return; fatal: dump_regs(frame); panic("Fatal page fault at %#lx: %#016lx", frame->tf_sepc, stval); } void do_trap_supervisor(struct trapframe *frame) { uint64_t exception; uint64_t sstatus; /* Ensure we came from supervisor mode, interrupts disabled */ __asm __volatile("csrr %0, sstatus" : "=&r" (sstatus)); KASSERT((sstatus & (SSTATUS_SPP | SSTATUS_SIE)) == SSTATUS_SPP, ("We must came from S mode with interrupts disabled")); exception = (frame->tf_scause & EXCP_MASK); if (frame->tf_scause & EXCP_INTR) { /* Interrupt */ riscv_cpu_intr(frame); return; } #ifdef KDTRACE_HOOKS if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, exception)) return; #endif CTR3(KTR_TRAP, "do_trap_supervisor: curthread: %p, sepc: %lx, frame: %p", curthread, frame->tf_sepc, frame); switch(exception) { case EXCP_FAULT_LOAD: case EXCP_FAULT_STORE: case EXCP_FAULT_FETCH: case EXCP_STORE_PAGE_FAULT: case EXCP_LOAD_PAGE_FAULT: data_abort(frame, 0); break; case EXCP_BREAKPOINT: #ifdef KDTRACE_HOOKS if (dtrace_invop_jump_addr != 0) { dtrace_invop_jump_addr(frame); break; } #endif #ifdef KDB kdb_trap(exception, 0, frame); #else dump_regs(frame); panic("No debugger in kernel.\n"); #endif break; case EXCP_ILLEGAL_INSTRUCTION: dump_regs(frame); panic("Illegal instruction at 0x%016lx\n", frame->tf_sepc); break; default: dump_regs(frame); panic("Unknown kernel exception %x trap value %lx\n", exception, frame->tf_stval); } } void do_trap_user(struct trapframe *frame) { uint64_t exception; struct thread *td; uint64_t sstatus; struct pcb *pcb; td = curthread; td->td_frame = frame; pcb = td->td_pcb; /* Ensure we came from usermode, interrupts disabled */ __asm __volatile("csrr %0, sstatus" : "=&r" (sstatus)); KASSERT((sstatus & (SSTATUS_SPP | SSTATUS_SIE)) == 0, ("We must came from U mode with interrupts disabled")); exception = (frame->tf_scause & EXCP_MASK); if (frame->tf_scause & EXCP_INTR) { /* Interrupt */ riscv_cpu_intr(frame); return; } CTR3(KTR_TRAP, "do_trap_user: curthread: %p, sepc: %lx, frame: %p", curthread, frame->tf_sepc, frame); switch(exception) { case EXCP_FAULT_LOAD: case EXCP_FAULT_STORE: case EXCP_FAULT_FETCH: case EXCP_STORE_PAGE_FAULT: case EXCP_LOAD_PAGE_FAULT: case EXCP_INST_PAGE_FAULT: data_abort(frame, 1); break; case EXCP_USER_ECALL: frame->tf_sepc += 4; /* Next instruction */ svc_handler(frame); break; case EXCP_ILLEGAL_INSTRUCTION: #ifdef FPE if ((pcb->pcb_fpflags & PCB_FP_STARTED) == 0) { /* * May be a FPE trap. Enable FPE usage * for this thread and try again. */ fpe_state_clear(); frame->tf_sstatus &= ~SSTATUS_FS_MASK; frame->tf_sstatus |= SSTATUS_FS_CLEAN; pcb->pcb_fpflags |= PCB_FP_STARTED; break; } #endif call_trapsignal(td, SIGILL, ILL_ILLTRP, (void *)frame->tf_sepc); userret(td, frame); break; case EXCP_BREAKPOINT: call_trapsignal(td, SIGTRAP, TRAP_BRKPT, (void *)frame->tf_sepc); userret(td, frame); break; default: dump_regs(frame); panic("Unknown userland exception %x, trap value %lx\n", exception, frame->tf_stval); } } Index: head/sys/sparc64/sparc64/elf_machdep.c =================================================================== --- head/sys/sparc64/sparc64/elf_machdep.c (revision 342242) +++ head/sys/sparc64/sparc64/elf_machdep.c (revision 342243) @@ -1,432 +1,431 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-NetBSD * * Copyright (c) 2001 Jake Burkholder. * Copyright (c) 2000 Eduardo Horvath. * Copyright (c) 1999 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Paul Kranenburg. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * from: NetBSD: mdreloc.c,v 1.42 2008/04/28 20:23:04 martin Exp */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "linker_if.h" static struct sysentvec elf64_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, - .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = NULL, .sv_szsigcode = NULL, .sv_name = "FreeBSD ELF64", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_LP64, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, }; static Elf64_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_SPARCV9, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_info); static Elf64_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_SPARCV9, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_oinfo); void elf64_dump_thread(struct thread *td __unused, void *dst __unused, size_t *off __unused) { } /* * The following table holds for each relocation type: * - the width in bits of the memory location the relocation * applies to (not currently used) * - the number of bits the relocation value must be shifted to the * right (i.e. discard least significant bits) to fit into * the appropriate field in the instruction word. * - flags indicating whether * * the relocation involves a symbol * * the relocation is relative to the current position * * the relocation is for a GOT entry * * the relocation is relative to the load address * */ #define _RF_S 0x80000000 /* Resolve symbol */ #define _RF_A 0x40000000 /* Use addend */ #define _RF_P 0x20000000 /* Location relative */ #define _RF_G 0x10000000 /* GOT offset */ #define _RF_B 0x08000000 /* Load address relative */ #define _RF_U 0x04000000 /* Unaligned */ #define _RF_X 0x02000000 /* Bare symbols, needs proc */ #define _RF_D 0x01000000 /* Use dynamic TLS offset */ #define _RF_O 0x00800000 /* Use static TLS offset */ #define _RF_I 0x00400000 /* Use TLS object ID */ #define _RF_SZ(s) (((s) & 0xff) << 8) /* memory target size */ #define _RF_RS(s) ( (s) & 0xff) /* right shift */ static const int reloc_target_flags[] = { 0, /* NONE */ _RF_S|_RF_A| _RF_SZ(8) | _RF_RS(0), /* 8 */ _RF_S|_RF_A| _RF_SZ(16) | _RF_RS(0), /* 16 */ _RF_S|_RF_A| _RF_SZ(32) | _RF_RS(0), /* 32 */ _RF_S|_RF_A|_RF_P| _RF_SZ(8) | _RF_RS(0), /* DISP_8 */ _RF_S|_RF_A|_RF_P| _RF_SZ(16) | _RF_RS(0), /* DISP_16 */ _RF_S|_RF_A|_RF_P| _RF_SZ(32) | _RF_RS(0), /* DISP_32 */ _RF_S|_RF_A|_RF_P| _RF_SZ(32) | _RF_RS(2), /* WDISP_30 */ _RF_S|_RF_A|_RF_P| _RF_SZ(32) | _RF_RS(2), /* WDISP_22 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(10), /* HI22 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(0), /* 22 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(0), /* 13 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(0), /* LO10 */ _RF_G| _RF_SZ(32) | _RF_RS(0), /* GOT10 */ _RF_G| _RF_SZ(32) | _RF_RS(0), /* GOT13 */ _RF_G| _RF_SZ(32) | _RF_RS(10), /* GOT22 */ _RF_S|_RF_A|_RF_P| _RF_SZ(32) | _RF_RS(0), /* PC10 */ _RF_S|_RF_A|_RF_P| _RF_SZ(32) | _RF_RS(10), /* PC22 */ _RF_A|_RF_P| _RF_SZ(32) | _RF_RS(2), /* WPLT30 */ _RF_SZ(32) | _RF_RS(0), /* COPY */ _RF_S|_RF_A| _RF_SZ(64) | _RF_RS(0), /* GLOB_DAT */ _RF_SZ(32) | _RF_RS(0), /* JMP_SLOT */ _RF_A| _RF_B| _RF_SZ(64) | _RF_RS(0), /* RELATIVE */ _RF_S|_RF_A| _RF_U| _RF_SZ(32) | _RF_RS(0), /* UA_32 */ _RF_A| _RF_SZ(32) | _RF_RS(0), /* PLT32 */ _RF_A| _RF_SZ(32) | _RF_RS(10), /* HIPLT22 */ _RF_A| _RF_SZ(32) | _RF_RS(0), /* LOPLT10 */ _RF_A|_RF_P| _RF_SZ(32) | _RF_RS(0), /* PCPLT32 */ _RF_A|_RF_P| _RF_SZ(32) | _RF_RS(10), /* PCPLT22 */ _RF_A|_RF_P| _RF_SZ(32) | _RF_RS(0), /* PCPLT10 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(0), /* 10 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(0), /* 11 */ _RF_S|_RF_A|_RF_X| _RF_SZ(64) | _RF_RS(0), /* 64 */ _RF_S|_RF_A|/*extra*/ _RF_SZ(32) | _RF_RS(0), /* OLO10 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(42), /* HH22 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(32), /* HM10 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(10), /* LM22 */ _RF_S|_RF_A|_RF_P| _RF_SZ(32) | _RF_RS(42), /* PC_HH22 */ _RF_S|_RF_A|_RF_P| _RF_SZ(32) | _RF_RS(32), /* PC_HM10 */ _RF_S|_RF_A|_RF_P| _RF_SZ(32) | _RF_RS(10), /* PC_LM22 */ _RF_S|_RF_A|_RF_P| _RF_SZ(32) | _RF_RS(2), /* WDISP16 */ _RF_S|_RF_A|_RF_P| _RF_SZ(32) | _RF_RS(2), /* WDISP19 */ _RF_S|_RF_A| _RF_SZ(32) | _RF_RS(0), /* GLOB_JMP */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(0), /* 7 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(0), /* 5 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(0), /* 6 */ _RF_S|_RF_A|_RF_P| _RF_SZ(64) | _RF_RS(0), /* DISP64 */ _RF_A| _RF_SZ(64) | _RF_RS(0), /* PLT64 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(10), /* HIX22 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(0), /* LOX10 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(22), /* H44 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(12), /* M44 */ _RF_S|_RF_A|_RF_X| _RF_SZ(32) | _RF_RS(0), /* L44 */ _RF_S|_RF_A| _RF_SZ(64) | _RF_RS(0), /* REGISTER */ _RF_S|_RF_A| _RF_U| _RF_SZ(64) | _RF_RS(0), /* UA64 */ _RF_S|_RF_A| _RF_U| _RF_SZ(16) | _RF_RS(0), /* UA16 */ #if 0 /* TLS */ _RF_S|_RF_A| _RF_SZ(32) | _RF_RS(10), /* GD_HI22 */ _RF_S|_RF_A| _RF_SZ(32) | _RF_RS(0), /* GD_LO10 */ 0, /* GD_ADD */ _RF_A|_RF_P| _RF_SZ(32) | _RF_RS(2), /* GD_CALL */ _RF_S|_RF_A| _RF_SZ(32) | _RF_RS(10), /* LDM_HI22 */ _RF_S|_RF_A| _RF_SZ(32) | _RF_RS(0), /* LDM_LO10 */ 0, /* LDM_ADD */ _RF_A|_RF_P| _RF_SZ(32) | _RF_RS(2), /* LDM_CALL */ _RF_S|_RF_A| _RF_SZ(32) | _RF_RS(10), /* LDO_HIX22 */ _RF_S|_RF_A| _RF_SZ(32) | _RF_RS(0), /* LDO_LOX10 */ 0, /* LDO_ADD */ _RF_S|_RF_A| _RF_SZ(32) | _RF_RS(10), /* IE_HI22 */ _RF_S|_RF_A| _RF_SZ(32) | _RF_RS(0), /* IE_LO10 */ 0, /* IE_LD */ 0, /* IE_LDX */ 0, /* IE_ADD */ _RF_S|_RF_A| _RF_O| _RF_SZ(32) | _RF_RS(10), /* LE_HIX22 */ _RF_S|_RF_A| _RF_O| _RF_SZ(32) | _RF_RS(0), /* LE_LOX10 */ _RF_S| _RF_I| _RF_SZ(32) | _RF_RS(0), /* DTPMOD32 */ _RF_S| _RF_I| _RF_SZ(64) | _RF_RS(0), /* DTPMOD64 */ _RF_S|_RF_A| _RF_D| _RF_SZ(32) | _RF_RS(0), /* DTPOFF32 */ _RF_S|_RF_A| _RF_D| _RF_SZ(64) | _RF_RS(0), /* DTPOFF64 */ _RF_S|_RF_A| _RF_O| _RF_SZ(32) | _RF_RS(0), /* TPOFF32 */ _RF_S|_RF_A| _RF_O| _RF_SZ(64) | _RF_RS(0) /* TPOFF64 */ #endif }; #if 0 static const char *const reloc_names[] = { "NONE", "8", "16", "32", "DISP_8", "DISP_16", "DISP_32", "WDISP_30", "WDISP_22", "HI22", "22", "13", "LO10", "GOT10", "GOT13", "GOT22", "PC10", "PC22", "WPLT30", "COPY", "GLOB_DAT", "JMP_SLOT", "RELATIVE", "UA_32", "PLT32", "HIPLT22", "LOPLT10", "LOPLT10", "PCPLT22", "PCPLT32", "10", "11", "64", "OLO10", "HH22", "HM10", "LM22", "PC_HH22", "PC_HM10", "PC_LM22", "WDISP16", "WDISP19", "GLOB_JMP", "7", "5", "6", "DISP64", "PLT64", "HIX22", "LOX10", "H44", "M44", "L44", "REGISTER", "UA64", "UA16", "GD_HI22", "GD_LO10", "GD_ADD", "GD_CALL", "LDM_HI22", "LDMO10", "LDM_ADD", "LDM_CALL", "LDO_HIX22", "LDO_LOX10", "LDO_ADD", "IE_HI22", "IE_LO10", "IE_LD", "IE_LDX", "IE_ADD", "LE_HIX22", "LE_LOX10", "DTPMOD32", "DTPMOD64", "DTPOFF32", "DTPOFF64", "TPOFF32", "TPOFF64" }; #endif #define RELOC_RESOLVE_SYMBOL(t) ((reloc_target_flags[t] & _RF_S) != 0) #define RELOC_PC_RELATIVE(t) ((reloc_target_flags[t] & _RF_P) != 0) #define RELOC_BASE_RELATIVE(t) ((reloc_target_flags[t] & _RF_B) != 0) #define RELOC_UNALIGNED(t) ((reloc_target_flags[t] & _RF_U) != 0) #define RELOC_USE_ADDEND(t) ((reloc_target_flags[t] & _RF_A) != 0) #define RELOC_BARE_SYMBOL(t) ((reloc_target_flags[t] & _RF_X) != 0) #define RELOC_USE_TLS_DOFF(t) ((reloc_target_flags[t] & _RF_D) != 0) #define RELOC_USE_TLS_OFF(t) ((reloc_target_flags[t] & _RF_O) != 0) #define RELOC_USE_TLS_ID(t) ((reloc_target_flags[t] & _RF_I) != 0) #define RELOC_TARGET_SIZE(t) ((reloc_target_flags[t] >> 8) & 0xff) #define RELOC_VALUE_RIGHTSHIFT(t) (reloc_target_flags[t] & 0xff) static const long reloc_target_bitmask[] = { #define _BM(x) (~(-(1ULL << (x)))) 0, /* NONE */ _BM(8), _BM(16), _BM(32), /* 8, 16, 32 */ _BM(8), _BM(16), _BM(32), /* DISP8, DISP16, DISP32 */ _BM(30), _BM(22), /* WDISP30, WDISP22 */ _BM(22), _BM(22), /* HI22, 22 */ _BM(13), _BM(10), /* 13, LO10 */ _BM(10), _BM(13), _BM(22), /* GOT10, GOT13, GOT22 */ _BM(10), _BM(22), /* PC10, PC22 */ _BM(30), 0, /* WPLT30, COPY */ _BM(32), _BM(32), _BM(32), /* GLOB_DAT, JMP_SLOT, RELATIVE */ _BM(32), _BM(32), /* UA32, PLT32 */ _BM(22), _BM(10), /* HIPLT22, LOPLT10 */ _BM(32), _BM(22), _BM(10), /* PCPLT32, PCPLT22, PCPLT10 */ _BM(10), _BM(11), -1, /* 10, 11, 64 */ _BM(13), _BM(22), /* OLO10, HH22 */ _BM(10), _BM(22), /* HM10, LM22 */ _BM(22), _BM(10), _BM(22), /* PC_HH22, PC_HM10, PC_LM22 */ _BM(16), _BM(19), /* WDISP16, WDISP19 */ -1, /* GLOB_JMP */ _BM(7), _BM(5), _BM(6), /* 7, 5, 6 */ -1, -1, /* DISP64, PLT64 */ _BM(22), _BM(13), /* HIX22, LOX10 */ _BM(22), _BM(10), _BM(13), /* H44, M44, L44 */ -1, -1, _BM(16), /* REGISTER, UA64, UA16 */ #if 0 _BM(22), _BM(10), 0, _BM(30), /* GD_HI22, GD_LO10, GD_ADD, GD_CALL */ _BM(22), _BM(10), 0, /* LDM_HI22, LDMO10, LDM_ADD */ _BM(30), /* LDM_CALL */ _BM(22), _BM(10), 0, /* LDO_HIX22, LDO_LOX10, LDO_ADD */ _BM(22), _BM(10), 0, 0, /* IE_HI22, IE_LO10, IE_LD, IE_LDX */ 0, /* IE_ADD */ _BM(22), _BM(13), /* LE_HIX22, LE_LOX10 */ _BM(32), -1, /* DTPMOD32, DTPMOD64 */ _BM(32), -1, /* DTPOFF32, DTPOFF64 */ _BM(32), -1 /* TPOFF32, TPOFF64 */ #endif #undef _BM }; #define RELOC_VALUE_BITMASK(t) (reloc_target_bitmask[t]) bool elf_is_ifunc_reloc(Elf_Size r_info __unused) { return (false); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup __unused) { const Elf_Rela *rela; Elf_Addr *where; if (type != ELF_RELOC_RELA) return (-1); rela = (const Elf_Rela *)data; if (ELF64_R_TYPE_ID(rela->r_info) != R_SPARC_RELATIVE) return (-1); where = (Elf_Addr *)(relocbase + rela->r_offset); *where = elf_relocaddr(lf, rela->r_addend + relocbase); return (0); } /* Process one elf relocation with addend. */ int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { const Elf_Rela *rela; Elf_Word *where32; Elf_Addr *where; Elf_Size rtype, symidx; Elf_Addr value; Elf_Addr mask; Elf_Addr addr; int error; if (type != ELF_RELOC_RELA) return (-1); rela = (const Elf_Rela *)data; where = (Elf_Addr *)(relocbase + rela->r_offset); where32 = (Elf_Word *)where; rtype = ELF64_R_TYPE_ID(rela->r_info); symidx = ELF_R_SYM(rela->r_info); if (rtype == R_SPARC_NONE || rtype == R_SPARC_RELATIVE) return (0); if (rtype == R_SPARC_JMP_SLOT || rtype == R_SPARC_COPY || rtype >= nitems(reloc_target_bitmask)) { printf("kldload: unexpected relocation type %ld\n", rtype); return (-1); } if (RELOC_UNALIGNED(rtype)) { printf("kldload: unaligned relocation type %ld\n", rtype); return (-1); } value = rela->r_addend; if (RELOC_RESOLVE_SYMBOL(rtype)) { error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); value += addr; if (RELOC_BARE_SYMBOL(rtype)) value = elf_relocaddr(lf, value); } if (rtype == R_SPARC_OLO10) value = (value & 0x3ff) + ELF64_R_TYPE_DATA(rela->r_info); if (rtype == R_SPARC_HIX22) value ^= 0xffffffffffffffff; if (RELOC_PC_RELATIVE(rtype)) value -= (Elf_Addr)where; if (RELOC_BASE_RELATIVE(rtype)) value = elf_relocaddr(lf, value + relocbase); mask = RELOC_VALUE_BITMASK(rtype); value >>= RELOC_VALUE_RIGHTSHIFT(rtype); value &= mask; if (rtype == R_SPARC_LOX10) value |= 0x1c00; if (RELOC_TARGET_SIZE(rtype) > 32) { *where &= ~mask; *where |= value; } else { *where32 &= ~mask; *where32 |= value; } return (0); } int elf_cpu_load_file(linker_file_t lf __unused) { return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } Index: head/sys/sparc64/sparc64/trap.c =================================================================== --- head/sys/sparc64/sparc64/trap.c (revision 342242) +++ head/sys/sparc64/sparc64/trap.c (revision 342243) @@ -1,615 +1,613 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2001, Jake Burkholder * Copyright (C) 1994, David Greenman * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 * from: FreeBSD: src/sys/i386/i386/trap.c,v 1.197 2001/07/19 */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_ktr.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include void trap(struct trapframe *tf); void syscall(struct trapframe *tf); static int trap_cecc(void); static int trap_pfault(struct thread *td, struct trapframe *tf); extern char copy_fault[]; extern char copy_nofault_begin[]; extern char copy_nofault_end[]; extern char fs_fault[]; extern char fs_nofault_begin[]; extern char fs_nofault_end[]; extern char fas_fault[]; extern char fas_nofault_begin[]; extern char fas_nofault_end[]; const char *const trap_msg[] = { "reserved", "instruction access exception", "instruction access error", "instruction access protection", "illtrap instruction", "illegal instruction", "privileged opcode", "floating point disabled", "floating point exception ieee 754", "floating point exception other", "tag overflow", "division by zero", "data access exception", "data access error", "data access protection", "memory address not aligned", "privileged action", "async data error", "trap instruction 16", "trap instruction 17", "trap instruction 18", "trap instruction 19", "trap instruction 20", "trap instruction 21", "trap instruction 22", "trap instruction 23", "trap instruction 24", "trap instruction 25", "trap instruction 26", "trap instruction 27", "trap instruction 28", "trap instruction 29", "trap instruction 30", "trap instruction 31", "fast instruction access mmu miss", "fast data access mmu miss", "interrupt", "physical address watchpoint", "virtual address watchpoint", "corrected ecc error", "spill", "fill", "fill", "breakpoint", "clean window", "range check", "fix alignment", "integer overflow", "syscall", "restore physical watchpoint", "restore virtual watchpoint", "kernel stack fault", }; static const int trap_sig[] = { SIGILL, /* reserved */ SIGILL, /* instruction access exception */ SIGILL, /* instruction access error */ SIGILL, /* instruction access protection */ SIGILL, /* illtrap instruction */ SIGILL, /* illegal instruction */ SIGBUS, /* privileged opcode */ SIGFPE, /* floating point disabled */ SIGFPE, /* floating point exception ieee 754 */ SIGFPE, /* floating point exception other */ SIGEMT, /* tag overflow */ SIGFPE, /* division by zero */ SIGILL, /* data access exception */ SIGILL, /* data access error */ SIGBUS, /* data access protection */ SIGBUS, /* memory address not aligned */ SIGBUS, /* privileged action */ SIGBUS, /* async data error */ SIGILL, /* trap instruction 16 */ SIGILL, /* trap instruction 17 */ SIGILL, /* trap instruction 18 */ SIGILL, /* trap instruction 19 */ SIGILL, /* trap instruction 20 */ SIGILL, /* trap instruction 21 */ SIGILL, /* trap instruction 22 */ SIGILL, /* trap instruction 23 */ SIGILL, /* trap instruction 24 */ SIGILL, /* trap instruction 25 */ SIGILL, /* trap instruction 26 */ SIGILL, /* trap instruction 27 */ SIGILL, /* trap instruction 28 */ SIGILL, /* trap instruction 29 */ SIGILL, /* trap instruction 30 */ SIGILL, /* trap instruction 31 */ SIGSEGV, /* fast instruction access mmu miss */ SIGSEGV, /* fast data access mmu miss */ -1, /* interrupt */ -1, /* physical address watchpoint */ -1, /* virtual address watchpoint */ -1, /* corrected ecc error */ SIGILL, /* spill */ SIGILL, /* fill */ SIGILL, /* fill */ SIGTRAP, /* breakpoint */ SIGILL, /* clean window */ SIGILL, /* range check */ SIGILL, /* fix alignment */ SIGILL, /* integer overflow */ SIGSYS, /* syscall */ -1, /* restore physical watchpoint */ -1, /* restore virtual watchpoint */ -1, /* kernel stack fault */ }; CTASSERT(nitems(trap_msg) == T_MAX); CTASSERT(nitems(trap_sig) == T_MAX); CTASSERT(sizeof(struct trapframe) == 256); int debugger_on_signal = 0; SYSCTL_INT(_debug, OID_AUTO, debugger_on_signal, CTLFLAG_RW, &debugger_on_signal, 0, ""); u_int corrected_ecc = 0; SYSCTL_UINT(_machdep, OID_AUTO, corrected_ecc, CTLFLAG_RD, &corrected_ecc, 0, "corrected ECC errors"); /* * SUNW,set-trap-table allows to take over %tba from the PROM, which * will turn off interrupts and handle outstanding ones while doing so, * in a safe way. */ void sun4u_set_traptable(void *tba_addr) { static struct { cell_t name; cell_t nargs; cell_t nreturns; cell_t tba_addr; } args = { (cell_t)"SUNW,set-trap-table", 1, 0, }; args.tba_addr = (cell_t)tba_addr; ofw_entry(&args); } void trap(struct trapframe *tf) { struct thread *td; struct proc *p; int error; int sig, ucode; register_t addr; ksiginfo_t ksi; td = curthread; CTR4(KTR_TRAP, "trap: %p type=%s (%s) pil=%#lx", td, trap_msg[tf->tf_type & ~T_KERNEL], (TRAPF_USERMODE(tf) ? "user" : "kernel"), rdpr(pil)); VM_CNT_INC(v_trap); if ((tf->tf_tstate & TSTATE_PRIV) == 0) { KASSERT(td != NULL, ("trap: curthread NULL")); KASSERT(td->td_proc != NULL, ("trap: curproc NULL")); p = td->td_proc; td->td_pticks = 0; td->td_frame = tf; addr = tf->tf_tpc; ucode = (int)tf->tf_type; /* XXX not POSIX */ if (td->td_cowgen != p->p_cowgen) thread_cow_update(td); switch (tf->tf_type) { case T_DATA_MISS: case T_DATA_PROTECTION: addr = tf->tf_sfar; /* FALLTHROUGH */ case T_INSTRUCTION_MISS: sig = trap_pfault(td, tf); break; case T_FILL: sig = rwindow_load(td, tf, 2); break; case T_FILL_RET: sig = rwindow_load(td, tf, 1); break; case T_SPILL: sig = rwindow_save(td); break; case T_CORRECTED_ECC_ERROR: sig = trap_cecc(); break; case T_BREAKPOINT: sig = SIGTRAP; ucode = TRAP_BRKPT; break; default: if (tf->tf_type > T_MAX) panic("trap: bad trap type %#lx (user)", tf->tf_type); else if (trap_sig[tf->tf_type] == -1) panic("trap: %s (user)", trap_msg[tf->tf_type]); sig = trap_sig[tf->tf_type]; break; } if (sig != 0) { /* Translate fault for emulators. */ if (p->p_sysent->sv_transtrap != NULL) { sig = p->p_sysent->sv_transtrap(sig, tf->tf_type); } if (debugger_on_signal && (sig == 4 || sig == 10 || sig == 11)) kdb_enter(KDB_WHY_TRAPSIG, "trapsig"); ksiginfo_init_trap(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = ucode; ksi.ksi_addr = (void *)addr; ksi.ksi_trapno = (int)tf->tf_type; trapsignal(td, &ksi); } userret(td, tf); } else { KASSERT((tf->tf_type & T_KERNEL) != 0, ("trap: kernel trap isn't")); if (kdb_active) { kdb_reenter(); return; } switch (tf->tf_type & ~T_KERNEL) { case T_BREAKPOINT: case T_KSTACK_FAULT: error = (kdb_trap(tf->tf_type, 0, tf) == 0); TF_DONE(tf); break; #ifdef notyet case T_PA_WATCHPOINT: case T_VA_WATCHPOINT: error = db_watch_trap(tf); break; #endif case T_DATA_MISS: case T_DATA_PROTECTION: case T_INSTRUCTION_MISS: error = trap_pfault(td, tf); break; case T_DATA_EXCEPTION: case T_MEM_ADDRESS_NOT_ALIGNED: if ((tf->tf_sfsr & MMU_SFSR_FV) != 0 && MMU_SFSR_GET_ASI(tf->tf_sfsr) == ASI_AIUP) { if (tf->tf_tpc >= (u_long)copy_nofault_begin && tf->tf_tpc <= (u_long)copy_nofault_end) { tf->tf_tpc = (u_long)copy_fault; tf->tf_tnpc = tf->tf_tpc + 4; error = 0; break; } if (tf->tf_tpc >= (u_long)fs_nofault_begin && tf->tf_tpc <= (u_long)fs_nofault_end) { tf->tf_tpc = (u_long)fs_fault; tf->tf_tnpc = tf->tf_tpc + 4; error = 0; break; } } error = 1; break; case T_DATA_ERROR: /* * Handle PCI poke/peek as per UltraSPARC IIi * User's Manual 16.2.1, modulo checking the * TPC as USIII CPUs generate a precise trap * instead of a special deferred one. */ if (tf->tf_tpc > (u_long)fas_nofault_begin && tf->tf_tpc < (u_long)fas_nofault_end) { cache_flush(); cache_enable(PCPU_GET(impl)); tf->tf_tpc = (u_long)fas_fault; tf->tf_tnpc = tf->tf_tpc + 4; error = 0; break; } error = 1; break; case T_CORRECTED_ECC_ERROR: error = trap_cecc(); break; default: error = 1; break; } if (error != 0) { tf->tf_type &= ~T_KERNEL; if (tf->tf_type > T_MAX) panic("trap: bad trap type %#lx (kernel)", tf->tf_type); panic("trap: %s (kernel)", trap_msg[tf->tf_type]); } } CTR1(KTR_TRAP, "trap: td=%p return", td); } static int trap_cecc(void) { u_long eee; /* * Turn off (non-)correctable error reporting while we're dealing * with the error. */ eee = ldxa(0, ASI_ESTATE_ERROR_EN_REG); stxa_sync(0, ASI_ESTATE_ERROR_EN_REG, eee & ~(AA_ESTATE_NCEEN | AA_ESTATE_CEEN)); /* Flush the caches in order ensure no corrupt data got installed. */ cache_flush(); /* Ensure the caches are still turned on (should be). */ cache_enable(PCPU_GET(impl)); /* Clear the error from the AFSR. */ stxa_sync(0, ASI_AFSR, ldxa(0, ASI_AFSR)); corrected_ecc++; printf("corrected ECC error\n"); /* Turn (non-)correctable error reporting back on. */ stxa_sync(0, ASI_ESTATE_ERROR_EN_REG, eee); return (0); } static int trap_pfault(struct thread *td, struct trapframe *tf) { vm_map_t map; struct proc *p; vm_offset_t va; vm_prot_t prot; vm_map_entry_t entry; u_long ctx; int type; int rv; if (td == NULL) return (-1); KASSERT(td->td_pcb != NULL, ("trap_pfault: pcb NULL")); KASSERT(td->td_proc != NULL, ("trap_pfault: curproc NULL")); KASSERT(td->td_proc->p_vmspace != NULL, ("trap_pfault: vmspace NULL")); p = td->td_proc; rv = KERN_SUCCESS; ctx = TLB_TAR_CTX(tf->tf_tar); type = tf->tf_type & ~T_KERNEL; va = TLB_TAR_VA(tf->tf_tar); CTR4(KTR_TRAP, "trap_pfault: td=%p pm_ctx=%#lx va=%#lx ctx=%#lx", td, p->p_vmspace->vm_pmap.pm_context[curcpu], va, ctx); if (type == T_DATA_PROTECTION) prot = VM_PROT_WRITE; else { if (type == T_DATA_MISS) prot = VM_PROT_READ; else prot = VM_PROT_READ | VM_PROT_EXECUTE; } if (ctx != TLB_CTX_KERNEL) { /* This is a fault on non-kernel virtual memory. */ map = &p->p_vmspace->vm_map; } else { /* * This is a fault on kernel virtual memory. Attempts to * access kernel memory from user mode cause privileged * action traps, not page fault. */ KASSERT(tf->tf_tstate & TSTATE_PRIV, ("trap_pfault: fault on nucleus context from user mode")); if (tf->tf_tpc >= (u_long)copy_nofault_begin && tf->tf_tpc <= (u_long)copy_nofault_end) { vm_map_lock_read(kernel_map); if (vm_map_lookup_entry(kernel_map, va, &entry) && (entry->eflags & MAP_ENTRY_NOFAULT) != 0) { tf->tf_tpc = (u_long)copy_fault; tf->tf_tnpc = tf->tf_tpc + 4; vm_map_unlock_read(kernel_map); return (0); } vm_map_unlock_read(kernel_map); } map = kernel_map; } /* Fault in the page. */ rv = vm_fault(map, va, prot, VM_FAULT_NORMAL); CTR3(KTR_TRAP, "trap_pfault: return td=%p va=%#lx rv=%d", td, va, rv); if (rv == KERN_SUCCESS) return (0); if (ctx != TLB_CTX_KERNEL && (tf->tf_tstate & TSTATE_PRIV) != 0) { if (tf->tf_tpc >= (u_long)fs_nofault_begin && tf->tf_tpc <= (u_long)fs_nofault_end) { tf->tf_tpc = (u_long)fs_fault; tf->tf_tnpc = tf->tf_tpc + 4; return (0); } if (tf->tf_tpc >= (u_long)copy_nofault_begin && tf->tf_tpc <= (u_long)copy_nofault_end) { tf->tf_tpc = (u_long)copy_fault; tf->tf_tnpc = tf->tf_tpc + 4; return (0); } } return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); } /* Maximum number of arguments that can be passed via the out registers. */ #define REG_MAXARGS 6 int cpu_fetch_syscall_args(struct thread *td) { struct trapframe *tf; struct proc *p; register_t *argp; struct syscall_args *sa; int reg; int regcnt; int error; p = td->td_proc; tf = td->td_frame; sa = &td->td_sa; reg = 0; regcnt = REG_MAXARGS; sa->code = tf->tf_global[1]; if (sa->code == SYS_syscall || sa->code == SYS___syscall) { sa->code = tf->tf_out[reg++]; regcnt--; } - if (p->p_sysent->sv_mask) - sa->code &= p->p_sysent->sv_mask; if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; KASSERT(sa->narg <= sizeof(sa->args) / sizeof(sa->args[0]), ("Too many syscall arguments!")); error = 0; argp = sa->args; bcopy(&tf->tf_out[reg], sa->args, sizeof(sa->args[0]) * regcnt); if (sa->narg > regcnt) error = copyin((void *)(tf->tf_out[6] + SPOFF + offsetof(struct frame, fr_pad[6])), &sa->args[regcnt], (sa->narg - regcnt) * sizeof(sa->args[0])); if (error == 0) { td->td_retval[0] = 0; td->td_retval[1] = 0; } return (error); } #include "../../kern/subr_syscall.c" /* * Syscall handler * The arguments to the syscall are passed in the out registers by the caller, * and are saved in the trap frame. The syscall number is passed in %g1 (and * also saved in the trap frame). */ void syscall(struct trapframe *tf) { struct thread *td; int error; td = curthread; td->td_frame = tf; KASSERT(td != NULL, ("trap: curthread NULL")); KASSERT(td->td_proc != NULL, ("trap: curproc NULL")); /* * For syscalls, we don't want to retry the faulting instruction * (usually), instead we need to advance one instruction. */ td->td_pcb->pcb_tpc = tf->tf_tpc; TF_DONE(tf); error = syscallenter(td); syscallret(td, error); } Index: head/sys/sys/sysent.h =================================================================== --- head/sys/sys/sysent.h (revision 342242) +++ head/sys/sys/sysent.h (revision 342243) @@ -1,325 +1,324 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1988, 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_SYSENT_H_ #define _SYS_SYSENT_H_ #include struct rlimit; struct sysent; struct thread; struct ksiginfo; struct syscall_args; enum systrace_probe_t { SYSTRACE_ENTRY, SYSTRACE_RETURN, }; typedef int sy_call_t(struct thread *, void *); typedef void (*systrace_probe_func_t)(struct syscall_args *, enum systrace_probe_t, int); typedef void (*systrace_args_func_t)(int, void *, uint64_t *, int *); #ifdef _KERNEL extern bool systrace_enabled; #endif extern systrace_probe_func_t systrace_probe_func; struct sysent { /* system call table */ int sy_narg; /* number of arguments */ sy_call_t *sy_call; /* implementing function */ au_event_t sy_auevent; /* audit event associated with syscall */ systrace_args_func_t sy_systrace_args_func; /* optional argument conversion function. */ u_int32_t sy_entry; /* DTrace entry ID for systrace. */ u_int32_t sy_return; /* DTrace return ID for systrace. */ u_int32_t sy_flags; /* General flags for system calls. */ u_int32_t sy_thrcnt; }; /* * A system call is permitted in capability mode. */ #define SYF_CAPENABLED 0x00000001 #define SY_THR_FLAGMASK 0x7 #define SY_THR_STATIC 0x1 #define SY_THR_DRAINING 0x2 #define SY_THR_ABSENT 0x4 #define SY_THR_INCR 0x8 #ifdef KLD_MODULE #define SY_THR_STATIC_KLD 0 #else #define SY_THR_STATIC_KLD SY_THR_STATIC #endif struct image_params; struct __sigset; struct trapframe; struct vnode; struct sysentvec { int sv_size; /* number of entries */ struct sysent *sv_table; /* pointer to sysent */ - u_int sv_mask; /* optional mask to index */ int sv_errsize; /* size of errno translation table */ const int *sv_errtbl; /* errno translation table */ int (*sv_transtrap)(int, int); /* translate trap-to-signal mapping */ int (*sv_fixup)(register_t **, struct image_params *); /* stack fixup function */ void (*sv_sendsig)(void (*)(int), struct ksiginfo *, struct __sigset *); /* send signal */ char *sv_sigcode; /* start of sigtramp code */ int *sv_szsigcode; /* size of sigtramp code */ char *sv_name; /* name of binary type */ int (*sv_coredump)(struct thread *, struct vnode *, off_t, int); /* function to dump core, or NULL */ int (*sv_imgact_try)(struct image_params *); int sv_minsigstksz; /* minimum signal stack size */ int sv_pagesize; /* pagesize */ vm_offset_t sv_minuser; /* VM_MIN_ADDRESS */ vm_offset_t sv_maxuser; /* VM_MAXUSER_ADDRESS */ vm_offset_t sv_usrstack; /* USRSTACK */ vm_offset_t sv_psstrings; /* PS_STRINGS */ int sv_stackprot; /* vm protection for stack */ register_t *(*sv_copyout_strings)(struct image_params *); void (*sv_setregs)(struct thread *, struct image_params *, u_long); void (*sv_fixlimit)(struct rlimit *, int); u_long *sv_maxssiz; u_int sv_flags; void (*sv_set_syscall_retval)(struct thread *, int); int (*sv_fetch_syscall_args)(struct thread *); const char **sv_syscallnames; vm_offset_t sv_timekeep_base; vm_offset_t sv_shared_page_base; vm_offset_t sv_shared_page_len; vm_offset_t sv_sigcode_base; void *sv_shared_page_obj; void (*sv_schedtail)(struct thread *); void (*sv_thread_detach)(struct thread *); int (*sv_trap)(struct thread *); u_long *sv_hwcap; /* Value passed in AT_HWCAP. */ u_long *sv_hwcap2; /* Value passed in AT_HWCAP2. */ }; #define SV_ILP32 0x000100 /* 32-bit executable. */ #define SV_LP64 0x000200 /* 64-bit executable. */ #define SV_IA32 0x004000 /* Intel 32-bit executable. */ #define SV_AOUT 0x008000 /* a.out executable. */ #define SV_SHP 0x010000 /* Shared page. */ #define SV_CAPSICUM 0x020000 /* Force cap_enter() on startup. */ #define SV_TIMEKEEP 0x040000 /* Shared page timehands. */ #define SV_ABI_MASK 0xff #define SV_ABI_ERRNO(p, e) ((p)->p_sysent->sv_errsize <= 0 ? e : \ ((e) >= (p)->p_sysent->sv_errsize ? -1 : (p)->p_sysent->sv_errtbl[e])) #define SV_PROC_FLAG(p, x) ((p)->p_sysent->sv_flags & (x)) #define SV_PROC_ABI(p) ((p)->p_sysent->sv_flags & SV_ABI_MASK) #define SV_CURPROC_FLAG(x) SV_PROC_FLAG(curproc, x) #define SV_CURPROC_ABI() SV_PROC_ABI(curproc) /* same as ELFOSABI_XXX, to prevent header pollution */ #define SV_ABI_LINUX 3 #define SV_ABI_FREEBSD 9 #define SV_ABI_CLOUDABI 17 #define SV_ABI_UNDEF 255 #ifdef _KERNEL extern struct sysentvec aout_sysvec; extern struct sysent sysent[]; extern const char *syscallnames[]; #if defined(__amd64__) extern int i386_read_exec; #endif #define NO_SYSCALL (-1) struct module; struct syscall_module_data { int (*chainevh)(struct module *, int, void *); /* next handler */ void *chainarg; /* arg for next event handler */ int *offset; /* offset into sysent */ struct sysent *new_sysent; /* new sysent */ struct sysent old_sysent; /* old sysent */ int flags; /* flags for syscall_register */ }; /* separate initialization vector so it can be used in a substructure */ #define SYSENT_INIT_VALS(_syscallname) { \ .sy_narg = (sizeof(struct _syscallname ## _args ) \ / sizeof(register_t)), \ .sy_call = (sy_call_t *)&sys_##_syscallname, \ .sy_auevent = SYS_AUE_##_syscallname, \ .sy_systrace_args_func = NULL, \ .sy_entry = 0, \ .sy_return = 0, \ .sy_flags = 0, \ .sy_thrcnt = 0 \ } #define MAKE_SYSENT(syscallname) \ static struct sysent syscallname##_sysent = SYSENT_INIT_VALS(syscallname); #define MAKE_SYSENT_COMPAT(syscallname) \ static struct sysent syscallname##_sysent = { \ (sizeof(struct syscallname ## _args ) \ / sizeof(register_t)), \ (sy_call_t *)& syscallname, \ SYS_AUE_##syscallname \ } #define SYSCALL_MODULE(name, offset, new_sysent, evh, arg) \ static struct syscall_module_data name##_syscall_mod = { \ evh, arg, offset, new_sysent, { 0, NULL, AUE_NULL } \ }; \ \ static moduledata_t name##_mod = { \ "sys/" #name, \ syscall_module_handler, \ &name##_syscall_mod \ }; \ DECLARE_MODULE(name, name##_mod, SI_SUB_SYSCALLS, SI_ORDER_MIDDLE) #define SYSCALL_MODULE_HELPER(syscallname) \ static int syscallname##_syscall = SYS_##syscallname; \ MAKE_SYSENT(syscallname); \ SYSCALL_MODULE(syscallname, \ & syscallname##_syscall, & syscallname##_sysent, \ NULL, NULL) #define SYSCALL_MODULE_PRESENT(syscallname) \ (sysent[SYS_##syscallname].sy_call != (sy_call_t *)lkmnosys && \ sysent[SYS_##syscallname].sy_call != (sy_call_t *)lkmressys) /* * Syscall registration helpers with resource allocation handling. */ struct syscall_helper_data { struct sysent new_sysent; struct sysent old_sysent; int syscall_no; int registered; }; #define SYSCALL_INIT_HELPER_F(syscallname, flags) { \ .new_sysent = { \ .sy_narg = (sizeof(struct syscallname ## _args ) \ / sizeof(register_t)), \ .sy_call = (sy_call_t *)& sys_ ## syscallname, \ .sy_auevent = SYS_AUE_##syscallname, \ .sy_flags = (flags) \ }, \ .syscall_no = SYS_##syscallname \ } #define SYSCALL_INIT_HELPER_COMPAT_F(syscallname, flags) { \ .new_sysent = { \ .sy_narg = (sizeof(struct syscallname ## _args ) \ / sizeof(register_t)), \ .sy_call = (sy_call_t *)& syscallname, \ .sy_auevent = SYS_AUE_##syscallname, \ .sy_flags = (flags) \ }, \ .syscall_no = SYS_##syscallname \ } #define SYSCALL_INIT_HELPER(syscallname) \ SYSCALL_INIT_HELPER_F(syscallname, 0) #define SYSCALL_INIT_HELPER_COMPAT(syscallname) \ SYSCALL_INIT_HELPER_COMPAT_F(syscallname, 0) #define SYSCALL_INIT_LAST { \ .syscall_no = NO_SYSCALL \ } int syscall_module_handler(struct module *mod, int what, void *arg); int syscall_helper_register(struct syscall_helper_data *sd, int flags); int syscall_helper_unregister(struct syscall_helper_data *sd); /* Implementation, exposed for COMPAT code */ int kern_syscall_register(struct sysent *sysents, int *offset, struct sysent *new_sysent, struct sysent *old_sysent, int flags); int kern_syscall_deregister(struct sysent *sysents, int offset, const struct sysent *old_sysent); int kern_syscall_module_handler(struct sysent *sysents, struct module *mod, int what, void *arg); int kern_syscall_helper_register(struct sysent *sysents, struct syscall_helper_data *sd, int flags); int kern_syscall_helper_unregister(struct sysent *sysents, struct syscall_helper_data *sd); struct proc; const char *syscallname(struct proc *p, u_int code); /* Special purpose system call functions. */ struct nosys_args; int lkmnosys(struct thread *, struct nosys_args *); int lkmressys(struct thread *, struct nosys_args *); int _syscall_thread_enter(struct thread *td, struct sysent *se); void _syscall_thread_exit(struct thread *td, struct sysent *se); static inline int syscall_thread_enter(struct thread *td, struct sysent *se) { if (__predict_true((se->sy_thrcnt & SY_THR_STATIC) != 0)) return (0); return (_syscall_thread_enter(td, se)); } static inline void syscall_thread_exit(struct thread *td, struct sysent *se) { if (__predict_true((se->sy_thrcnt & SY_THR_STATIC) != 0)) return; _syscall_thread_exit(td, se); } int shared_page_alloc(int size, int align); int shared_page_fill(int size, int align, const void *data); void shared_page_write(int base, int size, const void *data); void exec_sysvec_init(void *param); void exec_inittk(void); #define INIT_SYSENTVEC(name, sv) \ SYSINIT(name, SI_SUB_EXEC, SI_ORDER_ANY, \ (sysinit_cfunc_t)exec_sysvec_init, sv); #endif /* _KERNEL */ #endif /* !_SYS_SYSENT_H_ */