Index: head/sys/amd64/amd64/elf_machdep.c =================================================================== --- head/sys/amd64/amd64/elf_machdep.c (revision 365831) +++ head/sys/amd64/amd64/elf_machdep.c (revision 365832) @@ -1,429 +1,425 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct sysentvec elf64_freebsd_sysvec_la48 = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, - .sv_errsize = 0, - .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD ELF64", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS_LA48, .sv_usrstack = USRSTACK_LA48, .sv_psstrings = PS_STRINGS_LA48, .sv_stackprot = VM_PROT_ALL, .sv_copyout_auxargs = __elfN(freebsd_copyout_auxargs), .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_ASLR | SV_LP64 | SV_SHP | SV_TIMEKEEP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE_LA48, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_stackgap = elf64_stackgap, }; struct sysentvec elf64_freebsd_sysvec_la57 = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, - .sv_errsize = 0, - .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD ELF64", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS_LA57, .sv_usrstack = USRSTACK_LA57, .sv_psstrings = PS_STRINGS_LA57, .sv_stackprot = VM_PROT_ALL, .sv_copyout_auxargs = __elfN(freebsd_copyout_auxargs), .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_ASLR | SV_LP64 | SV_SHP | SV_TIMEKEEP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE_LA57, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_stackgap = elf64_stackgap, }; static void amd64_init_sysvecs(void *arg) { amd64_lower_shared_page(&elf64_freebsd_sysvec_la48); if (la57) { exec_sysvec_init(&elf64_freebsd_sysvec_la57); exec_sysvec_init_secondary(&elf64_freebsd_sysvec_la57, &elf64_freebsd_sysvec_la48); } else { exec_sysvec_init(&elf64_freebsd_sysvec_la48); } } SYSINIT(elf64_sysvec, SI_SUB_EXEC, SI_ORDER_ANY, amd64_init_sysvecs, NULL); void amd64_lower_shared_page(struct sysentvec *sv) { if (hw_lower_amd64_sharedpage != 0) { sv->sv_maxuser -= PAGE_SIZE; sv->sv_shared_page_base -= PAGE_SIZE; sv->sv_usrstack -= PAGE_SIZE; sv->sv_psstrings -= PAGE_SIZE; } } static boolean_t freebsd_brand_info_la57_img_compat(struct image_params *imgp, int32_t *osrel __unused, uint32_t *fctl0) { if ((imgp->proc->p_md.md_flags & P_MD_LA57) != 0) return (TRUE); if (fctl0 == NULL || (*fctl0 & NT_FREEBSD_FCTL_LA48) != 0) return (FALSE); if ((imgp->proc->p_md.md_flags & P_MD_LA48) != 0) return (FALSE); return (TRUE); } static Elf64_Brandinfo freebsd_brand_info_la48 = { .brand = ELFOSABI_FREEBSD, .machine = EM_X86_64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec_la48, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, }; static Elf64_Brandinfo freebsd_brand_info_la57 = { .brand = ELFOSABI_FREEBSD, .machine = EM_X86_64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec_la57, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, .header_supported = freebsd_brand_info_la57_img_compat, }; static void sysinit_register_elf64_brand_entries(void *arg __unused) { /* * _57 must go first so it can either claim the image or hand * it to _48. */ if (la57) elf64_insert_brand_entry(&freebsd_brand_info_la57); elf64_insert_brand_entry(&freebsd_brand_info_la48); } SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST, sysinit_register_elf64_brand_entries, NULL); static Elf64_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_X86_64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec_la48, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_oinfo); static Elf64_Brandinfo kfreebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_X86_64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/lib/ld-kfreebsd-x86-64.so.1", .sysvec = &elf64_freebsd_sysvec_la48, .interp_newpath = NULL, .brand_note = &elf64_kfreebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY }; SYSINIT(kelf64, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t)elf64_insert_brand_entry, &kfreebsd_brand_info); void elf64_dump_thread(struct thread *td, void *dst, size_t *off) { void *buf; size_t len; len = 0; if (use_xsave) { if (dst != NULL) { fpugetregs(td); len += elf64_populate_note(NT_X86_XSTATE, get_pcb_user_save_td(td), dst, cpu_max_ext_state_size, &buf); *(uint64_t *)((char *)buf + X86_XSTATE_XCR0_OFFSET) = xsave_mask; } else len += elf64_populate_note(NT_X86_XSTATE, NULL, NULL, cpu_max_ext_state_size, NULL); } *off = len; } bool elf_is_ifunc_reloc(Elf_Size r_info) { return (ELF_R_TYPE(r_info) == R_X86_64_IRELATIVE); } /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, bool late_ifunc, elf_lookup_fn lookup) { Elf64_Addr *where, val; Elf32_Addr *where32, val32; Elf_Addr addr; Elf_Addr addend; Elf_Size rtype, symidx; const Elf_Rel *rel; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: rel = (const Elf_Rel *)data; where = (Elf_Addr *) (relocbase + rel->r_offset); rtype = ELF_R_TYPE(rel->r_info); symidx = ELF_R_SYM(rel->r_info); /* Addend is 32 bit on 32 bit relocs */ switch (rtype) { case R_X86_64_PC32: case R_X86_64_32S: case R_X86_64_PLT32: addend = *(Elf32_Addr *)where; break; default: addend = *where; break; } break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("unknown reloc type %d\n", type); } if (late_ifunc) { KASSERT(type == ELF_RELOC_RELA, ("Only RELA ifunc relocations are supported")); if (rtype != R_X86_64_IRELATIVE) return (0); } switch (rtype) { case R_X86_64_NONE: /* none */ break; case R_X86_64_64: /* S + A */ error = lookup(lf, symidx, 1, &addr); val = addr + addend; if (error != 0) return -1; if (*where != val) *where = val; break; case R_X86_64_PC32: /* S + A - P */ case R_X86_64_PLT32: /* L + A - P, L is PLT location for the symbol, which we treat as S */ error = lookup(lf, symidx, 1, &addr); where32 = (Elf32_Addr *)where; val32 = (Elf32_Addr)(addr + addend - (Elf_Addr)where); if (error != 0) return -1; if (*where32 != val32) *where32 = val32; break; case R_X86_64_32S: /* S + A sign extend */ error = lookup(lf, symidx, 1, &addr); val32 = (Elf32_Addr)(addr + addend); where32 = (Elf32_Addr *)where; if (error != 0) return -1; if (*where32 != val32) *where32 = val32; break; case R_X86_64_COPY: /* none */ /* * There shouldn't be copy relocations in kernel * objects. */ printf("kldload: unexpected R_COPY relocation\n"); return (-1); case R_X86_64_GLOB_DAT: /* S */ case R_X86_64_JMP_SLOT: /* XXX need addend + offset */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; if (*where != addr) *where = addr; break; case R_X86_64_RELATIVE: /* B + A */ addr = elf_relocaddr(lf, relocbase + addend); val = addr; if (*where != val) *where = val; break; case R_X86_64_IRELATIVE: addr = relocbase + addend; val = ((Elf64_Addr (*)(void))addr)(); if (*where != val) *where = val; break; default: printf("kldload: unexpected relocation type %ld\n", rtype); return (-1); } return (0); } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, false, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, false, lookup)); } int elf_reloc_late(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, true, lookup)); } int elf_cpu_load_file(linker_file_t lf __unused) { return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } int elf_cpu_parse_dynamic(caddr_t loadbase __unused, Elf_Dyn *dynamic __unused) { return (0); } Index: head/sys/amd64/linux/linux_sysvec.c =================================================================== --- head/sys/amd64/linux/linux_sysvec.c (revision 365831) +++ head/sys/amd64/linux/linux_sysvec.c (revision 365832) @@ -1,940 +1,938 @@ /*- * Copyright (c) 2013 Dmitry Chagin * Copyright (c) 2004 Tim J. Robbins * Copyright (c) 2003 Peter Wemm * Copyright (c) 2002 Doug Rabson * Copyright (c) 1998-1999 Andrew Gallatin * Copyright (c) 1994-1996 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #define __ELF_WORD_SIZE 64 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_VERSION(linux64, 1); const char *linux_kplatform; static int linux_szsigcode; static vm_object_t linux_shared_page_obj; static char *linux_shared_page_mapping; extern char _binary_linux_locore_o_start; extern char _binary_linux_locore_o_end; extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); static int linux_copyout_strings(struct image_params *imgp, uintptr_t *stack_base); static int linux_fixup_elf(uintptr_t *stack_base, struct image_params *iparams); static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel); static void linux_vdso_install(void *param); static void linux_vdso_deinstall(void *param); static void linux_set_syscall_retval(struct thread *td, int error); static int linux_fetch_syscall_args(struct thread *td); static void linux_exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack); static int linux_vsyscall(struct thread *td); #define LINUX_T_UNKNOWN 255 static int _bsd_to_linux_trapcode[] = { LINUX_T_UNKNOWN, /* 0 */ 6, /* 1 T_PRIVINFLT */ LINUX_T_UNKNOWN, /* 2 */ 3, /* 3 T_BPTFLT */ LINUX_T_UNKNOWN, /* 4 */ LINUX_T_UNKNOWN, /* 5 */ 16, /* 6 T_ARITHTRAP */ 254, /* 7 T_ASTFLT */ LINUX_T_UNKNOWN, /* 8 */ 13, /* 9 T_PROTFLT */ 1, /* 10 T_TRCTRAP */ LINUX_T_UNKNOWN, /* 11 */ 14, /* 12 T_PAGEFLT */ LINUX_T_UNKNOWN, /* 13 */ 17, /* 14 T_ALIGNFLT */ LINUX_T_UNKNOWN, /* 15 */ LINUX_T_UNKNOWN, /* 16 */ LINUX_T_UNKNOWN, /* 17 */ 0, /* 18 T_DIVIDE */ 2, /* 19 T_NMI */ 4, /* 20 T_OFLOW */ 5, /* 21 T_BOUND */ 7, /* 22 T_DNA */ 8, /* 23 T_DOUBLEFLT */ 9, /* 24 T_FPOPFLT */ 10, /* 25 T_TSSFLT */ 11, /* 26 T_SEGNPFLT */ 12, /* 27 T_STKFLT */ 18, /* 28 T_MCHK */ 19, /* 29 T_XMMFLT */ 15 /* 30 T_RESERVED */ }; #define bsd_to_linux_trapcode(code) \ ((code)td_proc; frame = td->td_frame; sa = &td->td_sa; sa->args[0] = frame->tf_rdi; sa->args[1] = frame->tf_rsi; sa->args[2] = frame->tf_rdx; sa->args[3] = frame->tf_rcx; sa->args[4] = frame->tf_r8; sa->args[5] = frame->tf_r9; sa->code = frame->tf_rax; if (sa->code >= p->p_sysent->sv_size) /* nosys */ sa->callp = &p->p_sysent->sv_table[p->p_sysent->sv_size - 1]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; td->td_retval[0] = 0; return (0); } static void linux_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame = td->td_frame; /* * On Linux only %rcx and %r11 values are not preserved across * the syscall. So, do not clobber %rdx and %r10. */ td->td_retval[1] = frame->tf_rdx; if (error != EJUSTRETURN) frame->tf_r10 = frame->tf_rcx; cpu_set_syscall_retval(td, error); if (__predict_false(error != 0)) { if (error != ERESTART && error != EJUSTRETURN) - frame->tf_rax = SV_ABI_ERRNO(td->td_proc, error); + frame->tf_rax = linux_to_bsd_errno(error); } /* Restore all registers. */ set_pcb_flags(td->td_pcb, PCB_FULL_IRET); } static int linux_copyout_auxargs(struct image_params *imgp, uintptr_t base) { Elf_Auxargs *args; Elf_Auxinfo *argarray, *pos; struct proc *p; int error, issetugid; p = imgp->proc; args = (Elf64_Auxargs *)imgp->auxargs; argarray = pos = malloc(LINUX_AT_COUNT * sizeof(*pos), M_TEMP, M_WAITOK | M_ZERO); issetugid = p->p_flag & P_SUGID ? 1 : 0; AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, imgp->proc->p_sysent->sv_shared_page_base); AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature); AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz); AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); AUXARGS_ENTRY(pos, AT_PHENT, args->phent); AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); AUXARGS_ENTRY(pos, AT_BASE, args->base); AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); AUXARGS_ENTRY(pos, LINUX_AT_SECURE, issetugid); AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform)); AUXARGS_ENTRY_PTR(pos, LINUX_AT_RANDOM, imgp->canary); if (imgp->execpathp != 0) AUXARGS_ENTRY_PTR(pos, LINUX_AT_EXECFN, imgp->execpathp); if (args->execfd != -1) AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); AUXARGS_ENTRY(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); imgp->auxargs = NULL; KASSERT(pos - argarray <= LINUX_AT_COUNT, ("Too many auxargs")); error = copyout(argarray, (void *)base, sizeof(*argarray) * LINUX_AT_COUNT); free(argarray, M_TEMP); return (error); } static int linux_fixup_elf(uintptr_t *stack_base, struct image_params *imgp) { Elf_Addr *base; base = (Elf64_Addr *)*stack_base; base--; if (suword(base, (uint64_t)imgp->args->argc) == -1) return (EFAULT); *stack_base = (uintptr_t)base; return (0); } /* * Copy strings out to the new process address space, constructing new arg * and env vector tables. Return a pointer to the base so that it can be used * as the initial stack pointer. */ static int linux_copyout_strings(struct image_params *imgp, uintptr_t *stack_base) { int argc, envc, error; char **vectp; char *stringp; uintptr_t destp, ustringp; struct ps_strings *arginfo; char canary[LINUX_AT_RANDOM_LEN]; size_t execpath_len; struct proc *p; /* Calculate string base and vector table pointers. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; p = imgp->proc; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; destp = (uintptr_t)arginfo; if (execpath_len != 0) { destp -= execpath_len; destp = rounddown2(destp, sizeof(void *)); imgp->execpathp = (void *)destp; error = copyout(imgp->execpath, imgp->execpathp, execpath_len); if (error != 0) return (error); } /* Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); destp -= roundup(sizeof(canary), sizeof(void *)); imgp->canary = (void *)destp; error = copyout(canary, imgp->canary, sizeof(canary)); if (error != 0) return (error); /* Allocate room for the argument and environment strings. */ destp -= ARG_MAX - imgp->args->stringspace; destp = rounddown2(destp, sizeof(void *)); ustringp = destp; if (imgp->auxargs) { /* * Allocate room on the stack for the ELF auxargs * array. It has LINUX_AT_COUNT entries. */ destp -= LINUX_AT_COUNT * sizeof(Elf64_Auxinfo); destp = rounddown2(destp, sizeof(void *)); } vectp = (char **)destp; /* * Allocate room for the argv[] and env vectors including the * terminating NULL pointers. */ vectp -= imgp->args->argc + 1 + imgp->args->envc + 1; /* * Starting with 2.24, glibc depends on a 16-byte stack alignment. * One "long argc" will be prepended later. */ vectp = (char **)((((uintptr_t)vectp + 8) & ~0xF) - 8); /* vectp also becomes our initial stack base. */ *stack_base = (uintptr_t)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* Copy out strings - arguments and environment. */ error = copyout(stringp, (void *)ustringp, ARG_MAX - imgp->args->stringspace); if (error != 0) return (error); /* Fill in "ps_strings" struct for ps, w, etc. */ if (suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp) != 0 || suword(&arginfo->ps_nargvstr, argc) != 0) return (EFAULT); /* Fill in argument portion of vector table. */ for (; argc > 0; --argc) { if (suword(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* A null vector table pointer separates the argp's from the envp's. */ if (suword(vectp++, 0) != 0) return (EFAULT); if (suword(&arginfo->ps_envstr, (long)(intptr_t)vectp) != 0 || suword(&arginfo->ps_nenvstr, envc) != 0) return (EFAULT); /* Fill in environment portion of vector table. */ for (; envc > 0; --envc) { if (suword(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* The end of the vector table is a null pointer. */ if (suword(vectp, 0) != 0) return (EFAULT); if (imgp->auxargs) { vectp++; error = imgp->sysent->sv_copyout_auxargs(imgp, (uintptr_t)vectp); if (error != 0) return (error); } return (0); } /* * Reset registers to default values on exec. */ static void linux_exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct trapframe *regs; struct pcb *pcb; register_t saved_rflags; regs = td->td_frame; pcb = td->td_pcb; if (td->td_proc->p_md.md_ldt != NULL) user_ldt_free(td); pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; clear_pcb_flags(pcb, PCB_32BIT); pcb->pcb_initial_fpucw = __LINUX_NPXCW__; set_pcb_flags(pcb, PCB_FULL_IRET); saved_rflags = regs->tf_rflags & PSL_T; bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = imgp->entry_addr; regs->tf_rsp = stack; regs->tf_rflags = PSL_USER | saved_rflags; regs->tf_ss = _udatasel; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == curpcb) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } clear_pcb_flags(pcb, PCB_DBREGS); } /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); } /* * Copied from amd64/amd64/machdep.c * * XXX fpu state need? don't think so */ int linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) { struct proc *p; struct l_ucontext uc; struct l_sigcontext *context; struct trapframe *regs; unsigned long rflags; int error; ksiginfo_t ksi; regs = td->td_frame; error = copyin((void *)regs->tf_rbx, &uc, sizeof(uc)); if (error != 0) return (error); p = td->td_proc; context = &uc.uc_mcontext; rflags = context->sc_rflags; /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_rflags for faults. Debuggers * should sometimes set it there too. tf_rflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ #define RFLAG_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) if (!RFLAG_SECURE(rflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) { printf("linux_rt_sigreturn: rflags = 0x%lx\n", rflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(context->sc_cs)) { printf("linux_rt_sigreturn: cs = 0x%x\n", context->sc_cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_rip; trapsignal(td, &ksi); return (EINVAL); } PROC_LOCK(p); linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask); SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); regs->tf_rdi = context->sc_rdi; regs->tf_rsi = context->sc_rsi; regs->tf_rdx = context->sc_rdx; regs->tf_rbp = context->sc_rbp; regs->tf_rbx = context->sc_rbx; regs->tf_rcx = context->sc_rcx; regs->tf_rax = context->sc_rax; regs->tf_rip = context->sc_rip; regs->tf_rsp = context->sc_rsp; regs->tf_r8 = context->sc_r8; regs->tf_r9 = context->sc_r9; regs->tf_r10 = context->sc_r10; regs->tf_r11 = context->sc_r11; regs->tf_r12 = context->sc_r12; regs->tf_r13 = context->sc_r13; regs->tf_r14 = context->sc_r14; regs->tf_r15 = context->sc_r15; regs->tf_cs = context->sc_cs; regs->tf_err = context->sc_err; regs->tf_rflags = rflags; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); return (EJUSTRETURN); } /* * copied from amd64/amd64/machdep.c * * Send an interrupt to process. */ static void linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct l_rt_sigframe sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; caddr_t sp; struct trapframe *regs; int sig, code; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; code = ksi->ksi_code; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); LINUX_CTR4(rt_sendsig, "%p, %d, %p, %u", catcher, sig, mask, code); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = (caddr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe); } else sp = (caddr_t)regs->tf_rsp - sizeof(struct l_rt_sigframe) - 128; /* Align to 16 bytes. */ sfp = (struct l_rt_sigframe *)((unsigned long)sp & ~0xFul); mtx_unlock(&psp->ps_mtx); /* Translate the signal. */ sig = bsd_to_linux_signal(sig); /* Save user context. */ bzero(&sf, sizeof(sf)); bsd_to_linux_sigset(mask, &sf.sf_sc.uc_sigmask); bsd_to_linux_sigset(mask, &sf.sf_sc.uc_mcontext.sc_mask); sf.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); sf.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; sf.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; PROC_UNLOCK(p); sf.sf_sc.uc_mcontext.sc_rdi = regs->tf_rdi; sf.sf_sc.uc_mcontext.sc_rsi = regs->tf_rsi; sf.sf_sc.uc_mcontext.sc_rdx = regs->tf_rdx; sf.sf_sc.uc_mcontext.sc_rbp = regs->tf_rbp; sf.sf_sc.uc_mcontext.sc_rbx = regs->tf_rbx; sf.sf_sc.uc_mcontext.sc_rcx = regs->tf_rcx; sf.sf_sc.uc_mcontext.sc_rax = regs->tf_rax; sf.sf_sc.uc_mcontext.sc_rip = regs->tf_rip; sf.sf_sc.uc_mcontext.sc_rsp = regs->tf_rsp; sf.sf_sc.uc_mcontext.sc_r8 = regs->tf_r8; sf.sf_sc.uc_mcontext.sc_r9 = regs->tf_r9; sf.sf_sc.uc_mcontext.sc_r10 = regs->tf_r10; sf.sf_sc.uc_mcontext.sc_r11 = regs->tf_r11; sf.sf_sc.uc_mcontext.sc_r12 = regs->tf_r12; sf.sf_sc.uc_mcontext.sc_r13 = regs->tf_r13; sf.sf_sc.uc_mcontext.sc_r14 = regs->tf_r14; sf.sf_sc.uc_mcontext.sc_r15 = regs->tf_r15; sf.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; sf.sf_sc.uc_mcontext.sc_rflags = regs->tf_rflags; sf.sf_sc.uc_mcontext.sc_err = regs->tf_err; sf.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); sf.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr; /* Build the argument list for the signal handler. */ regs->tf_rdi = sig; /* arg 1 in %rdi */ regs->tf_rax = 0; regs->tf_rsi = (register_t)&sfp->sf_si; /* arg 2 in %rsi */ regs->tf_rdx = (register_t)&sfp->sf_sc; /* arg 3 in %rdx */ sf.sf_handler = catcher; /* Fill in POSIX parts. */ ksiginfo_to_lsiginfo(ksi, &sf.sf_si, sig); /* Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_rsp = (long)sfp; regs->tf_rip = linux_rt_sigcode; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #define LINUX_VSYSCALL_START (-10UL << 20) #define LINUX_VSYSCALL_SZ 1024 const unsigned long linux_vsyscall_vector[] = { LINUX_SYS_gettimeofday, LINUX_SYS_linux_time, LINUX_SYS_linux_getcpu, }; static int linux_vsyscall(struct thread *td) { struct trapframe *frame; uint64_t retqaddr; int code, traced; int error; frame = td->td_frame; /* Check %rip for vsyscall area. */ if (__predict_true(frame->tf_rip < LINUX_VSYSCALL_START)) return (EINVAL); if ((frame->tf_rip & (LINUX_VSYSCALL_SZ - 1)) != 0) return (EINVAL); code = (frame->tf_rip - LINUX_VSYSCALL_START) / LINUX_VSYSCALL_SZ; if (code >= nitems(linux_vsyscall_vector)) return (EINVAL); /* * vsyscall called as callq *(%rax), so we must * use return address from %rsp and also fixup %rsp. */ error = copyin((void *)frame->tf_rsp, &retqaddr, sizeof(retqaddr)); if (error) return (error); frame->tf_rip = retqaddr; frame->tf_rax = linux_vsyscall_vector[code]; frame->tf_rsp += 8; traced = (frame->tf_flags & PSL_T); amd64_syscall(td, traced); return (0); } struct sysentvec elf_linux_sysvec = { .sv_size = LINUX_SYS_MAXSYSCALL, .sv_table = linux_sysent, - .sv_errsize = ELAST + 1, - .sv_errtbl = linux_errtbl, .sv_transtrap = linux_translate_traps, .sv_fixup = linux_fixup_elf, .sv_sendsig = linux_rt_sendsig, .sv_sigcode = &_binary_linux_locore_o_start, .sv_szsigcode = &linux_szsigcode, .sv_name = "Linux ELF64", .sv_coredump = elf64_coredump, .sv_imgact_try = linux_exec_imgact_try, .sv_minsigstksz = LINUX_MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS_LA48, .sv_usrstack = USRSTACK_LA48, .sv_psstrings = PS_STRINGS_LA48, .sv_stackprot = VM_PROT_ALL, .sv_copyout_auxargs = linux_copyout_auxargs, .sv_copyout_strings = linux_copyout_strings, .sv_setregs = linux_exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_LINUX | SV_LP64 | SV_SHP, .sv_set_syscall_retval = linux_set_syscall_retval, .sv_fetch_syscall_args = linux_fetch_syscall_args, .sv_syscallnames = NULL, .sv_shared_page_base = SHAREDPAGE_LA48, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, .sv_thread_detach = linux_thread_detach, .sv_trap = linux_vsyscall, }; static void linux_vdso_install(void *param) { amd64_lower_shared_page(&elf_linux_sysvec); linux_szsigcode = (&_binary_linux_locore_o_end - &_binary_linux_locore_o_start); if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) panic("Linux invalid vdso size\n"); __elfN(linux_vdso_fixup)(&elf_linux_sysvec); linux_shared_page_obj = __elfN(linux_shared_page_init) (&linux_shared_page_mapping); __elfN(linux_vdso_reloc)(&elf_linux_sysvec); bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, linux_szsigcode); elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; linux_kplatform = linux_shared_page_mapping + (linux_platform - (caddr_t)elf_linux_sysvec.sv_shared_page_base); } SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, linux_vdso_install, NULL); static void linux_vdso_deinstall(void *param) { __elfN(linux_shared_page_fini)(linux_shared_page_obj); } SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, linux_vdso_deinstall, NULL); static char GNULINUX_ABI_VENDOR[] = "GNU"; static int GNULINUX_ABI_DESC = 0; static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel) { const Elf32_Word *desc; uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); desc = (const Elf32_Word *)p; if (desc[0] != GNULINUX_ABI_DESC) return (false); /* * For Linux we encode osrel using the Linux convention of * (version << 16) | (major << 8) | (minor) * See macro in linux_mib.h */ *osrel = LINUX_KERNVER(desc[1], desc[2], desc[3]); return (true); } static Elf_Brandnote linux64_brandnote = { .hdr.n_namesz = sizeof(GNULINUX_ABI_VENDOR), .hdr.n_descsz = 16, .hdr.n_type = 1, .vendor = GNULINUX_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = linux_trans_osrel }; static Elf64_Brandinfo linux_glibc2brand = { .brand = ELFOSABI_LINUX, .machine = EM_X86_64, .compat_3_brand = "Linux", .emul_path = linux_emul_path, .interp_path = "/lib64/ld-linux-x86-64.so.2", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux64_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; static Elf64_Brandinfo linux_glibc2brandshort = { .brand = ELFOSABI_LINUX, .machine = EM_X86_64, .compat_3_brand = "Linux", .emul_path = linux_emul_path, .interp_path = "/lib64/ld-linux.so.2", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux64_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; static Elf64_Brandinfo linux_muslbrand = { .brand = ELFOSABI_LINUX, .machine = EM_X86_64, .compat_3_brand = "Linux", .emul_path = linux_emul_path, .interp_path = "/lib/ld-musl-x86_64.so.1", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux64_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; Elf64_Brandinfo *linux_brandlist[] = { &linux_glibc2brand, &linux_glibc2brandshort, &linux_muslbrand, NULL }; static int linux64_elf_modevent(module_t mod, int type, void *data) { Elf64_Brandinfo **brandinfo; int error; struct linux_ioctl_handler **lihp; error = 0; switch(type) { case MOD_LOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf64_insert_brand_entry(*brandinfo) < 0) error = EINVAL; if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_register_handler(*lihp); stclohz = (stathz ? stathz : hz); if (bootverbose) printf("Linux x86-64 ELF exec handler installed\n"); } else printf("cannot insert Linux x86-64 ELF brand handler\n"); break; case MOD_UNLOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf64_brand_inuse(*brandinfo)) error = EBUSY; if (error == 0) { for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf64_remove_brand_entry(*brandinfo) < 0) error = EINVAL; } if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_unregister_handler(*lihp); if (bootverbose) printf("Linux ELF exec handler removed\n"); } else printf("Could not deinstall ELF interpreter entry\n"); break; default: return (EOPNOTSUPP); } return (error); } static moduledata_t linux64_elf_mod = { "linux64elf", linux64_elf_modevent, 0 }; DECLARE_MODULE_TIED(linux64elf, linux64_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); MODULE_DEPEND(linux64elf, linux_common, 1, 1, 1); FEATURE(linux64, "Linux 64bit support"); Index: head/sys/amd64/linux32/linux32_sysvec.c =================================================================== --- head/sys/amd64/linux32/linux32_sysvec.c (revision 365831) +++ head/sys/amd64/linux32/linux32_sysvec.c (revision 365832) @@ -1,1104 +1,1102 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2004 Tim J. Robbins * Copyright (c) 2003 Peter Wemm * Copyright (c) 2002 Doug Rabson * Copyright (c) 1998-1999 Andrew Gallatin * Copyright (c) 1994-1996 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "opt_compat.h" #include __FBSDID("$FreeBSD$"); #ifndef COMPAT_FREEBSD32 #error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!" #endif #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_VERSION(linux, 1); const char *linux_kplatform; static int linux_szsigcode; static vm_object_t linux_shared_page_obj; static char *linux_shared_page_mapping; extern char _binary_linux32_locore_o_start; extern char _binary_linux32_locore_o_end; extern struct sysent linux32_sysent[LINUX32_SYS_MAXSYSCALL]; SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); static int linux_fixup_elf(uintptr_t *stack_base, struct image_params *iparams); static int linux_copyout_strings(struct image_params *imgp, uintptr_t *stack_base); static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); static void linux_exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack); static void linux32_fixlimit(struct rlimit *rl, int which); static bool linux32_trans_osrel(const Elf_Note *note, int32_t *osrel); static void linux_vdso_install(void *param); static void linux_vdso_deinstall(void *param); static void linux32_set_syscall_retval(struct thread *td, int error); #define LINUX_T_UNKNOWN 255 static int _bsd_to_linux_trapcode[] = { LINUX_T_UNKNOWN, /* 0 */ 6, /* 1 T_PRIVINFLT */ LINUX_T_UNKNOWN, /* 2 */ 3, /* 3 T_BPTFLT */ LINUX_T_UNKNOWN, /* 4 */ LINUX_T_UNKNOWN, /* 5 */ 16, /* 6 T_ARITHTRAP */ 254, /* 7 T_ASTFLT */ LINUX_T_UNKNOWN, /* 8 */ 13, /* 9 T_PROTFLT */ 1, /* 10 T_TRCTRAP */ LINUX_T_UNKNOWN, /* 11 */ 14, /* 12 T_PAGEFLT */ LINUX_T_UNKNOWN, /* 13 */ 17, /* 14 T_ALIGNFLT */ LINUX_T_UNKNOWN, /* 15 */ LINUX_T_UNKNOWN, /* 16 */ LINUX_T_UNKNOWN, /* 17 */ 0, /* 18 T_DIVIDE */ 2, /* 19 T_NMI */ 4, /* 20 T_OFLOW */ 5, /* 21 T_BOUND */ 7, /* 22 T_DNA */ 8, /* 23 T_DOUBLEFLT */ 9, /* 24 T_FPOPFLT */ 10, /* 25 T_TSSFLT */ 11, /* 26 T_SEGNPFLT */ 12, /* 27 T_STKFLT */ 18, /* 28 T_MCHK */ 19, /* 29 T_XMMFLT */ 15 /* 30 T_RESERVED */ }; #define bsd_to_linux_trapcode(code) \ ((code)auxargs; argarray = pos = malloc(LINUX_AT_COUNT * sizeof(*pos), M_TEMP, M_WAITOK | M_ZERO); issetugid = imgp->proc->p_flag & P_SUGID ? 1 : 0; AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, imgp->proc->p_sysent->sv_shared_page_base); AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, linux32_vsyscall); AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature); /* * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0, * as it has appeared in the 2.4.0-rc7 first time. * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK), * glibc falls back to the hard-coded CLK_TCK value when aux entry * is not present. * Also see linux_times() implementation. */ if (linux_kernver(curthread) >= LINUX_KERNVER_2004000) AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz); AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); AUXARGS_ENTRY(pos, AT_PHENT, args->phent); AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); AUXARGS_ENTRY(pos, AT_BASE, args->base); AUXARGS_ENTRY(pos, LINUX_AT_SECURE, issetugid); AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform)); AUXARGS_ENTRY(pos, LINUX_AT_RANDOM, PTROUT(imgp->canary)); if (imgp->execpathp != 0) AUXARGS_ENTRY(pos, LINUX_AT_EXECFN, PTROUT(imgp->execpathp)); if (args->execfd != -1) AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); AUXARGS_ENTRY(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); imgp->auxargs = NULL; KASSERT(pos - argarray <= LINUX_AT_COUNT, ("Too many auxargs")); error = copyout(argarray, (void *)base, sizeof(*argarray) * LINUX_AT_COUNT); free(argarray, M_TEMP); return (error); } static int linux_fixup_elf(uintptr_t *stack_base, struct image_params *imgp) { Elf32_Addr *base; base = (Elf32_Addr *)*stack_base; base--; if (suword32(base, (uint32_t)imgp->args->argc) == -1) return (EFAULT); *stack_base = (uintptr_t)base; return (0); } static void linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td = curthread; struct proc *p = td->td_proc; struct sigacts *psp; struct trapframe *regs; struct l_rt_sigframe *fp, frame; int oonstack; int sig; int code; sig = ksi->ksi_signo; code = ksi->ksi_code; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct l_rt_sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); } else fp = (struct l_rt_sigframe *)regs->tf_rsp - 1; mtx_unlock(&psp->ps_mtx); /* Build the argument list for the signal handler. */ sig = bsd_to_linux_signal(sig); bzero(&frame, sizeof(frame)); frame.sf_handler = PTROUT(catcher); frame.sf_sig = sig; frame.sf_siginfo = PTROUT(&fp->sf_si); frame.sf_ucontext = PTROUT(&fp->sf_sc); /* Fill in POSIX parts. */ ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); /* * Build the signal context to be used by sigreturn and libgcc unwind. */ frame.sf_sc.uc_flags = 0; /* XXX ??? */ frame.sf_sc.uc_link = 0; /* XXX ??? */ frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; PROC_UNLOCK(p); bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__mask; frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi; frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi; frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp; frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx; frame.sf_sc.uc_mcontext.sc_esp = regs->tf_rsp; frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx; frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx; frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax; frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip; frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs; frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags; frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp; frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); if (copyout(&frame, fp, sizeof(frame)) != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ PROC_LOCK(p); sigexit(td, SIGILL); } /* Build context to run handler in. */ regs->tf_rsp = PTROUT(fp); regs->tf_rip = linux32_rt_sigcode; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucode32sel; regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * in u. to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td = curthread; struct proc *p = td->td_proc; struct sigacts *psp; struct trapframe *regs; struct l_sigframe *fp, frame; l_sigset_t lmask; int oonstack; int sig, code; sig = ksi->ksi_signo; code = ksi->ksi_code; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ linux_rt_sendsig(catcher, ksi, mask); return; } regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct l_sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_sigframe)); } else fp = (struct l_sigframe *)regs->tf_rsp - 1; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* Build the argument list for the signal handler. */ sig = bsd_to_linux_signal(sig); bzero(&frame, sizeof(frame)); frame.sf_handler = PTROUT(catcher); frame.sf_sig = sig; bsd_to_linux_sigset(mask, &lmask); /* Build the signal context to be used by sigreturn. */ frame.sf_sc.sc_mask = lmask.__mask; frame.sf_sc.sc_gs = regs->tf_gs; frame.sf_sc.sc_fs = regs->tf_fs; frame.sf_sc.sc_es = regs->tf_es; frame.sf_sc.sc_ds = regs->tf_ds; frame.sf_sc.sc_edi = regs->tf_rdi; frame.sf_sc.sc_esi = regs->tf_rsi; frame.sf_sc.sc_ebp = regs->tf_rbp; frame.sf_sc.sc_ebx = regs->tf_rbx; frame.sf_sc.sc_esp = regs->tf_rsp; frame.sf_sc.sc_edx = regs->tf_rdx; frame.sf_sc.sc_ecx = regs->tf_rcx; frame.sf_sc.sc_eax = regs->tf_rax; frame.sf_sc.sc_eip = regs->tf_rip; frame.sf_sc.sc_cs = regs->tf_cs; frame.sf_sc.sc_eflags = regs->tf_rflags; frame.sf_sc.sc_esp_at_signal = regs->tf_rsp; frame.sf_sc.sc_ss = regs->tf_ss; frame.sf_sc.sc_err = regs->tf_err; frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); frame.sf_extramask[0] = lmask.__mask; if (copyout(&frame, fp, sizeof(frame)) != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ PROC_LOCK(p); sigexit(td, SIGILL); } /* Build context to run handler in. */ regs->tf_rsp = PTROUT(fp); regs->tf_rip = linux32_sigcode; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucode32sel; regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * psl to gain improper privileges or to cause * a machine fault. */ int linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) { struct l_sigframe frame; struct trapframe *regs; sigset_t bmask; l_sigset_t lmask; int eflags; ksiginfo_t ksi; regs = td->td_frame; /* * The trampoline code hands us the sigframe. * It is unsafe to keep track of it ourselves, in the event that a * program jumps out of a signal handler. */ if (copyin(args->sfp, &frame, sizeof(frame)) != 0) return (EFAULT); /* Check for security violations. */ #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) eflags = frame.sf_sc.sc_eflags; if (!EFLAGS_SECURE(eflags, regs->tf_rflags)) return(EINVAL); /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(frame.sf_sc.sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_rip; trapsignal(td, &ksi); return(EINVAL); } lmask.__mask = frame.sf_sc.sc_mask; lmask.__mask = frame.sf_extramask[0]; linux_to_bsd_sigset(&lmask, &bmask); kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); /* Restore signal context. */ regs->tf_rdi = frame.sf_sc.sc_edi; regs->tf_rsi = frame.sf_sc.sc_esi; regs->tf_rbp = frame.sf_sc.sc_ebp; regs->tf_rbx = frame.sf_sc.sc_ebx; regs->tf_rdx = frame.sf_sc.sc_edx; regs->tf_rcx = frame.sf_sc.sc_ecx; regs->tf_rax = frame.sf_sc.sc_eax; regs->tf_rip = frame.sf_sc.sc_eip; regs->tf_cs = frame.sf_sc.sc_cs; regs->tf_ds = frame.sf_sc.sc_ds; regs->tf_es = frame.sf_sc.sc_es; regs->tf_fs = frame.sf_sc.sc_fs; regs->tf_gs = frame.sf_sc.sc_gs; regs->tf_rflags = eflags; regs->tf_rsp = frame.sf_sc.sc_esp_at_signal; regs->tf_ss = frame.sf_sc.sc_ss; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); return (EJUSTRETURN); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by rt_sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * psl to gain improper privileges or to cause * a machine fault. */ int linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) { struct l_ucontext uc; struct l_sigcontext *context; sigset_t bmask; l_stack_t *lss; stack_t ss; struct trapframe *regs; int eflags; ksiginfo_t ksi; regs = td->td_frame; /* * The trampoline code hands us the ucontext. * It is unsafe to keep track of it ourselves, in the event that a * program jumps out of a signal handler. */ if (copyin(args->ucp, &uc, sizeof(uc)) != 0) return (EFAULT); context = &uc.uc_mcontext; /* Check for security violations. */ #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) eflags = context->sc_eflags; if (!EFLAGS_SECURE(eflags, regs->tf_rflags)) return(EINVAL); /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(context->sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_rip; trapsignal(td, &ksi); return(EINVAL); } linux_to_bsd_sigset(&uc.uc_sigmask, &bmask); kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); /* * Restore signal context */ regs->tf_gs = context->sc_gs; regs->tf_fs = context->sc_fs; regs->tf_es = context->sc_es; regs->tf_ds = context->sc_ds; regs->tf_rdi = context->sc_edi; regs->tf_rsi = context->sc_esi; regs->tf_rbp = context->sc_ebp; regs->tf_rbx = context->sc_ebx; regs->tf_rdx = context->sc_edx; regs->tf_rcx = context->sc_ecx; regs->tf_rax = context->sc_eax; regs->tf_rip = context->sc_eip; regs->tf_cs = context->sc_cs; regs->tf_rflags = eflags; regs->tf_rsp = context->sc_esp_at_signal; regs->tf_ss = context->sc_ss; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); /* * call sigaltstack & ignore results.. */ lss = &uc.uc_stack; ss.ss_sp = PTRIN(lss->ss_sp); ss.ss_size = lss->ss_size; ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); (void)kern_sigaltstack(td, &ss, NULL); return (EJUSTRETURN); } static int linux32_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; struct syscall_args *sa; p = td->td_proc; frame = td->td_frame; sa = &td->td_sa; sa->args[0] = frame->tf_rbx; sa->args[1] = frame->tf_rcx; sa->args[2] = frame->tf_rdx; sa->args[3] = frame->tf_rsi; sa->args[4] = frame->tf_rdi; sa->args[5] = frame->tf_rbp; /* Unconfirmed */ sa->code = frame->tf_rax; if (sa->code >= p->p_sysent->sv_size) /* nosys */ sa->callp = &p->p_sysent->sv_table[p->p_sysent->sv_size - 1]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; td->td_retval[0] = 0; td->td_retval[1] = frame->tf_rdx; return (0); } static void linux32_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame = td->td_frame; cpu_set_syscall_retval(td, error); if (__predict_false(error != 0)) { if (error != ERESTART && error != EJUSTRETURN) - frame->tf_rax = SV_ABI_ERRNO(td->td_proc, error); + frame->tf_rax = linux_to_bsd_errno(error); } } /* * Clear registers on exec * XXX copied from ia32_signal.c. */ static void linux_exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; register_t saved_rflags; regs = td->td_frame; pcb = td->td_pcb; if (td->td_proc->p_md.md_ldt != NULL) user_ldt_free(td); critical_enter(); wrmsr(MSR_FSBASE, 0); wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; critical_exit(); pcb->pcb_initial_fpucw = __LINUX_NPXCW__; saved_rflags = regs->tf_rflags & PSL_T; bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = imgp->entry_addr; regs->tf_rsp = stack; regs->tf_rflags = PSL_USER | saved_rflags; regs->tf_gs = _ugssel; regs->tf_fs = _ufssel; regs->tf_es = _udatasel; regs->tf_ds = _udatasel; regs->tf_ss = _udatasel; regs->tf_flags = TF_HASSEGS; regs->tf_cs = _ucode32sel; regs->tf_rbx = (register_t)imgp->ps_strings; fpstate_drop(td); /* Do full restore on return so that we can change to a different %cs */ set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET); } /* * XXX copied from ia32_sysvec.c. */ static int linux_copyout_strings(struct image_params *imgp, uintptr_t *stack_base) { int argc, envc, error; u_int32_t *vectp; char *stringp; uintptr_t destp, ustringp; struct linux32_ps_strings *arginfo; char canary[LINUX_AT_RANDOM_LEN]; size_t execpath_len; /* Calculate string base and vector table pointers. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; destp = (uintptr_t)arginfo; if (execpath_len != 0) { destp -= execpath_len; destp = rounddown2(destp, sizeof(uint32_t)); imgp->execpathp = (void *)destp; error = copyout(imgp->execpath, imgp->execpathp, execpath_len); if (error != 0) return (error); } /* Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); destp -= roundup(sizeof(canary), sizeof(uint32_t)); imgp->canary = (void *)destp; error = copyout(canary, imgp->canary, sizeof(canary)); if (error != 0) return (error); /* Allocate room for the argument and environment strings. */ destp -= ARG_MAX - imgp->args->stringspace; destp = rounddown2(destp, sizeof(uint32_t)); ustringp = destp; if (imgp->auxargs) { /* * Allocate room on the stack for the ELF auxargs * array. It has LINUX_AT_COUNT entries. */ destp -= LINUX_AT_COUNT * sizeof(Elf32_Auxinfo); destp = rounddown2(destp, sizeof(uint32_t)); } vectp = (uint32_t *)destp; /* * Allocate room for the argv[] and env vectors including the * terminating NULL pointers. */ vectp -= imgp->args->argc + 1 + imgp->args->envc + 1; /* vectp also becomes our initial stack base. */ *stack_base = (uintptr_t)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* Copy out strings - arguments and environment. */ error = copyout(stringp, (void *)ustringp, ARG_MAX - imgp->args->stringspace); if (error != 0) return (error); /* Fill in "ps_strings" struct for ps, w, etc. */ if (suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp) != 0 || suword32(&arginfo->ps_nargvstr, argc) != 0) return (EFAULT); /* Fill in argument portion of vector table. */ for (; argc > 0; --argc) { if (suword32(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* A null vector table pointer separates the argp's from the envp's. */ if (suword32(vectp++, 0) != 0) return (EFAULT); if (suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp) != 0 || suword32(&arginfo->ps_nenvstr, envc) != 0) return (EFAULT); /* Fill in environment portion of vector table. */ for (; envc > 0; --envc) { if (suword32(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* The end of the vector table is a null pointer. */ if (suword32(vectp, 0) != 0) return (EFAULT); if (imgp->auxargs) { vectp++; error = imgp->sysent->sv_copyout_auxargs(imgp, (uintptr_t)vectp); if (error != 0) return (error); } return (0); } static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "32-bit Linux emulation"); static u_long linux32_maxdsiz = LINUX32_MAXDSIZ; SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW, &linux32_maxdsiz, 0, ""); static u_long linux32_maxssiz = LINUX32_MAXSSIZ; SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW, &linux32_maxssiz, 0, ""); static u_long linux32_maxvmem = LINUX32_MAXVMEM; SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW, &linux32_maxvmem, 0, ""); static void linux32_fixlimit(struct rlimit *rl, int which) { switch (which) { case RLIMIT_DATA: if (linux32_maxdsiz != 0) { if (rl->rlim_cur > linux32_maxdsiz) rl->rlim_cur = linux32_maxdsiz; if (rl->rlim_max > linux32_maxdsiz) rl->rlim_max = linux32_maxdsiz; } break; case RLIMIT_STACK: if (linux32_maxssiz != 0) { if (rl->rlim_cur > linux32_maxssiz) rl->rlim_cur = linux32_maxssiz; if (rl->rlim_max > linux32_maxssiz) rl->rlim_max = linux32_maxssiz; } break; case RLIMIT_VMEM: if (linux32_maxvmem != 0) { if (rl->rlim_cur > linux32_maxvmem) rl->rlim_cur = linux32_maxvmem; if (rl->rlim_max > linux32_maxvmem) rl->rlim_max = linux32_maxvmem; } break; } } struct sysentvec elf_linux_sysvec = { .sv_size = LINUX32_SYS_MAXSYSCALL, .sv_table = linux32_sysent, - .sv_errsize = ELAST + 1, - .sv_errtbl = linux_errtbl, .sv_transtrap = linux_translate_traps, .sv_fixup = linux_fixup_elf, .sv_sendsig = linux_sendsig, .sv_sigcode = &_binary_linux32_locore_o_start, .sv_szsigcode = &linux_szsigcode, .sv_name = "Linux ELF32", .sv_coredump = elf32_coredump, .sv_imgact_try = linux_exec_imgact_try, .sv_minsigstksz = LINUX_MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = LINUX32_MAXUSER, .sv_usrstack = LINUX32_USRSTACK, .sv_psstrings = LINUX32_PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_auxargs = linux_copyout_auxargs, .sv_copyout_strings = linux_copyout_strings, .sv_setregs = linux_exec_setregs, .sv_fixlimit = linux32_fixlimit, .sv_maxssiz = &linux32_maxssiz, .sv_flags = SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP, .sv_set_syscall_retval = linux32_set_syscall_retval, .sv_fetch_syscall_args = linux32_fetch_syscall_args, .sv_syscallnames = NULL, .sv_shared_page_base = LINUX32_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, .sv_thread_detach = linux_thread_detach, .sv_trap = NULL, }; static void linux_vdso_install(void *param) { linux_szsigcode = (&_binary_linux32_locore_o_end - &_binary_linux32_locore_o_start); if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) panic("Linux invalid vdso size\n"); __elfN(linux_vdso_fixup)(&elf_linux_sysvec); linux_shared_page_obj = __elfN(linux_shared_page_init) (&linux_shared_page_mapping); __elfN(linux_vdso_reloc)(&elf_linux_sysvec); bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, linux_szsigcode); elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; linux_kplatform = linux_shared_page_mapping + (linux_platform - (caddr_t)elf_linux_sysvec.sv_shared_page_base); } SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, linux_vdso_install, NULL); static void linux_vdso_deinstall(void *param) { __elfN(linux_shared_page_fini)(linux_shared_page_obj); } SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, linux_vdso_deinstall, NULL); static char GNU_ABI_VENDOR[] = "GNU"; static int GNULINUX_ABI_DESC = 0; static bool linux32_trans_osrel(const Elf_Note *note, int32_t *osrel) { const Elf32_Word *desc; uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); desc = (const Elf32_Word *)p; if (desc[0] != GNULINUX_ABI_DESC) return (false); /* * For Linux we encode osrel using the Linux convention of * (version << 16) | (major << 8) | (minor) * See macro in linux_mib.h */ *osrel = LINUX_KERNVER(desc[1], desc[2], desc[3]); return (true); } static Elf_Brandnote linux32_brandnote = { .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), .hdr.n_descsz = 16, /* XXX at least 16 */ .hdr.n_type = 1, .vendor = GNU_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = linux32_trans_osrel }; static Elf32_Brandinfo linux_brand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", .emul_path = linux_emul_path, .interp_path = "/lib/ld-linux.so.1", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux32_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; static Elf32_Brandinfo linux_glibc2brand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", .emul_path = linux_emul_path, .interp_path = "/lib/ld-linux.so.2", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux32_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; static Elf32_Brandinfo linux_muslbrand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", .emul_path = linux_emul_path, .interp_path = "/lib/ld-musl-i386.so.1", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux32_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; Elf32_Brandinfo *linux_brandlist[] = { &linux_brand, &linux_glibc2brand, &linux_muslbrand, NULL }; static int linux_elf_modevent(module_t mod, int type, void *data) { Elf32_Brandinfo **brandinfo; int error; struct linux_ioctl_handler **lihp; error = 0; switch(type) { case MOD_LOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_insert_brand_entry(*brandinfo) < 0) error = EINVAL; if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux32_ioctl_register_handler(*lihp); stclohz = (stathz ? stathz : hz); if (bootverbose) printf("Linux ELF exec handler installed\n"); } else printf("cannot insert Linux ELF brand handler\n"); break; case MOD_UNLOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_brand_inuse(*brandinfo)) error = EBUSY; if (error == 0) { for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_remove_brand_entry(*brandinfo) < 0) error = EINVAL; } if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux32_ioctl_unregister_handler(*lihp); if (bootverbose) printf("Linux ELF exec handler removed\n"); } else printf("Could not deinstall ELF interpreter entry\n"); break; default: return (EOPNOTSUPP); } return (error); } static moduledata_t linux_elf_mod = { "linuxelf", linux_elf_modevent, 0 }; DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); MODULE_DEPEND(linuxelf, linux_common, 1, 1, 1); FEATURE(linux, "Linux 32bit support"); Index: head/sys/arm/arm/elf_machdep.c =================================================================== --- head/sys/arm/arm/elf_machdep.c (revision 365831) +++ head/sys/arm/arm/elf_machdep.c (revision 365832) @@ -1,345 +1,343 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VFP #include #endif #include "opt_ddb.h" /* for OPT_DDB */ #include "opt_global.h" /* for OPT_KDTRACE_HOOKS */ #include "opt_stack.h" /* for OPT_STACK */ static boolean_t elf32_arm_abi_supported(struct image_params *, int32_t *, uint32_t *); u_long elf_hwcap; u_long elf_hwcap2; struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, - .sv_errsize = 0, - .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD ELF32", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_auxargs = __elfN(freebsd_copyout_auxargs), .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = #if __ARM_ARCH >= 6 SV_ASLR | SV_SHP | SV_TIMEKEEP | #endif SV_ABI_FREEBSD | SV_ILP32 | SV_ASLR, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_hwcap = &elf_hwcap, .sv_hwcap2 = &elf_hwcap2, }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); static Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_ARM, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, .header_supported= elf32_arm_abi_supported, }; SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); static boolean_t elf32_arm_abi_supported(struct image_params *imgp, int32_t *osrel __unused, uint32_t *fctl0 __unused) { const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header; /* * When configured for EABI, FreeBSD supports EABI vesions 4 and 5. */ if (EF_ARM_EABI_VERSION(hdr->e_flags) < EF_ARM_EABI_FREEBSD_MIN) { if (bootverbose) uprintf("Attempting to execute non EABI binary (rev %d) image %s", EF_ARM_EABI_VERSION(hdr->e_flags), imgp->args->fname); return (FALSE); } return (TRUE); } void elf32_dump_thread(struct thread *td, void *dst, size_t *off) { #ifdef VFP mcontext_vfp_t vfp; if (dst != NULL) { get_vfpcontext(td, &vfp); *off = elf32_populate_note(NT_ARM_VFP, &vfp, dst, sizeof(vfp), NULL); } else *off = elf32_populate_note(NT_ARM_VFP, NULL, NULL, sizeof(vfp), NULL); #endif } bool elf_is_ifunc_reloc(Elf_Size r_info __unused) { return (false); } /* * It is possible for the compiler to emit relocations for unaligned data. * We handle this situation with these inlines. */ #define RELOC_ALIGNED_P(x) \ (((uintptr_t)(x) & (sizeof(void *) - 1)) == 0) static __inline Elf_Addr load_ptr(Elf_Addr *where) { Elf_Addr res; if (RELOC_ALIGNED_P(where)) return *where; memcpy(&res, where, sizeof(res)); return (res); } static __inline void store_ptr(Elf_Addr *where, Elf_Addr val) { if (RELOC_ALIGNED_P(where)) *where = val; else memcpy(where, &val, sizeof(val)); } #undef RELOC_ALIGNED_P /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, int local, elf_lookup_fn lookup) { Elf_Addr *where; Elf_Addr addr; Elf_Addr addend; Elf_Word rtype, symidx; const Elf_Rel *rel; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: rel = (const Elf_Rel *)data; where = (Elf_Addr *) (relocbase + rel->r_offset); addend = load_ptr(where); rtype = ELF_R_TYPE(rel->r_info); symidx = ELF_R_SYM(rel->r_info); break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("unknown reloc type %d\n", type); } if (local) { if (rtype == R_ARM_RELATIVE) { /* A + B */ addr = elf_relocaddr(lf, relocbase + addend); if (load_ptr(where) != addr) store_ptr(where, addr); } return (0); } switch (rtype) { case R_ARM_NONE: /* none */ break; case R_ARM_ABS32: error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; store_ptr(where, addr + load_ptr(where)); break; case R_ARM_COPY: /* none */ /* * There shouldn't be copy relocations in kernel * objects. */ printf("kldload: unexpected R_COPY relocation\n"); return -1; break; case R_ARM_JUMP_SLOT: error = lookup(lf, symidx, 1, &addr); if (error == 0) { store_ptr(where, addr); return (0); } return (-1); case R_ARM_RELATIVE: break; default: printf("kldload: unexpected relocation type %d\n", rtype); return -1; } return(0); } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup)); } int elf_cpu_load_file(linker_file_t lf) { /* * The pmap code does not do an icache sync upon establishing executable * mappings in the kernel pmap. It's an optimization based on the fact * that kernel memory allocations always have EXECUTABLE protection even * when the memory isn't going to hold executable code. The only time * kernel memory holding instructions does need a sync is after loading * a kernel module, and that's when this function gets called. * * This syncs data and instruction caches after loading a module. We * don't worry about the kernel itself (lf->id is 1) as locore.S did * that on entry. Even if data cache maintenance was done by IO code, * the relocation fixup process creates dirty cache entries that we must * write back before doing icache sync. The instruction cache sync also * invalidates the branch predictor cache on platforms that have one. */ if (lf->id == 1) return (0); #if __ARM_ARCH >= 6 dcache_wb_pou((vm_offset_t)lf->address, (vm_size_t)lf->size); icache_inv_all(); #else cpu_dcache_wb_range((vm_offset_t)lf->address, (vm_size_t)lf->size); cpu_l2cache_wb_range((vm_offset_t)lf->address, (vm_size_t)lf->size); cpu_icache_sync_range((vm_offset_t)lf->address, (vm_size_t)lf->size); #endif #if defined(DDB) || defined(KDTRACE_HOOKS) || defined(STACK) /* * Inform the stack(9) code of the new module, so it can acquire its * per-module unwind data. */ unwind_module_loaded(lf); #endif return (0); } int elf_cpu_parse_dynamic(caddr_t loadbase __unused, Elf_Dyn *dynamic __unused) { return (0); } int elf_cpu_unload_file(linker_file_t lf) { #if defined(DDB) || defined(KDTRACE_HOOKS) || defined(STACK) /* Inform the stack(9) code that this module is gone. */ unwind_module_unloaded(lf); #endif return (0); } Index: head/sys/arm64/arm64/elf32_machdep.c =================================================================== --- head/sys/arm64/arm64/elf32_machdep.c (revision 365831) +++ head/sys/arm64/arm64/elf32_machdep.c (revision 365832) @@ -1,261 +1,259 @@ /*- * Copyright (c) 2014, 2015 The FreeBSD Foundation. * Copyright (c) 2014, 2017 Andrew Turner. * Copyright (c) 2018 Olivier Houchard * All rights reserved. * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define FREEBSD32_MINUSER 0x00001000 #define FREEBSD32_MAXUSER ((1ul << 32) - PAGE_SIZE) #define FREEBSD32_SHAREDPAGE (FREEBSD32_MAXUSER - PAGE_SIZE) #define FREEBSD32_USRSTACK FREEBSD32_SHAREDPAGE extern const char *freebsd32_syscallnames[]; extern char aarch32_sigcode[]; extern int sz_aarch32_sigcode; static int freebsd32_fetch_syscall_args(struct thread *td); static void freebsd32_setregs(struct thread *td, struct image_params *imgp, u_long stack); static void freebsd32_set_syscall_retval(struct thread *, int); static boolean_t elf32_arm_abi_supported(struct image_params *, int32_t *, uint32_t *); extern void freebsd32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); static struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = freebsd32_sysent, - .sv_errsize = 0, - .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = elf32_freebsd_fixup, .sv_sendsig = freebsd32_sendsig, .sv_sigcode = aarch32_sigcode, .sv_szsigcode = &sz_aarch32_sigcode, .sv_name = "FreeBSD ELF32", .sv_coredump = elf32_coredump, .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = FREEBSD32_MINUSER, .sv_maxuser = FREEBSD32_MAXUSER, .sv_usrstack = FREEBSD32_USRSTACK, .sv_psstrings = FREEBSD32_PS_STRINGS, .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, .sv_copyout_auxargs = elf32_freebsd_copyout_auxargs, .sv_copyout_strings = freebsd32_copyout_strings, .sv_setregs = freebsd32_setregs, .sv_fixlimit = NULL, // XXX .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_ILP32 | SV_SHP | SV_TIMEKEEP, .sv_set_syscall_retval = freebsd32_set_syscall_retval, .sv_fetch_syscall_args = freebsd32_fetch_syscall_args, .sv_syscallnames = freebsd32_syscallnames, .sv_shared_page_base = FREEBSD32_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); static Elf32_Brandinfo freebsd32_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_ARM, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = "/libexec/ld-elf32.so.1", .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, .header_supported= elf32_arm_abi_supported, }; SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)elf32_insert_brand_entry, &freebsd32_brand_info); static boolean_t elf32_arm_abi_supported(struct image_params *imgp, int32_t *osrel __unused, uint32_t *fctl0 __unused) { const Elf32_Ehdr *hdr; /* Check if we support AArch32 */ if (ID_AA64PFR0_EL0_VAL(READ_SPECIALREG(id_aa64pfr0_el1)) != ID_AA64PFR0_EL0_64_32) return (FALSE); #define EF_ARM_EABI_VERSION(x) (((x) & EF_ARM_EABIMASK) >> 24) #define EF_ARM_EABI_FREEBSD_MIN 4 hdr = (const Elf32_Ehdr *)imgp->image_header; if (EF_ARM_EABI_VERSION(hdr->e_flags) < EF_ARM_EABI_FREEBSD_MIN) { if (bootverbose) uprintf("Attempting to execute non EABI binary " "(rev %d) image %s", EF_ARM_EABI_VERSION(hdr->e_flags), imgp->args->fname); return (FALSE); } return (TRUE); } static int freebsd32_fetch_syscall_args(struct thread *td) { struct proc *p; register_t *ap; struct syscall_args *sa; int error, i, nap; unsigned int args[4]; nap = 4; p = td->td_proc; ap = td->td_frame->tf_x; sa = &td->td_sa; /* r7 is the syscall id */ sa->code = td->td_frame->tf_x[7]; if (sa->code == SYS_syscall) { sa->code = *ap++; nap--; } else if (sa->code == SYS___syscall) { sa->code = ap[1]; nap -= 2; ap += 2; } if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; for (i = 0; i < nap; i++) sa->args[i] = ap[i]; if (sa->narg > nap) { if ((sa->narg - nap) > nitems(args)) panic("Too many system call arguiments"); error = copyin((void *)td->td_frame->tf_x[13], args, (sa->narg - nap) * sizeof(int)); for (i = 0; i < (sa->narg - nap); i++) sa->args[i + nap] = args[i]; } td->td_retval[0] = 0; td->td_retval[1] = 0; return (0); } static void freebsd32_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame; frame = td->td_frame; switch (error) { case 0: frame->tf_x[0] = td->td_retval[0]; frame->tf_x[1] = td->td_retval[1]; frame->tf_spsr &= ~PSR_C; break; case ERESTART: /* * Reconstruct the pc to point at the swi. */ if ((frame->tf_spsr & PSR_T) != 0) frame->tf_elr -= 2; //THUMB_INSN_SIZE; else frame->tf_elr -= 4; //INSN_SIZE; break; case EJUSTRETURN: /* nothing to do */ break; default: frame->tf_x[0] = error; frame->tf_spsr |= PSR_C; break; } } static void freebsd32_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct trapframe *tf = td->td_frame; memset(tf, 0, sizeof(struct trapframe)); /* * We need to set x0 for init as it doesn't call * cpu_set_syscall_retval to copy the value. We also * need to set td_retval for the cases where we do. */ tf->tf_x[0] = stack; /* SP_usr is mapped to x13 */ tf->tf_x[13] = stack; /* LR_usr is mapped to x14 */ tf->tf_x[14] = imgp->entry_addr; tf->tf_elr = imgp->entry_addr; tf->tf_spsr = PSR_M_32; } void elf32_dump_thread(struct thread *td, void *dst, size_t *off) { /* XXX: VFP */ } Index: head/sys/arm64/arm64/elf_machdep.c =================================================================== --- head/sys/arm64/arm64/elf_machdep.c (revision 365831) +++ head/sys/arm64/arm64/elf_machdep.c (revision 365832) @@ -1,286 +1,284 @@ /*- * Copyright (c) 2014, 2015 The FreeBSD Foundation. * Copyright (c) 2014 Andrew Turner. * All rights reserved. * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "linker_if.h" u_long __read_frequently elf_hwcap; u_long __read_frequently elf_hwcap2; static struct sysentvec elf64_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, - .sv_errsize = 0, - .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD ELF64", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, .sv_copyout_auxargs = __elfN(freebsd_copyout_auxargs), .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_SHP | SV_TIMEKEEP | SV_ABI_FREEBSD | SV_LP64 | SV_ASLR, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_hwcap = &elf_hwcap, .sv_hwcap2 = &elf_hwcap2, }; INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec); static Elf64_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_AARCH64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_info); void elf64_dump_thread(struct thread *td __unused, void *dst __unused, size_t *off __unused) { } bool elf_is_ifunc_reloc(Elf_Size r_info __unused) { return (ELF_R_TYPE(r_info) == R_AARCH64_IRELATIVE); } static int reloc_instr_imm(Elf32_Addr *where, Elf_Addr val, u_int msb, u_int lsb) { /* Check bounds: upper bits must be all ones or all zeros. */ if ((uint64_t)((int64_t)val >> (msb + 1)) + 1 > 1) return (-1); val >>= lsb; val &= (1 << (msb - lsb + 1)) - 1; *where |= (Elf32_Addr)val; return (0); } /* * Process a relocation. Support for some static relocations is required * in order for the -zifunc-noplt optimization to work. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, int flags, elf_lookup_fn lookup) { #define ARM64_ELF_RELOC_LOCAL (1 << 0) #define ARM64_ELF_RELOC_LATE_IFUNC (1 << 1) Elf_Addr *where, addr, addend, val; Elf_Word rtype, symidx; const Elf_Rel *rel; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: rel = (const Elf_Rel *)data; where = (Elf_Addr *) (relocbase + rel->r_offset); addend = *where; rtype = ELF_R_TYPE(rel->r_info); symidx = ELF_R_SYM(rel->r_info); break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("unknown reloc type %d\n", type); } if ((flags & ARM64_ELF_RELOC_LATE_IFUNC) != 0) { KASSERT(type == ELF_RELOC_RELA, ("Only RELA ifunc relocations are supported")); if (rtype != R_AARCH64_IRELATIVE) return (0); } if ((flags & ARM64_ELF_RELOC_LOCAL) != 0) { if (rtype == R_AARCH64_RELATIVE) *where = elf_relocaddr(lf, relocbase + addend); return (0); } error = 0; switch (rtype) { case R_AARCH64_NONE: case R_AARCH64_RELATIVE: break; case R_AARCH64_TSTBR14: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); error = reloc_instr_imm((Elf32_Addr *)where, addr + addend - (Elf_Addr)where, 15, 2); break; case R_AARCH64_CONDBR19: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); error = reloc_instr_imm((Elf32_Addr *)where, addr + addend - (Elf_Addr)where, 20, 2); break; case R_AARCH64_JUMP26: case R_AARCH64_CALL26: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); error = reloc_instr_imm((Elf32_Addr *)where, addr + addend - (Elf_Addr)where, 27, 2); break; case R_AARCH64_ABS64: case R_AARCH64_GLOB_DAT: case R_AARCH64_JUMP_SLOT: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); *where = addr + addend; break; case R_AARCH64_IRELATIVE: addr = relocbase + addend; val = ((Elf64_Addr (*)(void))addr)(); if (*where != val) *where = val; break; default: printf("kldload: unexpected relocation type %d\n", rtype); return (-1); } return (error); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, ARM64_ELF_RELOC_LOCAL, lookup)); } /* Process one elf relocation with addend. */ int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); } int elf_reloc_late(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, ARM64_ELF_RELOC_LATE_IFUNC, lookup)); } int elf_cpu_load_file(linker_file_t lf) { if (lf->id != 1) cpu_icache_sync_range((vm_offset_t)lf->address, lf->size); return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } int elf_cpu_parse_dynamic(caddr_t loadbase __unused, Elf_Dyn *dynamic __unused) { return (0); } Index: head/sys/arm64/linux/linux_sysvec.c =================================================================== --- head/sys/arm64/linux/linux_sysvec.c (revision 365831) +++ head/sys/arm64/linux/linux_sysvec.c (revision 365832) @@ -1,573 +1,571 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1994-1996 Søren Schmidt * Copyright (c) 2018 Turing Robotic Industries Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_VERSION(linux64elf, 1); const char *linux_kplatform; static int linux_szsigcode; static vm_object_t linux_shared_page_obj; static char *linux_shared_page_mapping; extern char _binary_linux_locore_o_start; extern char _binary_linux_locore_o_end; extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); static int linux_copyout_strings(struct image_params *imgp, uintptr_t *stack_base); static int linux_elf_fixup(uintptr_t *stack_base, struct image_params *iparams); static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel); static void linux_vdso_install(const void *param); static void linux_vdso_deinstall(const void *param); static void linux_set_syscall_retval(struct thread *td, int error); static int linux_fetch_syscall_args(struct thread *td); static void linux_exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack); static int linux_vsyscall(struct thread *td); /* DTrace init */ LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); /* DTrace probes */ LIN_SDT_PROBE_DEFINE2(sysvec, linux_translate_traps, todo, "int", "int"); LIN_SDT_PROBE_DEFINE0(sysvec, linux_exec_setregs, todo); LIN_SDT_PROBE_DEFINE0(sysvec, linux_copyout_auxargs, todo); LIN_SDT_PROBE_DEFINE0(sysvec, linux_elf_fixup, todo); LIN_SDT_PROBE_DEFINE0(sysvec, linux_rt_sigreturn, todo); LIN_SDT_PROBE_DEFINE0(sysvec, linux_rt_sendsig, todo); LIN_SDT_PROBE_DEFINE0(sysvec, linux_vsyscall, todo); LIN_SDT_PROBE_DEFINE0(sysvec, linux_vdso_install, todo); LIN_SDT_PROBE_DEFINE0(sysvec, linux_vdso_deinstall, todo); /* LINUXTODO: do we have traps to translate? */ static int linux_translate_traps(int signal, int trap_code) { LIN_SDT_PROBE2(sysvec, linux_translate_traps, todo, signal, trap_code); return (signal); } LINUX_VDSO_SYM_CHAR(linux_platform); static int linux_fetch_syscall_args(struct thread *td) { struct proc *p; struct syscall_args *sa; register_t *ap; p = td->td_proc; ap = td->td_frame->tf_x; sa = &td->td_sa; sa->code = td->td_frame->tf_x[8]; /* LINUXTODO: generic syscall? */ if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; if (sa->narg > 8) panic("ARM64TODO: Could we have more than 8 args?"); memcpy(sa->args, ap, 8 * sizeof(register_t)); td->td_retval[0] = 0; return (0); } static void linux_set_syscall_retval(struct thread *td, int error) { td->td_retval[1] = td->td_frame->tf_x[1]; cpu_set_syscall_retval(td, error); if (__predict_false(error != 0)) { if (error != ERESTART && error != EJUSTRETURN) { td->td_frame->tf_x[0] = - SV_ABI_ERRNO(td->td_proc, error); + linux_to_bsd_errno(error); } } } static int linux_copyout_auxargs(struct image_params *imgp, uintptr_t base) { Elf_Auxargs *args; Elf_Auxinfo *argarray, *pos; struct proc *p; int error, issetugid; LIN_SDT_PROBE0(sysvec, linux_copyout_auxargs, todo); p = imgp->proc; args = (Elf64_Auxargs *)imgp->auxargs; argarray = pos = malloc(LINUX_AT_COUNT * sizeof(*pos), M_TEMP, M_WAITOK | M_ZERO); issetugid = p->p_flag & P_SUGID ? 1 : 0; AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, imgp->proc->p_sysent->sv_shared_page_base); #if 0 /* LINUXTODO: implement arm64 LINUX_AT_HWCAP */ AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature); #endif AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz); AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); AUXARGS_ENTRY(pos, AT_PHENT, args->phent); AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); AUXARGS_ENTRY(pos, AT_BASE, args->base); AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); AUXARGS_ENTRY(pos, LINUX_AT_SECURE, issetugid); #if 0 /* LINUXTODO: implement arm64 LINUX_AT_PLATFORM */ AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform)); #endif AUXARGS_ENTRY_PTR(pos, LINUX_AT_RANDOM, imgp->canary); if (imgp->execpathp != 0) AUXARGS_ENTRY_PTR(pos, LINUX_AT_EXECFN, imgp->execpathp); if (args->execfd != -1) AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); AUXARGS_ENTRY(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); imgp->auxargs = NULL; KASSERT(pos - argarray <= LINUX_AT_COUNT, ("Too many auxargs")); error = copyout(argarray, (void *)base, sizeof(*argarray) * LINUX_AT_COUNT); free(argarray, M_TEMP); return (error); } static int linux_elf_fixup(uintptr_t *stack_base, struct image_params *imgp) { LIN_SDT_PROBE0(sysvec, linux_elf_fixup, todo); return (0); } /* * Copy strings out to the new process address space, constructing new arg * and env vector tables. Return a pointer to the base so that it can be used * as the initial stack pointer. * LINUXTODO: deduplicate against other linuxulator archs */ static int linux_copyout_strings(struct image_params *imgp, uintptr_t *stack_base) { char **vectp; char *stringp; uintptr_t destp, ustringp; struct ps_strings *arginfo; char canary[LINUX_AT_RANDOM_LEN]; size_t execpath_len; struct proc *p; int argc, envc, error; /* Calculate string base and vector table pointers. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; p = imgp->proc; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; destp = (uintptr_t)arginfo; if (execpath_len != 0) { destp -= execpath_len; destp = rounddown2(destp, sizeof(void *)); imgp->execpathp = (void *)destp; error = copyout(imgp->execpath, imgp->execpathp, execpath_len); if (error != 0) return (error); } /* Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); destp -= roundup(sizeof(canary), sizeof(void *)); imgp->canary = (void *)destp; error = copyout(canary, imgp->canary, sizeof(canary)); if (error != 0) return (error); /* Allocate room for the argument and environment strings. */ destp -= ARG_MAX - imgp->args->stringspace; destp = rounddown2(destp, sizeof(void *)); ustringp = destp; if (imgp->auxargs) { /* * Allocate room on the stack for the ELF auxargs * array. It has up to LINUX_AT_COUNT entries. */ destp -= LINUX_AT_COUNT * sizeof(Elf64_Auxinfo); destp = rounddown2(destp, sizeof(void *)); } vectp = (char **)destp; /* * Allocate room for argc and the argv[] and env vectors including the * terminating NULL pointers. */ vectp -= 1 + imgp->args->argc + 1 + imgp->args->envc + 1; vectp = (char **)STACKALIGN(vectp); /* vectp also becomes our initial stack base. */ *stack_base = (uintptr_t)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* Copy out strings - arguments and environment. */ error = copyout(stringp, (void *)ustringp, ARG_MAX - imgp->args->stringspace); if (error != 0) return (error); /* Fill in "ps_strings" struct for ps, w, etc. */ if (suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp) != 0 || suword(&arginfo->ps_nargvstr, argc) != 0) return (EFAULT); if (suword(vectp++, argc) != 0) return (EFAULT); /* Fill in argument portion of vector table. */ for (; argc > 0; --argc) { if (suword(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* A null vector table pointer separates the argp's from the envp's. */ if (suword(vectp++, 0) != 0) return (EFAULT); if (suword(&arginfo->ps_envstr, (long)(intptr_t)vectp) != 0 || suword(&arginfo->ps_nenvstr, envc) != 0) return (EFAULT); /* Fill in environment portion of vector table. */ for (; envc > 0; --envc) { if (suword(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* The end of the vector table is a null pointer. */ if (suword(vectp, 0) != 0) return (EFAULT); if (imgp->auxargs) { vectp++; error = imgp->sysent->sv_copyout_auxargs(imgp, (uintptr_t)vectp); if (error != 0) return (error); } return (0); } /* * Reset registers to default values on exec. */ static void linux_exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct trapframe *regs = td->td_frame; /* LINUXTODO: validate */ LIN_SDT_PROBE0(sysvec, linux_exec_setregs, todo); memset(regs, 0, sizeof(*regs)); /* glibc start.S registers function pointer in x0 with atexit. */ regs->tf_sp = stack; #if 0 /* LINUXTODO: See if this is used. */ regs->tf_lr = imgp->entry_addr; #else regs->tf_lr = 0xffffffffffffffff; #endif regs->tf_elr = imgp->entry_addr; } int linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) { /* LINUXTODO: implement */ LIN_SDT_PROBE0(sysvec, linux_rt_sigreturn, todo); return (EDOOFUS); } static void linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { /* LINUXTODO: implement */ LIN_SDT_PROBE0(sysvec, linux_rt_sendsig, todo); } static int linux_vsyscall(struct thread *td) { /* LINUXTODO: implement */ LIN_SDT_PROBE0(sysvec, linux_vsyscall, todo); return (EDOOFUS); } struct sysentvec elf_linux_sysvec = { .sv_size = LINUX_SYS_MAXSYSCALL, .sv_table = linux_sysent, - .sv_errsize = ELAST + 1, - .sv_errtbl = linux_errtbl, .sv_transtrap = linux_translate_traps, .sv_fixup = linux_elf_fixup, .sv_sendsig = linux_rt_sendsig, .sv_sigcode = &_binary_linux_locore_o_start, .sv_szsigcode = &linux_szsigcode, .sv_name = "Linux ELF64", .sv_coredump = elf64_coredump, .sv_imgact_try = linux_exec_imgact_try, .sv_minsigstksz = LINUX_MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, /* XXX */ .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, .sv_copyout_auxargs = linux_copyout_auxargs, .sv_copyout_strings = linux_copyout_strings, .sv_setregs = linux_exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_LINUX | SV_LP64 | SV_SHP, .sv_set_syscall_retval = linux_set_syscall_retval, .sv_fetch_syscall_args = linux_fetch_syscall_args, .sv_syscallnames = NULL, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, .sv_thread_detach = linux_thread_detach, .sv_trap = linux_vsyscall, }; static void linux_vdso_install(const void *param) { linux_szsigcode = (&_binary_linux_locore_o_end - &_binary_linux_locore_o_start); if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) panic("invalid Linux VDSO size\n"); __elfN(linux_vdso_fixup)(&elf_linux_sysvec); linux_shared_page_obj = __elfN(linux_shared_page_init) (&linux_shared_page_mapping); __elfN(linux_vdso_reloc)(&elf_linux_sysvec); memcpy(linux_shared_page_mapping, elf_linux_sysvec.sv_sigcode, linux_szsigcode); elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; printf("LINUXTODO: %s: fix linux_kplatform\n", __func__); #if 0 linux_kplatform = linux_shared_page_mapping + (linux_platform - (caddr_t)elf_linux_sysvec.sv_shared_page_base); #else linux_kplatform = "arm64"; #endif } SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, linux_vdso_install, NULL); static void linux_vdso_deinstall(const void *param) { LIN_SDT_PROBE0(sysvec, linux_vdso_deinstall, todo); __elfN(linux_shared_page_fini)(linux_shared_page_obj); } SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, linux_vdso_deinstall, NULL); static char GNU_ABI_VENDOR[] = "GNU"; static int GNU_ABI_LINUX = 0; /* LINUXTODO: deduplicate */ static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel) { const Elf32_Word *desc; uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); desc = (const Elf32_Word *)p; if (desc[0] != GNU_ABI_LINUX) return (false); *osrel = LINUX_KERNVER(desc[1], desc[2], desc[3]); return (true); } static Elf_Brandnote linux64_brandnote = { .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), .hdr.n_descsz = 16, .hdr.n_type = 1, .vendor = GNU_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = linux_trans_osrel }; static Elf64_Brandinfo linux_glibc2brand = { .brand = ELFOSABI_LINUX, .machine = EM_AARCH64, .compat_3_brand = "Linux", .emul_path = linux_emul_path, .interp_path = "/lib64/ld-linux-x86-64.so.2", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux64_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; Elf64_Brandinfo *linux_brandlist[] = { &linux_glibc2brand, NULL }; static int linux64_elf_modevent(module_t mod, int type, void *data) { Elf64_Brandinfo **brandinfo; struct linux_ioctl_handler**lihp; int error; error = 0; switch(type) { case MOD_LOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf64_insert_brand_entry(*brandinfo) < 0) error = EINVAL; if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_register_handler(*lihp); stclohz = (stathz ? stathz : hz); if (bootverbose) printf("Linux arm64 ELF exec handler installed\n"); } break; case MOD_UNLOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf64_brand_inuse(*brandinfo)) error = EBUSY; if (error == 0) { for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf64_remove_brand_entry(*brandinfo) < 0) error = EINVAL; } if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_unregister_handler(*lihp); if (bootverbose) printf("Linux ELF exec handler removed\n"); } else printf("Could not deinstall ELF interpreter entry\n"); break; default: return (EOPNOTSUPP); } return (error); } static moduledata_t linux64_elf_mod = { "linux64elf", linux64_elf_modevent, 0 }; DECLARE_MODULE_TIED(linux64elf, linux64_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); MODULE_DEPEND(linux64elf, linux_common, 1, 1, 1); FEATURE(linux64, "AArch64 Linux 64bit support"); Index: head/sys/compat/ia32/ia32_sysvec.c =================================================================== --- head/sys/compat/ia32/ia32_sysvec.c (revision 365831) +++ head/sys/compat/ia32/ia32_sysvec.c (revision 365832) @@ -1,235 +1,233 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2002 Doug Rabson * Copyright (c) 2003 Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include CTASSERT(sizeof(struct ia32_mcontext) == 640); CTASSERT(sizeof(struct ia32_ucontext) == 704); CTASSERT(sizeof(struct ia32_sigframe) == 800); CTASSERT(sizeof(struct siginfo32) == 64); #ifdef COMPAT_FREEBSD4 CTASSERT(sizeof(struct ia32_mcontext4) == 260); CTASSERT(sizeof(struct ia32_ucontext4) == 324); CTASSERT(sizeof(struct ia32_sigframe4) == 408); #endif extern const char *freebsd32_syscallnames[]; static SYSCTL_NODE(_compat, OID_AUTO, ia32, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "ia32 mode"); static u_long ia32_maxdsiz = IA32_MAXDSIZ; SYSCTL_ULONG(_compat_ia32, OID_AUTO, maxdsiz, CTLFLAG_RWTUN, &ia32_maxdsiz, 0, ""); u_long ia32_maxssiz = IA32_MAXSSIZ; SYSCTL_ULONG(_compat_ia32, OID_AUTO, maxssiz, CTLFLAG_RWTUN, &ia32_maxssiz, 0, ""); static u_long ia32_maxvmem = IA32_MAXVMEM; SYSCTL_ULONG(_compat_ia32, OID_AUTO, maxvmem, CTLFLAG_RWTUN, &ia32_maxvmem, 0, ""); struct sysentvec ia32_freebsd_sysvec = { .sv_size = FREEBSD32_SYS_MAXSYSCALL, .sv_table = freebsd32_sysent, - .sv_errsize = 0, - .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = elf32_freebsd_fixup, .sv_sendsig = ia32_sendsig, .sv_sigcode = ia32_sigcode, .sv_szsigcode = &sz_ia32_sigcode, .sv_name = "FreeBSD ELF32", .sv_coredump = elf32_coredump, .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = FREEBSD32_MINUSER, .sv_maxuser = FREEBSD32_MAXUSER, .sv_usrstack = FREEBSD32_USRSTACK, .sv_psstrings = FREEBSD32_PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_auxargs = elf32_freebsd_copyout_auxargs, .sv_copyout_strings = freebsd32_copyout_strings, .sv_setregs = ia32_setregs, .sv_fixlimit = ia32_fixlimit, .sv_maxssiz = &ia32_maxssiz, .sv_flags = SV_ABI_FREEBSD | SV_ASLR | SV_IA32 | SV_ILP32 | SV_SHP | SV_TIMEKEEP, .sv_set_syscall_retval = ia32_set_syscall_retval, .sv_fetch_syscall_args = ia32_fetch_syscall_args, .sv_syscallnames = freebsd32_syscallnames, .sv_shared_page_base = FREEBSD32_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_stackgap = elf32_stackgap, }; INIT_SYSENTVEC(elf_ia32_sysvec, &ia32_freebsd_sysvec); static Elf32_Brandinfo ia32_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &ia32_freebsd_sysvec, .interp_newpath = "/libexec/ld-elf32.so.1", .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(ia32, SI_SUB_EXEC, SI_ORDER_MIDDLE, (sysinit_cfunc_t) elf32_insert_brand_entry, &ia32_brand_info); static Elf32_Brandinfo ia32_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &ia32_freebsd_sysvec, .interp_newpath = "/libexec/ld-elf32.so.1", .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(oia32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &ia32_brand_oinfo); static Elf32_Brandinfo kia32_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/lib/ld.so.1", .sysvec = &ia32_freebsd_sysvec, .brand_note = &elf32_kfreebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY }; SYSINIT(kia32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &kia32_brand_info); void elf32_dump_thread(struct thread *td, void *dst, size_t *off) { void *buf; size_t len; len = 0; if (use_xsave) { if (dst != NULL) { fpugetregs(td); len += elf32_populate_note(NT_X86_XSTATE, get_pcb_user_save_td(td), dst, cpu_max_ext_state_size, &buf); *(uint64_t *)((char *)buf + X86_XSTATE_XCR0_OFFSET) = xsave_mask; } else len += elf32_populate_note(NT_X86_XSTATE, NULL, NULL, cpu_max_ext_state_size, NULL); } *off = len; } void ia32_fixlimit(struct rlimit *rl, int which) { switch (which) { case RLIMIT_DATA: if (ia32_maxdsiz != 0) { if (rl->rlim_cur > ia32_maxdsiz) rl->rlim_cur = ia32_maxdsiz; if (rl->rlim_max > ia32_maxdsiz) rl->rlim_max = ia32_maxdsiz; } break; case RLIMIT_STACK: if (ia32_maxssiz != 0) { if (rl->rlim_cur > ia32_maxssiz) rl->rlim_cur = ia32_maxssiz; if (rl->rlim_max > ia32_maxssiz) rl->rlim_max = ia32_maxssiz; } break; case RLIMIT_VMEM: if (ia32_maxvmem != 0) { if (rl->rlim_cur > ia32_maxvmem) rl->rlim_cur = ia32_maxvmem; if (rl->rlim_max > ia32_maxvmem) rl->rlim_max = ia32_maxvmem; } break; } } Index: head/sys/compat/linux/linux.h =================================================================== --- head/sys/compat/linux/linux.h (revision 365831) +++ head/sys/compat/linux/linux.h (revision 365832) @@ -1,195 +1,197 @@ /*- * Copyright (c) 2015 Dmitry Chagin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _LINUX_MI_H_ #define _LINUX_MI_H_ #include #define LINUX_IFHWADDRLEN 6 #define LINUX_IFNAMSIZ 16 /* * Criteria for interface name translation */ #define IFP_IS_ETH(ifp) (ifp->if_type == IFT_ETHER) #define IFP_IS_LOOP(ifp) (ifp->if_type == IFT_LOOP) struct l_sockaddr { unsigned short sa_family; char sa_data[14]; }; #define LINUX_ARPHRD_ETHER 1 #define LINUX_ARPHRD_LOOPBACK 772 /* * Supported address families */ #define LINUX_AF_UNSPEC 0 #define LINUX_AF_UNIX 1 #define LINUX_AF_INET 2 #define LINUX_AF_AX25 3 #define LINUX_AF_IPX 4 #define LINUX_AF_APPLETALK 5 #define LINUX_AF_INET6 10 /* * net device flags */ #define LINUX_IFF_UP 0x0001 #define LINUX_IFF_BROADCAST 0x0002 #define LINUX_IFF_DEBUG 0x0004 #define LINUX_IFF_LOOPBACK 0x0008 #define LINUX_IFF_POINTOPOINT 0x0010 #define LINUX_IFF_NOTRAILERS 0x0020 #define LINUX_IFF_RUNNING 0x0040 #define LINUX_IFF_NOARP 0x0080 #define LINUX_IFF_PROMISC 0x0100 #define LINUX_IFF_ALLMULTI 0x0200 #define LINUX_IFF_MASTER 0x0400 #define LINUX_IFF_SLAVE 0x0800 #define LINUX_IFF_MULTICAST 0x1000 #define LINUX_IFF_PORTSEL 0x2000 #define LINUX_IFF_AUTOMEDIA 0x4000 #define LINUX_IFF_DYNAMIC 0x8000 /* sigaltstack */ #define LINUX_SS_ONSTACK 1 #define LINUX_SS_DISABLE 2 int linux_to_bsd_sigaltstack(int lsa); int bsd_to_linux_sigaltstack(int bsa); /* sigset */ typedef struct { uint64_t __mask; } l_sigset_t; /* primitives to manipulate sigset_t */ #define LINUX_SIGEMPTYSET(set) (set).__mask = 0 #define LINUX_SIGISMEMBER(set, sig) (1UL & ((set).__mask >> _SIG_IDX(sig))) #define LINUX_SIGADDSET(set, sig) (set).__mask |= 1UL << _SIG_IDX(sig) void linux_to_bsd_sigset(l_sigset_t *, sigset_t *); void bsd_to_linux_sigset(sigset_t *, l_sigset_t *); /* signaling */ #define LINUX_SIGHUP 1 #define LINUX_SIGINT 2 #define LINUX_SIGQUIT 3 #define LINUX_SIGILL 4 #define LINUX_SIGTRAP 5 #define LINUX_SIGABRT 6 #define LINUX_SIGIOT LINUX_SIGABRT #define LINUX_SIGBUS 7 #define LINUX_SIGFPE 8 #define LINUX_SIGKILL 9 #define LINUX_SIGUSR1 10 #define LINUX_SIGSEGV 11 #define LINUX_SIGUSR2 12 #define LINUX_SIGPIPE 13 #define LINUX_SIGALRM 14 #define LINUX_SIGTERM 15 #define LINUX_SIGSTKFLT 16 #define LINUX_SIGCHLD 17 #define LINUX_SIGCONT 18 #define LINUX_SIGSTOP 19 #define LINUX_SIGTSTP 20 #define LINUX_SIGTTIN 21 #define LINUX_SIGTTOU 22 #define LINUX_SIGURG 23 #define LINUX_SIGXCPU 24 #define LINUX_SIGXFSZ 25 #define LINUX_SIGVTALRM 26 #define LINUX_SIGPROF 27 #define LINUX_SIGWINCH 28 #define LINUX_SIGIO 29 #define LINUX_SIGPOLL LINUX_SIGIO #define LINUX_SIGPWR 30 #define LINUX_SIGSYS 31 #define LINUX_SIGTBLSZ 31 #define LINUX_SIGRTMIN 32 #define LINUX_SIGRTMAX 64 #define LINUX_SIG_VALID(sig) ((sig) <= LINUX_SIGRTMAX && (sig) > 0) int linux_to_bsd_signal(int sig); int bsd_to_linux_signal(int sig); extern LIST_HEAD(futex_list, futex) futex_list; extern struct mtx futex_mtx; void linux_dev_shm_create(void); void linux_dev_shm_destroy(void); /* * mask=0 is not sensible for this application, so it will be taken to mean * a mask equivalent to the value. Otherwise, (word & mask) == value maps to * (word & ~mask) | value in a bitfield for the platform we're converting to. */ struct bsd_to_linux_bitmap { int bsd_mask; int bsd_value; int linux_mask; int linux_value; }; int bsd_to_linux_bits_(int value, struct bsd_to_linux_bitmap *bitmap, size_t mapcnt, int no_value); int linux_to_bsd_bits_(int value, struct bsd_to_linux_bitmap *bitmap, size_t mapcnt, int no_value); /* * These functions are used for simplification of BSD <-> Linux bit conversions. * Given `value`, a bit field, these functions will walk the given bitmap table * and set the appropriate bits for the target platform. If any bits were * successfully converted, then the return value is the equivalent of value * represented with the bit values appropriate for the target platform. * Otherwise, the value supplied as `no_value` is returned. */ #define bsd_to_linux_bits(_val, _bmap, _noval) \ bsd_to_linux_bits_((_val), (_bmap), nitems((_bmap)), (_noval)) #define linux_to_bsd_bits(_val, _bmap, _noval) \ linux_to_bsd_bits_((_val), (_bmap), nitems((_bmap)), (_noval)) /* * Easy mapping helpers. BITMAP_EASY_LINUX represents a single bit to be * translated, and the FreeBSD and Linux values are supplied. BITMAP_1t1_LINUX * is the extreme version of this, where not only is it a single bit, but the * name of the macro used to represent the Linux version of a bit literally has * LINUX_ prepended to the normal name. */ #define BITMAP_EASY_LINUX(_name, _linux_name) \ { \ .bsd_value = (_name), \ .linux_value = (_linux_name), \ } #define BITMAP_1t1_LINUX(_name) BITMAP_EASY_LINUX(_name, LINUX_##_name) +int linux_to_bsd_errno(int error); + #endif /* _LINUX_MI_H_ */ Index: head/sys/compat/linux/linux_errno.c =================================================================== --- head/sys/compat/linux/linux_errno.c (revision 365831) +++ head/sys/compat/linux/linux_errno.c (revision 365832) @@ -1,6 +1,21 @@ /* $FreeBSD$ */ #include +__FBSDID("$FreeBSD$"); + +#include #include +#include +#include #include + +int +linux_to_bsd_errno(int error) +{ + + KASSERT(error >= 0 && error <= ELAST, + ("%s: bad error %d", __func__, error)); + + return (linux_errtbl[error]); +} Index: head/sys/compat/linux/linux_socket.c =================================================================== --- head/sys/compat/linux/linux_socket.c (revision 365831) +++ head/sys/compat/linux/linux_socket.c (revision 365832) @@ -1,1836 +1,1836 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1995 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* XXX we use functions that might not exist. */ #include "opt_compat.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #endif #ifdef COMPAT_LINUX32 #include #include #else #include #include #endif #include #include #include #include #include #include static int linux_sendmsg_common(struct thread *, l_int, struct l_msghdr *, l_uint); static int linux_recvmsg_common(struct thread *, l_int, struct l_msghdr *, l_uint, struct msghdr *); static int linux_set_socket_flags(int, int *); static int linux_to_bsd_sockopt_level(int level) { if (level == LINUX_SOL_SOCKET) return (SOL_SOCKET); /* Remaining values are RFC-defined protocol numbers. */ return (level); } static int bsd_to_linux_sockopt_level(int level) { if (level == SOL_SOCKET) return (LINUX_SOL_SOCKET); return (level); } static int linux_to_bsd_ip_sockopt(int opt) { switch (opt) { case LINUX_IP_TOS: return (IP_TOS); case LINUX_IP_TTL: return (IP_TTL); case LINUX_IP_OPTIONS: return (IP_OPTIONS); case LINUX_IP_MULTICAST_IF: return (IP_MULTICAST_IF); case LINUX_IP_MULTICAST_TTL: return (IP_MULTICAST_TTL); case LINUX_IP_MULTICAST_LOOP: return (IP_MULTICAST_LOOP); case LINUX_IP_ADD_MEMBERSHIP: return (IP_ADD_MEMBERSHIP); case LINUX_IP_DROP_MEMBERSHIP: return (IP_DROP_MEMBERSHIP); case LINUX_IP_HDRINCL: return (IP_HDRINCL); } return (-1); } static int linux_to_bsd_ip6_sockopt(int opt) { switch (opt) { case LINUX_IPV6_NEXTHOP: return (IPV6_NEXTHOP); case LINUX_IPV6_UNICAST_HOPS: return (IPV6_UNICAST_HOPS); case LINUX_IPV6_MULTICAST_IF: return (IPV6_MULTICAST_IF); case LINUX_IPV6_MULTICAST_HOPS: return (IPV6_MULTICAST_HOPS); case LINUX_IPV6_MULTICAST_LOOP: return (IPV6_MULTICAST_LOOP); case LINUX_IPV6_ADD_MEMBERSHIP: return (IPV6_JOIN_GROUP); case LINUX_IPV6_DROP_MEMBERSHIP: return (IPV6_LEAVE_GROUP); case LINUX_IPV6_V6ONLY: return (IPV6_V6ONLY); case LINUX_IPV6_DONTFRAG: return (IPV6_DONTFRAG); #if 0 case LINUX_IPV6_CHECKSUM: return (IPV6_CHECKSUM); case LINUX_IPV6_RECVPKTINFO: return (IPV6_RECVPKTINFO); case LINUX_IPV6_PKTINFO: return (IPV6_PKTINFO); case LINUX_IPV6_RECVHOPLIMIT: return (IPV6_RECVHOPLIMIT); case LINUX_IPV6_HOPLIMIT: return (IPV6_HOPLIMIT); case LINUX_IPV6_RECVHOPOPTS: return (IPV6_RECVHOPOPTS); case LINUX_IPV6_HOPOPTS: return (IPV6_HOPOPTS); case LINUX_IPV6_RTHDRDSTOPTS: return (IPV6_RTHDRDSTOPTS); case LINUX_IPV6_RECVRTHDR: return (IPV6_RECVRTHDR); case LINUX_IPV6_RTHDR: return (IPV6_RTHDR); case LINUX_IPV6_RECVDSTOPTS: return (IPV6_RECVDSTOPTS); case LINUX_IPV6_DSTOPTS: return (IPV6_DSTOPTS); case LINUX_IPV6_RECVPATHMTU: return (IPV6_RECVPATHMTU); case LINUX_IPV6_PATHMTU: return (IPV6_PATHMTU); #endif } return (-1); } static int linux_to_bsd_so_sockopt(int opt) { switch (opt) { case LINUX_SO_DEBUG: return (SO_DEBUG); case LINUX_SO_REUSEADDR: return (SO_REUSEADDR); case LINUX_SO_TYPE: return (SO_TYPE); case LINUX_SO_ERROR: return (SO_ERROR); case LINUX_SO_DONTROUTE: return (SO_DONTROUTE); case LINUX_SO_BROADCAST: return (SO_BROADCAST); case LINUX_SO_SNDBUF: case LINUX_SO_SNDBUFFORCE: return (SO_SNDBUF); case LINUX_SO_RCVBUF: case LINUX_SO_RCVBUFFORCE: return (SO_RCVBUF); case LINUX_SO_KEEPALIVE: return (SO_KEEPALIVE); case LINUX_SO_OOBINLINE: return (SO_OOBINLINE); case LINUX_SO_LINGER: return (SO_LINGER); case LINUX_SO_REUSEPORT: return (SO_REUSEPORT_LB); case LINUX_SO_PEERCRED: return (LOCAL_PEERCRED); case LINUX_SO_RCVLOWAT: return (SO_RCVLOWAT); case LINUX_SO_SNDLOWAT: return (SO_SNDLOWAT); case LINUX_SO_RCVTIMEO: return (SO_RCVTIMEO); case LINUX_SO_SNDTIMEO: return (SO_SNDTIMEO); case LINUX_SO_TIMESTAMP: return (SO_TIMESTAMP); case LINUX_SO_ACCEPTCONN: return (SO_ACCEPTCONN); case LINUX_SO_PROTOCOL: return (SO_PROTOCOL); } return (-1); } static int linux_to_bsd_tcp_sockopt(int opt) { switch (opt) { case LINUX_TCP_NODELAY: return (TCP_NODELAY); case LINUX_TCP_MAXSEG: return (TCP_MAXSEG); case LINUX_TCP_CORK: return (TCP_NOPUSH); case LINUX_TCP_KEEPIDLE: return (TCP_KEEPIDLE); case LINUX_TCP_KEEPINTVL: return (TCP_KEEPINTVL); case LINUX_TCP_KEEPCNT: return (TCP_KEEPCNT); case LINUX_TCP_MD5SIG: return (TCP_MD5SIG); } return (-1); } static int linux_to_bsd_msg_flags(int flags) { int ret_flags = 0; if (flags & LINUX_MSG_OOB) ret_flags |= MSG_OOB; if (flags & LINUX_MSG_PEEK) ret_flags |= MSG_PEEK; if (flags & LINUX_MSG_DONTROUTE) ret_flags |= MSG_DONTROUTE; if (flags & LINUX_MSG_CTRUNC) ret_flags |= MSG_CTRUNC; if (flags & LINUX_MSG_TRUNC) ret_flags |= MSG_TRUNC; if (flags & LINUX_MSG_DONTWAIT) ret_flags |= MSG_DONTWAIT; if (flags & LINUX_MSG_EOR) ret_flags |= MSG_EOR; if (flags & LINUX_MSG_WAITALL) ret_flags |= MSG_WAITALL; if (flags & LINUX_MSG_NOSIGNAL) ret_flags |= MSG_NOSIGNAL; #if 0 /* not handled */ if (flags & LINUX_MSG_PROXY) ; if (flags & LINUX_MSG_FIN) ; if (flags & LINUX_MSG_SYN) ; if (flags & LINUX_MSG_CONFIRM) ; if (flags & LINUX_MSG_RST) ; if (flags & LINUX_MSG_ERRQUEUE) ; #endif return (ret_flags); } static int linux_to_bsd_cmsg_type(int cmsg_type) { switch (cmsg_type) { case LINUX_SCM_RIGHTS: return (SCM_RIGHTS); case LINUX_SCM_CREDENTIALS: return (SCM_CREDS); } return (-1); } static int bsd_to_linux_cmsg_type(int cmsg_type) { switch (cmsg_type) { case SCM_RIGHTS: return (LINUX_SCM_RIGHTS); case SCM_CREDS: return (LINUX_SCM_CREDENTIALS); case SCM_TIMESTAMP: return (LINUX_SCM_TIMESTAMP); } return (-1); } static int linux_to_bsd_msghdr(struct msghdr *bhdr, const struct l_msghdr *lhdr) { if (lhdr->msg_controllen > INT_MAX) return (ENOBUFS); bhdr->msg_name = PTRIN(lhdr->msg_name); bhdr->msg_namelen = lhdr->msg_namelen; bhdr->msg_iov = PTRIN(lhdr->msg_iov); bhdr->msg_iovlen = lhdr->msg_iovlen; bhdr->msg_control = PTRIN(lhdr->msg_control); /* * msg_controllen is skipped since BSD and LINUX control messages * are potentially different sizes (e.g. the cred structure used * by SCM_CREDS is different between the two operating system). * * The caller can set it (if necessary) after converting all the * control messages. */ bhdr->msg_flags = linux_to_bsd_msg_flags(lhdr->msg_flags); return (0); } static int bsd_to_linux_msghdr(const struct msghdr *bhdr, struct l_msghdr *lhdr) { lhdr->msg_name = PTROUT(bhdr->msg_name); lhdr->msg_namelen = bhdr->msg_namelen; lhdr->msg_iov = PTROUT(bhdr->msg_iov); lhdr->msg_iovlen = bhdr->msg_iovlen; lhdr->msg_control = PTROUT(bhdr->msg_control); /* * msg_controllen is skipped since BSD and LINUX control messages * are potentially different sizes (e.g. the cred structure used * by SCM_CREDS is different between the two operating system). * * The caller can set it (if necessary) after converting all the * control messages. */ /* msg_flags skipped */ return (0); } static int linux_set_socket_flags(int lflags, int *flags) { if (lflags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK)) return (EINVAL); if (lflags & LINUX_SOCK_NONBLOCK) *flags |= SOCK_NONBLOCK; if (lflags & LINUX_SOCK_CLOEXEC) *flags |= SOCK_CLOEXEC; return (0); } static int linux_sendit(struct thread *td, int s, struct msghdr *mp, int flags, struct mbuf *control, enum uio_seg segflg) { struct sockaddr *to; int error, len; if (mp->msg_name != NULL) { len = mp->msg_namelen; error = linux_to_bsd_sockaddr(mp->msg_name, &to, &len); if (error != 0) return (error); mp->msg_name = to; } else to = NULL; error = kern_sendit(td, s, mp, linux_to_bsd_msg_flags(flags), control, segflg); if (to) free(to, M_SONAME); return (error); } /* Return 0 if IP_HDRINCL is set for the given socket. */ static int linux_check_hdrincl(struct thread *td, int s) { int error, optval; socklen_t size_val; size_val = sizeof(optval); error = kern_getsockopt(td, s, IPPROTO_IP, IP_HDRINCL, &optval, UIO_SYSSPACE, &size_val); if (error != 0) return (error); return (optval == 0); } /* * Updated sendto() when IP_HDRINCL is set: * tweak endian-dependent fields in the IP packet. */ static int linux_sendto_hdrincl(struct thread *td, struct linux_sendto_args *linux_args) { /* * linux_ip_copysize defines how many bytes we should copy * from the beginning of the IP packet before we customize it for BSD. * It should include all the fields we modify (ip_len and ip_off). */ #define linux_ip_copysize 8 struct ip *packet; struct msghdr msg; struct iovec aiov[1]; int error; /* Check that the packet isn't too big or too small. */ if (linux_args->len < linux_ip_copysize || linux_args->len > IP_MAXPACKET) return (EINVAL); packet = (struct ip *)malloc(linux_args->len, M_LINUX, M_WAITOK); /* Make kernel copy of the packet to be sent */ if ((error = copyin(PTRIN(linux_args->msg), packet, linux_args->len))) goto goout; /* Convert fields from Linux to BSD raw IP socket format */ packet->ip_len = linux_args->len; packet->ip_off = ntohs(packet->ip_off); /* Prepare the msghdr and iovec structures describing the new packet */ msg.msg_name = PTRIN(linux_args->to); msg.msg_namelen = linux_args->tolen; msg.msg_iov = aiov; msg.msg_iovlen = 1; msg.msg_control = NULL; msg.msg_flags = 0; aiov[0].iov_base = (char *)packet; aiov[0].iov_len = linux_args->len; error = linux_sendit(td, linux_args->s, &msg, linux_args->flags, NULL, UIO_SYSSPACE); goout: free(packet, M_LINUX); return (error); } int linux_socket(struct thread *td, struct linux_socket_args *args) { int domain, retval_socket, type; type = args->type & LINUX_SOCK_TYPE_MASK; if (type < 0 || type > LINUX_SOCK_MAX) return (EINVAL); retval_socket = linux_set_socket_flags(args->type & ~LINUX_SOCK_TYPE_MASK, &type); if (retval_socket != 0) return (retval_socket); domain = linux_to_bsd_domain(args->domain); if (domain == -1) return (EAFNOSUPPORT); retval_socket = kern_socket(td, domain, type, args->protocol); if (retval_socket) return (retval_socket); if (type == SOCK_RAW && (args->protocol == IPPROTO_RAW || args->protocol == 0) && domain == PF_INET) { /* It's a raw IP socket: set the IP_HDRINCL option. */ int hdrincl; hdrincl = 1; /* We ignore any error returned by kern_setsockopt() */ kern_setsockopt(td, td->td_retval[0], IPPROTO_IP, IP_HDRINCL, &hdrincl, UIO_SYSSPACE, sizeof(hdrincl)); } #ifdef INET6 /* * Linux AF_INET6 socket has IPV6_V6ONLY setsockopt set to 0 by default * and some apps depend on this. So, set V6ONLY to 0 for Linux apps. * For simplicity we do this unconditionally of the net.inet6.ip6.v6only * sysctl value. */ if (domain == PF_INET6) { int v6only; v6only = 0; /* We ignore any error returned by setsockopt() */ kern_setsockopt(td, td->td_retval[0], IPPROTO_IPV6, IPV6_V6ONLY, &v6only, UIO_SYSSPACE, sizeof(v6only)); } #endif return (retval_socket); } int linux_bind(struct thread *td, struct linux_bind_args *args) { struct sockaddr *sa; int error; error = linux_to_bsd_sockaddr(PTRIN(args->name), &sa, &args->namelen); if (error != 0) return (error); error = kern_bindat(td, AT_FDCWD, args->s, sa); free(sa, M_SONAME); /* XXX */ if (error == EADDRNOTAVAIL && args->namelen != sizeof(struct sockaddr_in)) return (EINVAL); return (error); } int linux_connect(struct thread *td, struct linux_connect_args *args) { struct socket *so; struct sockaddr *sa; struct file *fp; u_int fflag; int error; error = linux_to_bsd_sockaddr(PTRIN(args->name), &sa, &args->namelen); if (error != 0) return (error); error = kern_connectat(td, AT_FDCWD, args->s, sa); free(sa, M_SONAME); if (error != EISCONN) return (error); /* * Linux doesn't return EISCONN the first time it occurs, * when on a non-blocking socket. Instead it returns the * error getsockopt(SOL_SOCKET, SO_ERROR) would return on BSD. */ error = getsock_cap(td, args->s, &cap_connect_rights, &fp, &fflag, NULL); if (error != 0) return (error); error = EISCONN; so = fp->f_data; if (fflag & FNONBLOCK) { SOCK_LOCK(so); if (so->so_emuldata == 0) error = so->so_error; so->so_emuldata = (void *)1; SOCK_UNLOCK(so); } fdrop(fp, td); return (error); } int linux_listen(struct thread *td, struct linux_listen_args *args) { return (kern_listen(td, args->s, args->backlog)); } static int linux_accept_common(struct thread *td, int s, l_uintptr_t addr, l_uintptr_t namelen, int flags) { struct l_sockaddr *lsa; struct sockaddr *sa; struct file *fp, *fp1; int bflags, len; struct socket *so; int error, error1; bflags = 0; fp = NULL; sa = NULL; error = linux_set_socket_flags(flags, &bflags); if (error != 0) return (error); if (PTRIN(addr) == NULL) { len = 0; error = kern_accept4(td, s, NULL, NULL, bflags, NULL); } else { error = copyin(PTRIN(namelen), &len, sizeof(len)); if (error != 0) return (error); if (len < 0) return (EINVAL); error = kern_accept4(td, s, &sa, &len, bflags, &fp); } /* * Translate errno values into ones used by Linux. */ if (error != 0) { /* * XXX. This is wrong, different sockaddr structures * have different sizes. */ switch (error) { case EFAULT: if (namelen != sizeof(struct sockaddr_in)) error = EINVAL; break; case EINVAL: error1 = getsock_cap(td, s, &cap_accept_rights, &fp1, NULL, NULL); if (error1 != 0) { error = error1; break; } so = fp1->f_data; if (so->so_type == SOCK_DGRAM) error = EOPNOTSUPP; fdrop(fp1, td); break; } return (error); } if (len != 0) { error = bsd_to_linux_sockaddr(sa, &lsa, len); if (error == 0) error = copyout(lsa, PTRIN(addr), len); free(lsa, M_SONAME); /* * XXX: We should also copyout the len, shouldn't we? */ if (error != 0) { fdclose(td, fp, td->td_retval[0]); td->td_retval[0] = 0; } } if (fp != NULL) fdrop(fp, td); free(sa, M_SONAME); return (error); } int linux_accept(struct thread *td, struct linux_accept_args *args) { return (linux_accept_common(td, args->s, args->addr, args->namelen, 0)); } int linux_accept4(struct thread *td, struct linux_accept4_args *args) { return (linux_accept_common(td, args->s, args->addr, args->namelen, args->flags)); } int linux_getsockname(struct thread *td, struct linux_getsockname_args *args) { struct l_sockaddr *lsa; struct sockaddr *sa; int len, error; error = copyin(PTRIN(args->namelen), &len, sizeof(len)); if (error != 0) return (error); error = kern_getsockname(td, args->s, &sa, &len); if (error != 0) return (error); if (len != 0) { error = bsd_to_linux_sockaddr(sa, &lsa, len); if (error == 0) error = copyout(lsa, PTRIN(args->addr), len); free(lsa, M_SONAME); } free(sa, M_SONAME); if (error == 0) error = copyout(&len, PTRIN(args->namelen), sizeof(len)); return (error); } int linux_getpeername(struct thread *td, struct linux_getpeername_args *args) { struct l_sockaddr *lsa; struct sockaddr *sa; int len, error; error = copyin(PTRIN(args->namelen), &len, sizeof(len)); if (error != 0) return (error); if (len < 0) return (EINVAL); error = kern_getpeername(td, args->s, &sa, &len); if (error != 0) return (error); if (len != 0) { error = bsd_to_linux_sockaddr(sa, &lsa, len); if (error == 0) error = copyout(lsa, PTRIN(args->addr), len); free(lsa, M_SONAME); } free(sa, M_SONAME); if (error == 0) error = copyout(&len, PTRIN(args->namelen), sizeof(len)); return (error); } int linux_socketpair(struct thread *td, struct linux_socketpair_args *args) { int domain, error, sv[2], type; domain = linux_to_bsd_domain(args->domain); if (domain != PF_LOCAL) return (EAFNOSUPPORT); type = args->type & LINUX_SOCK_TYPE_MASK; if (type < 0 || type > LINUX_SOCK_MAX) return (EINVAL); error = linux_set_socket_flags(args->type & ~LINUX_SOCK_TYPE_MASK, &type); if (error != 0) return (error); if (args->protocol != 0 && args->protocol != PF_UNIX) { /* * Use of PF_UNIX as protocol argument is not right, * but Linux does it. * Do not map PF_UNIX as its Linux value is identical * to FreeBSD one. */ return (EPROTONOSUPPORT); } error = kern_socketpair(td, domain, type, 0, sv); if (error != 0) return (error); error = copyout(sv, PTRIN(args->rsv), 2 * sizeof(int)); if (error != 0) { (void)kern_close(td, sv[0]); (void)kern_close(td, sv[1]); } return (error); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) struct linux_send_args { register_t s; register_t msg; register_t len; register_t flags; }; static int linux_send(struct thread *td, struct linux_send_args *args) { struct sendto_args /* { int s; caddr_t buf; int len; int flags; caddr_t to; int tolen; } */ bsd_args; struct file *fp; int error, fflag; bsd_args.s = args->s; bsd_args.buf = (caddr_t)PTRIN(args->msg); bsd_args.len = args->len; bsd_args.flags = args->flags; bsd_args.to = NULL; bsd_args.tolen = 0; error = sys_sendto(td, &bsd_args); if (error == ENOTCONN) { /* * Linux doesn't return ENOTCONN for non-blocking sockets. * Instead it returns the EAGAIN. */ error = getsock_cap(td, args->s, &cap_send_rights, &fp, &fflag, NULL); if (error == 0) { if (fflag & FNONBLOCK) error = EAGAIN; fdrop(fp, td); } } return (error); } struct linux_recv_args { register_t s; register_t msg; register_t len; register_t flags; }; static int linux_recv(struct thread *td, struct linux_recv_args *args) { struct recvfrom_args /* { int s; caddr_t buf; int len; int flags; struct sockaddr *from; socklen_t fromlenaddr; } */ bsd_args; bsd_args.s = args->s; bsd_args.buf = (caddr_t)PTRIN(args->msg); bsd_args.len = args->len; bsd_args.flags = linux_to_bsd_msg_flags(args->flags); bsd_args.from = NULL; bsd_args.fromlenaddr = 0; return (sys_recvfrom(td, &bsd_args)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_sendto(struct thread *td, struct linux_sendto_args *args) { struct msghdr msg; struct iovec aiov; if (linux_check_hdrincl(td, args->s) == 0) /* IP_HDRINCL set, tweak the packet before sending */ return (linux_sendto_hdrincl(td, args)); msg.msg_name = PTRIN(args->to); msg.msg_namelen = args->tolen; msg.msg_iov = &aiov; msg.msg_iovlen = 1; msg.msg_control = NULL; msg.msg_flags = 0; aiov.iov_base = PTRIN(args->msg); aiov.iov_len = args->len; return (linux_sendit(td, args->s, &msg, args->flags, NULL, UIO_USERSPACE)); } int linux_recvfrom(struct thread *td, struct linux_recvfrom_args *args) { struct l_sockaddr *lsa; struct sockaddr *sa; struct msghdr msg; struct iovec aiov; int error, fromlen; if (PTRIN(args->fromlen) != NULL) { error = copyin(PTRIN(args->fromlen), &fromlen, sizeof(fromlen)); if (error != 0) return (error); if (fromlen < 0) return (EINVAL); sa = malloc(fromlen, M_SONAME, M_WAITOK); } else { fromlen = 0; sa = NULL; } msg.msg_name = sa; msg.msg_namelen = fromlen; msg.msg_iov = &aiov; msg.msg_iovlen = 1; aiov.iov_base = PTRIN(args->buf); aiov.iov_len = args->len; msg.msg_control = 0; msg.msg_flags = linux_to_bsd_msg_flags(args->flags); error = kern_recvit(td, args->s, &msg, UIO_SYSSPACE, NULL); if (error != 0) goto out; if (PTRIN(args->from) != NULL) { error = bsd_to_linux_sockaddr(sa, &lsa, msg.msg_namelen); if (error == 0) error = copyout(lsa, PTRIN(args->from), msg.msg_namelen); free(lsa, M_SONAME); } if (error == 0 && PTRIN(args->fromlen) != NULL) error = copyout(&msg.msg_namelen, PTRIN(args->fromlen), sizeof(msg.msg_namelen)); out: free(sa, M_SONAME); return (error); } static int linux_sendmsg_common(struct thread *td, l_int s, struct l_msghdr *msghdr, l_uint flags) { struct cmsghdr *cmsg; struct mbuf *control; struct msghdr msg; struct l_cmsghdr linux_cmsg; struct l_cmsghdr *ptr_cmsg; struct l_msghdr linux_msghdr; struct iovec *iov; socklen_t datalen; struct sockaddr *sa; struct socket *so; sa_family_t sa_family; struct file *fp; void *data; l_size_t len; l_size_t clen; int error, fflag; error = copyin(msghdr, &linux_msghdr, sizeof(linux_msghdr)); if (error != 0) return (error); /* * Some Linux applications (ping) define a non-NULL control data * pointer, but a msg_controllen of 0, which is not allowed in the * FreeBSD system call interface. NULL the msg_control pointer in * order to handle this case. This should be checked, but allows the * Linux ping to work. */ if (PTRIN(linux_msghdr.msg_control) != NULL && linux_msghdr.msg_controllen == 0) linux_msghdr.msg_control = PTROUT(NULL); error = linux_to_bsd_msghdr(&msg, &linux_msghdr); if (error != 0) return (error); #ifdef COMPAT_LINUX32 error = linux32_copyiniov(PTRIN(msg.msg_iov), msg.msg_iovlen, &iov, EMSGSIZE); #else error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); #endif if (error != 0) return (error); control = NULL; error = kern_getsockname(td, s, &sa, &datalen); if (error != 0) goto bad; sa_family = sa->sa_family; free(sa, M_SONAME); if (flags & LINUX_MSG_OOB) { error = EOPNOTSUPP; if (sa_family == AF_UNIX) goto bad; error = getsock_cap(td, s, &cap_send_rights, &fp, &fflag, NULL); if (error != 0) goto bad; so = fp->f_data; if (so->so_type != SOCK_STREAM) error = EOPNOTSUPP; fdrop(fp, td); if (error != 0) goto bad; } if (linux_msghdr.msg_controllen >= sizeof(struct l_cmsghdr)) { error = ENOBUFS; control = m_get(M_WAITOK, MT_CONTROL); MCLGET(control, M_WAITOK); data = mtod(control, void *); datalen = 0; ptr_cmsg = PTRIN(linux_msghdr.msg_control); clen = linux_msghdr.msg_controllen; do { error = copyin(ptr_cmsg, &linux_cmsg, sizeof(struct l_cmsghdr)); if (error != 0) goto bad; error = EINVAL; if (linux_cmsg.cmsg_len < sizeof(struct l_cmsghdr) || linux_cmsg.cmsg_len > clen) goto bad; if (datalen + CMSG_HDRSZ > MCLBYTES) goto bad; /* * Now we support only SCM_RIGHTS and SCM_CRED, * so return EINVAL in any other cmsg_type */ cmsg = data; cmsg->cmsg_type = linux_to_bsd_cmsg_type(linux_cmsg.cmsg_type); cmsg->cmsg_level = linux_to_bsd_sockopt_level(linux_cmsg.cmsg_level); if (cmsg->cmsg_type == -1 || cmsg->cmsg_level != SOL_SOCKET) { linux_msg(curthread, "unsupported sendmsg cmsg level %d type %d", linux_cmsg.cmsg_level, linux_cmsg.cmsg_type); goto bad; } /* * Some applications (e.g. pulseaudio) attempt to * send ancillary data even if the underlying protocol * doesn't support it which is not allowed in the * FreeBSD system call interface. */ if (sa_family != AF_UNIX) goto next; if (cmsg->cmsg_type == SCM_CREDS) { len = sizeof(struct cmsgcred); if (datalen + CMSG_SPACE(len) > MCLBYTES) goto bad; /* * The lower levels will fill in the structure */ memset(CMSG_DATA(data), 0, len); } else { len = linux_cmsg.cmsg_len - L_CMSG_HDRSZ; if (datalen + CMSG_SPACE(len) < datalen || datalen + CMSG_SPACE(len) > MCLBYTES) goto bad; error = copyin(LINUX_CMSG_DATA(ptr_cmsg), CMSG_DATA(data), len); if (error != 0) goto bad; } cmsg->cmsg_len = CMSG_LEN(len); data = (char *)data + CMSG_SPACE(len); datalen += CMSG_SPACE(len); next: if (clen <= LINUX_CMSG_ALIGN(linux_cmsg.cmsg_len)) break; clen -= LINUX_CMSG_ALIGN(linux_cmsg.cmsg_len); ptr_cmsg = (struct l_cmsghdr *)((char *)ptr_cmsg + LINUX_CMSG_ALIGN(linux_cmsg.cmsg_len)); } while(clen >= sizeof(struct l_cmsghdr)); control->m_len = datalen; if (datalen == 0) { m_freem(control); control = NULL; } } msg.msg_iov = iov; msg.msg_flags = 0; error = linux_sendit(td, s, &msg, flags, control, UIO_USERSPACE); control = NULL; bad: m_freem(control); free(iov, M_IOV); return (error); } int linux_sendmsg(struct thread *td, struct linux_sendmsg_args *args) { return (linux_sendmsg_common(td, args->s, PTRIN(args->msg), args->flags)); } int linux_sendmmsg(struct thread *td, struct linux_sendmmsg_args *args) { struct l_mmsghdr *msg; l_uint retval; int error, datagrams; if (args->vlen > UIO_MAXIOV) args->vlen = UIO_MAXIOV; msg = PTRIN(args->msg); datagrams = 0; while (datagrams < args->vlen) { error = linux_sendmsg_common(td, args->s, &msg->msg_hdr, args->flags); if (error != 0) break; retval = td->td_retval[0]; error = copyout(&retval, &msg->msg_len, sizeof(msg->msg_len)); if (error != 0) break; ++msg; ++datagrams; } if (error == 0) td->td_retval[0] = datagrams; return (error); } static int linux_recvmsg_common(struct thread *td, l_int s, struct l_msghdr *msghdr, l_uint flags, struct msghdr *msg) { struct cmsghdr *cm; struct cmsgcred *cmcred; struct l_cmsghdr *linux_cmsg = NULL; struct l_ucred linux_ucred; socklen_t datalen, maxlen, outlen; struct l_msghdr linux_msghdr; struct iovec *iov, *uiov; struct mbuf *control = NULL; struct mbuf **controlp; struct timeval *ftmvl; struct l_sockaddr *lsa; struct sockaddr *sa; l_timeval ltmvl; caddr_t outbuf; void *data; int error, i, fd, fds, *fdp; error = copyin(msghdr, &linux_msghdr, sizeof(linux_msghdr)); if (error != 0) return (error); error = linux_to_bsd_msghdr(msg, &linux_msghdr); if (error != 0) return (error); #ifdef COMPAT_LINUX32 error = linux32_copyiniov(PTRIN(msg->msg_iov), msg->msg_iovlen, &iov, EMSGSIZE); #else error = copyiniov(msg->msg_iov, msg->msg_iovlen, &iov, EMSGSIZE); #endif if (error != 0) return (error); if (msg->msg_name != NULL && msg->msg_namelen > 0) { msg->msg_namelen = min(msg->msg_namelen, SOCK_MAXADDRLEN); sa = malloc(msg->msg_namelen, M_SONAME, M_WAITOK); msg->msg_name = sa; } else { sa = NULL; msg->msg_name = NULL; } uiov = msg->msg_iov; msg->msg_iov = iov; controlp = (msg->msg_control != NULL) ? &control : NULL; error = kern_recvit(td, s, msg, UIO_SYSSPACE, controlp); msg->msg_iov = uiov; if (error != 0) goto bad; /* * Note that kern_recvit() updates msg->msg_namelen. */ if (msg->msg_name != NULL && msg->msg_namelen > 0) { msg->msg_name = PTRIN(linux_msghdr.msg_name); error = bsd_to_linux_sockaddr(sa, &lsa, msg->msg_namelen); if (error == 0) error = copyout(lsa, PTRIN(msg->msg_name), msg->msg_namelen); free(lsa, M_SONAME); if (error != 0) goto bad; } error = bsd_to_linux_msghdr(msg, &linux_msghdr); if (error != 0) goto bad; maxlen = linux_msghdr.msg_controllen; linux_msghdr.msg_controllen = 0; if (control) { linux_cmsg = malloc(L_CMSG_HDRSZ, M_LINUX, M_WAITOK | M_ZERO); msg->msg_control = mtod(control, struct cmsghdr *); msg->msg_controllen = control->m_len; cm = CMSG_FIRSTHDR(msg); outbuf = PTRIN(linux_msghdr.msg_control); outlen = 0; while (cm != NULL) { linux_cmsg->cmsg_type = bsd_to_linux_cmsg_type(cm->cmsg_type); linux_cmsg->cmsg_level = bsd_to_linux_sockopt_level(cm->cmsg_level); if (linux_cmsg->cmsg_type == -1 || cm->cmsg_level != SOL_SOCKET) { linux_msg(curthread, "unsupported recvmsg cmsg level %d type %d", cm->cmsg_level, cm->cmsg_type); error = EINVAL; goto bad; } data = CMSG_DATA(cm); datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; switch (cm->cmsg_type) { case SCM_RIGHTS: if (flags & LINUX_MSG_CMSG_CLOEXEC) { fds = datalen / sizeof(int); fdp = data; for (i = 0; i < fds; i++) { fd = *fdp++; (void)kern_fcntl(td, fd, F_SETFD, FD_CLOEXEC); } } break; case SCM_CREDS: /* * Currently LOCAL_CREDS is never in * effect for Linux so no need to worry * about sockcred */ if (datalen != sizeof(*cmcred)) { error = EMSGSIZE; goto bad; } cmcred = (struct cmsgcred *)data; bzero(&linux_ucred, sizeof(linux_ucred)); linux_ucred.pid = cmcred->cmcred_pid; linux_ucred.uid = cmcred->cmcred_uid; linux_ucred.gid = cmcred->cmcred_gid; data = &linux_ucred; datalen = sizeof(linux_ucred); break; case SCM_TIMESTAMP: if (datalen != sizeof(struct timeval)) { error = EMSGSIZE; goto bad; } ftmvl = (struct timeval *)data; ltmvl.tv_sec = ftmvl->tv_sec; ltmvl.tv_usec = ftmvl->tv_usec; data = <mvl; datalen = sizeof(ltmvl); break; } if (outlen + LINUX_CMSG_LEN(datalen) > maxlen) { if (outlen == 0) { error = EMSGSIZE; goto bad; } else { linux_msghdr.msg_flags |= LINUX_MSG_CTRUNC; m_dispose_extcontrolm(control); goto out; } } linux_cmsg->cmsg_len = LINUX_CMSG_LEN(datalen); error = copyout(linux_cmsg, outbuf, L_CMSG_HDRSZ); if (error != 0) goto bad; outbuf += L_CMSG_HDRSZ; error = copyout(data, outbuf, datalen); if (error != 0) goto bad; outbuf += LINUX_CMSG_ALIGN(datalen); outlen += LINUX_CMSG_LEN(datalen); cm = CMSG_NXTHDR(msg, cm); } linux_msghdr.msg_controllen = outlen; } out: error = copyout(&linux_msghdr, msghdr, sizeof(linux_msghdr)); bad: if (control != NULL) { if (error != 0) m_dispose_extcontrolm(control); m_freem(control); } free(iov, M_IOV); free(linux_cmsg, M_LINUX); free(sa, M_SONAME); return (error); } int linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args) { struct msghdr bsd_msg; return (linux_recvmsg_common(td, args->s, PTRIN(args->msg), args->flags, &bsd_msg)); } int linux_recvmmsg(struct thread *td, struct linux_recvmmsg_args *args) { struct l_mmsghdr *msg; struct msghdr bsd_msg; struct l_timespec lts; struct timespec ts, tts; l_uint retval; int error, datagrams; if (args->timeout) { error = copyin(args->timeout, <s, sizeof(struct l_timespec)); if (error != 0) return (error); error = linux_to_native_timespec(&ts, <s); if (error != 0) return (error); getnanotime(&tts); timespecadd(&tts, &ts, &tts); } msg = PTRIN(args->msg); datagrams = 0; while (datagrams < args->vlen) { error = linux_recvmsg_common(td, args->s, &msg->msg_hdr, args->flags & ~LINUX_MSG_WAITFORONE, &bsd_msg); if (error != 0) break; retval = td->td_retval[0]; error = copyout(&retval, &msg->msg_len, sizeof(msg->msg_len)); if (error != 0) break; ++msg; ++datagrams; /* * MSG_WAITFORONE turns on MSG_DONTWAIT after one packet. */ if (args->flags & LINUX_MSG_WAITFORONE) args->flags |= LINUX_MSG_DONTWAIT; /* * See BUGS section of recvmmsg(2). */ if (args->timeout) { getnanotime(&ts); timespecsub(&ts, &tts, &ts); if (!timespecisset(&ts) || ts.tv_sec > 0) break; } /* Out of band data, return right away. */ if (bsd_msg.msg_flags & MSG_OOB) break; } if (error == 0) td->td_retval[0] = datagrams; return (error); } int linux_shutdown(struct thread *td, struct linux_shutdown_args *args) { return (kern_shutdown(td, args->s, args->how)); } int linux_setsockopt(struct thread *td, struct linux_setsockopt_args *args) { l_timeval linux_tv; struct sockaddr *sa; struct timeval tv; socklen_t len; int error, level, name; level = linux_to_bsd_sockopt_level(args->level); switch (level) { case SOL_SOCKET: name = linux_to_bsd_so_sockopt(args->optname); switch (name) { case SO_RCVTIMEO: /* FALLTHROUGH */ case SO_SNDTIMEO: error = copyin(PTRIN(args->optval), &linux_tv, sizeof(linux_tv)); if (error != 0) return (error); tv.tv_sec = linux_tv.tv_sec; tv.tv_usec = linux_tv.tv_usec; return (kern_setsockopt(td, args->s, level, name, &tv, UIO_SYSSPACE, sizeof(tv))); /* NOTREACHED */ default: break; } break; case IPPROTO_IP: if (args->optname == LINUX_IP_RECVERR && linux_ignore_ip_recverr) { /* * XXX: This is a hack to unbreak DNS resolution * with glibc 2.30 and above. */ return (0); } name = linux_to_bsd_ip_sockopt(args->optname); break; case IPPROTO_IPV6: name = linux_to_bsd_ip6_sockopt(args->optname); break; case IPPROTO_TCP: name = linux_to_bsd_tcp_sockopt(args->optname); break; default: name = -1; break; } if (name == -1) { linux_msg(curthread, "unsupported setsockopt level %d optname %d", args->level, args->optname); return (ENOPROTOOPT); } if (name == IPV6_NEXTHOP) { len = args->optlen; error = linux_to_bsd_sockaddr(PTRIN(args->optval), &sa, &len); if (error != 0) return (error); error = kern_setsockopt(td, args->s, level, name, sa, UIO_SYSSPACE, len); free(sa, M_SONAME); } else { error = kern_setsockopt(td, args->s, level, name, PTRIN(args->optval), UIO_USERSPACE, args->optlen); } return (error); } int linux_getsockopt(struct thread *td, struct linux_getsockopt_args *args) { l_timeval linux_tv; struct timeval tv; socklen_t tv_len, xulen, len; struct l_sockaddr *lsa; struct sockaddr *sa; struct xucred xu; struct l_ucred lxu; int error, level, name, newval; level = linux_to_bsd_sockopt_level(args->level); switch (level) { case SOL_SOCKET: name = linux_to_bsd_so_sockopt(args->optname); switch (name) { case SO_RCVTIMEO: /* FALLTHROUGH */ case SO_SNDTIMEO: tv_len = sizeof(tv); error = kern_getsockopt(td, args->s, level, name, &tv, UIO_SYSSPACE, &tv_len); if (error != 0) return (error); linux_tv.tv_sec = tv.tv_sec; linux_tv.tv_usec = tv.tv_usec; return (copyout(&linux_tv, PTRIN(args->optval), sizeof(linux_tv))); /* NOTREACHED */ case LOCAL_PEERCRED: if (args->optlen < sizeof(lxu)) return (EINVAL); /* * LOCAL_PEERCRED is not served at the SOL_SOCKET level, * but by the Unix socket's level 0. */ level = 0; xulen = sizeof(xu); error = kern_getsockopt(td, args->s, level, name, &xu, UIO_SYSSPACE, &xulen); if (error != 0) return (error); lxu.pid = xu.cr_pid; lxu.uid = xu.cr_uid; lxu.gid = xu.cr_gid; return (copyout(&lxu, PTRIN(args->optval), sizeof(lxu))); /* NOTREACHED */ case SO_ERROR: len = sizeof(newval); error = kern_getsockopt(td, args->s, level, name, &newval, UIO_SYSSPACE, &len); if (error != 0) return (error); - newval = -SV_ABI_ERRNO(td->td_proc, newval); + newval = -linux_to_bsd_errno(newval); return (copyout(&newval, PTRIN(args->optval), len)); /* NOTREACHED */ default: break; } break; case IPPROTO_IP: name = linux_to_bsd_ip_sockopt(args->optname); break; case IPPROTO_IPV6: name = linux_to_bsd_ip6_sockopt(args->optname); break; case IPPROTO_TCP: name = linux_to_bsd_tcp_sockopt(args->optname); break; default: name = -1; break; } if (name == -1) { linux_msg(curthread, "unsupported getsockopt level %d optname %d", args->level, args->optname); return (EINVAL); } if (name == IPV6_NEXTHOP) { error = copyin(PTRIN(args->optlen), &len, sizeof(len)); if (error != 0) return (error); sa = malloc(len, M_SONAME, M_WAITOK); error = kern_getsockopt(td, args->s, level, name, sa, UIO_SYSSPACE, &len); if (error != 0) goto out; error = bsd_to_linux_sockaddr(sa, &lsa, len); if (error == 0) error = copyout(lsa, PTRIN(args->optval), len); free(lsa, M_SONAME); if (error == 0) error = copyout(&len, PTRIN(args->optlen), sizeof(len)); out: free(sa, M_SONAME); } else { if (args->optval) { error = copyin(PTRIN(args->optlen), &len, sizeof(len)); if (error != 0) return (error); } error = kern_getsockopt(td, args->s, level, name, PTRIN(args->optval), UIO_USERSPACE, &len); if (error == 0) error = copyout(&len, PTRIN(args->optlen), sizeof(len)); } return (error); } static int linux_sendfile_common(struct thread *td, l_int out, l_int in, l_loff_t *offset, l_size_t count) { off_t bytes_read; int error; l_loff_t current_offset; struct file *fp; AUDIT_ARG_FD(in); error = fget_read(td, in, &cap_pread_rights, &fp); if (error != 0) return (error); if (offset != NULL) { current_offset = *offset; } else { error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? fo_seek(fp, 0, SEEK_CUR, td) : ESPIPE; if (error != 0) goto drop; current_offset = td->td_uretoff.tdu_off; } bytes_read = 0; /* Linux cannot have 0 count. */ if (count <= 0 || current_offset < 0) { error = EINVAL; goto drop; } error = fo_sendfile(fp, out, NULL, NULL, current_offset, count, &bytes_read, 0, td); if (error != 0) goto drop; current_offset += bytes_read; if (offset != NULL) { *offset = current_offset; } else { error = fo_seek(fp, current_offset, SEEK_SET, td); if (error != 0) goto drop; } td->td_retval[0] = (ssize_t)bytes_read; drop: fdrop(fp, td); return (error); } int linux_sendfile(struct thread *td, struct linux_sendfile_args *arg) { /* * Differences between FreeBSD and Linux sendfile: * - Linux doesn't send anything when count is 0 (FreeBSD uses 0 to * mean send the whole file.) In linux_sendfile given fds are still * checked for validity when the count is 0. * - Linux can send to any fd whereas FreeBSD only supports sockets. * The same restriction follows for linux_sendfile. * - Linux doesn't have an equivalent for FreeBSD's flags and sf_hdtr. * - Linux takes an offset pointer and updates it to the read location. * FreeBSD takes in an offset and a 'bytes read' parameter which is * only filled if it isn't NULL. We use this parameter to update the * offset pointer if it exists. * - Linux sendfile returns bytes read on success while FreeBSD * returns 0. We use the 'bytes read' parameter to get this value. */ l_loff_t offset64; l_long offset; int ret; int error; if (arg->offset != NULL) { error = copyin(arg->offset, &offset, sizeof(offset)); if (error != 0) return (error); offset64 = (l_loff_t)offset; } ret = linux_sendfile_common(td, arg->out, arg->in, arg->offset != NULL ? &offset64 : NULL, arg->count); if (arg->offset != NULL) { #if defined(__i386__) || defined(__arm__) || \ (defined(__amd64__) && defined(COMPAT_LINUX32)) if (offset64 > INT32_MAX) return (EOVERFLOW); #endif offset = (l_long)offset64; error = copyout(&offset, arg->offset, sizeof(offset)); if (error != 0) return (error); } return (ret); } #if defined(__i386__) || defined(__arm__) || \ (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_sendfile64(struct thread *td, struct linux_sendfile64_args *arg) { l_loff_t offset; int ret; int error; if (arg->offset != NULL) { error = copyin(arg->offset, &offset, sizeof(offset)); if (error != 0) return (error); } ret = linux_sendfile_common(td, arg->out, arg->in, arg->offset != NULL ? &offset : NULL, arg->count); if (arg->offset != NULL) { error = copyout(&offset, arg->offset, sizeof(offset)); if (error != 0) return (error); } return (ret); } /* Argument list sizes for linux_socketcall */ static const unsigned char lxs_args_cnt[] = { 0 /* unused*/, 3 /* socket */, 3 /* bind */, 3 /* connect */, 2 /* listen */, 3 /* accept */, 3 /* getsockname */, 3 /* getpeername */, 4 /* socketpair */, 4 /* send */, 4 /* recv */, 6 /* sendto */, 6 /* recvfrom */, 2 /* shutdown */, 5 /* setsockopt */, 5 /* getsockopt */, 3 /* sendmsg */, 3 /* recvmsg */, 4 /* accept4 */, 5 /* recvmmsg */, 4 /* sendmmsg */, 4 /* sendfile */ }; #define LINUX_ARGS_CNT (nitems(lxs_args_cnt) - 1) #define LINUX_ARG_SIZE(x) (lxs_args_cnt[x] * sizeof(l_ulong)) int linux_socketcall(struct thread *td, struct linux_socketcall_args *args) { l_ulong a[6]; #if defined(__amd64__) && defined(COMPAT_LINUX32) register_t l_args[6]; #endif void *arg; int error; if (args->what < LINUX_SOCKET || args->what > LINUX_ARGS_CNT) return (EINVAL); error = copyin(PTRIN(args->args), a, LINUX_ARG_SIZE(args->what)); if (error != 0) return (error); #if defined(__amd64__) && defined(COMPAT_LINUX32) for (int i = 0; i < lxs_args_cnt[args->what]; ++i) l_args[i] = a[i]; arg = l_args; #else arg = a; #endif switch (args->what) { case LINUX_SOCKET: return (linux_socket(td, arg)); case LINUX_BIND: return (linux_bind(td, arg)); case LINUX_CONNECT: return (linux_connect(td, arg)); case LINUX_LISTEN: return (linux_listen(td, arg)); case LINUX_ACCEPT: return (linux_accept(td, arg)); case LINUX_GETSOCKNAME: return (linux_getsockname(td, arg)); case LINUX_GETPEERNAME: return (linux_getpeername(td, arg)); case LINUX_SOCKETPAIR: return (linux_socketpair(td, arg)); case LINUX_SEND: return (linux_send(td, arg)); case LINUX_RECV: return (linux_recv(td, arg)); case LINUX_SENDTO: return (linux_sendto(td, arg)); case LINUX_RECVFROM: return (linux_recvfrom(td, arg)); case LINUX_SHUTDOWN: return (linux_shutdown(td, arg)); case LINUX_SETSOCKOPT: return (linux_setsockopt(td, arg)); case LINUX_GETSOCKOPT: return (linux_getsockopt(td, arg)); case LINUX_SENDMSG: return (linux_sendmsg(td, arg)); case LINUX_RECVMSG: return (linux_recvmsg(td, arg)); case LINUX_ACCEPT4: return (linux_accept4(td, arg)); case LINUX_RECVMMSG: return (linux_recvmmsg(td, arg)); case LINUX_SENDMMSG: return (linux_sendmmsg(td, arg)); case LINUX_SENDFILE: return (linux_sendfile(td, arg)); } linux_msg(td, "socket type %d not implemented", args->what); return (ENOSYS); } #endif /* __i386__ || __arm__ || (__amd64__ && COMPAT_LINUX32) */ Index: head/sys/i386/i386/elf_machdep.c =================================================================== --- head/sys/i386/i386/elf_machdep.c (revision 365831) +++ head/sys/i386/i386/elf_machdep.c (revision 365832) @@ -1,303 +1,301 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, - .sv_errsize = 0, - .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD ELF32", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_auxargs = __elfN(freebsd_copyout_auxargs), .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_ASLR | SV_IA32 | SV_ILP32 | SV_SHP | SV_TIMEKEEP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); static Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); static Elf32_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(oelf32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_oinfo); static Elf32_Brandinfo kfreebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/lib/ld.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_kfreebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY }; SYSINIT(kelf32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &kfreebsd_brand_info); void elf32_dump_thread(struct thread *td, void *dst, size_t *off) { void *buf; size_t len; len = 0; if (use_xsave) { if (dst != NULL) { npxgetregs(td); len += elf32_populate_note(NT_X86_XSTATE, get_pcb_user_save_td(td), dst, cpu_max_ext_state_size, &buf); *(uint64_t *)((char *)buf + X86_XSTATE_XCR0_OFFSET) = xsave_mask; } else len += elf32_populate_note(NT_X86_XSTATE, NULL, NULL, cpu_max_ext_state_size, NULL); } *off = len; } bool elf_is_ifunc_reloc(Elf_Size r_info) { return (ELF_R_TYPE(r_info) == R_386_IRELATIVE); } #define ERI_LOCAL 0x0001 /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup, int flags) { Elf_Addr *where; Elf_Addr addr; Elf_Addr addend; Elf_Word rtype, symidx; const Elf_Rel *rel; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: rel = (const Elf_Rel *)data; where = (Elf_Addr *) (relocbase + rel->r_offset); addend = *where; rtype = ELF_R_TYPE(rel->r_info); symidx = ELF_R_SYM(rel->r_info); break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("unknown reloc type %d\n", type); } if ((flags & ERI_LOCAL) != 0) { if (rtype == R_386_RELATIVE) { /* A + B */ addr = elf_relocaddr(lf, relocbase + addend); if (*where != addr) *where = addr; } return (0); } switch (rtype) { case R_386_NONE: /* none */ break; case R_386_32: /* S + A */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; addr += addend; if (*where != addr) *where = addr; break; case R_386_PC32: /* S + A - P */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; addr += addend - (Elf_Addr)where; if (*where != addr) *where = addr; break; case R_386_COPY: /* none */ /* * There shouldn't be copy relocations in kernel * objects. */ printf("kldload: unexpected R_COPY relocation\n"); return -1; break; case R_386_GLOB_DAT: /* S */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; if (*where != addr) *where = addr; break; case R_386_RELATIVE: break; case R_386_IRELATIVE: addr = relocbase + addend; addr = ((Elf_Addr (*)(void))addr)(); if (*where != addr) *where = addr; break; default: printf("kldload: unexpected relocation type %d\n", rtype); return -1; } return(0); } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, lookup, 0)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, lookup, ERI_LOCAL)); } int elf_cpu_load_file(linker_file_t lf __unused) { return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } int elf_cpu_parse_dynamic(caddr_t loadbase __unused, Elf_Dyn *dynamic __unused) { return (0); } Index: head/sys/i386/linux/linux_sysvec.c =================================================================== --- head/sys/i386/linux/linux_sysvec.c (revision 365831) +++ head/sys/i386/linux/linux_sysvec.c (revision 365832) @@ -1,1103 +1,1099 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1994-1996 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_VERSION(linux, 1); #define LINUX_PS_STRINGS (LINUX_USRSTACK - sizeof(struct ps_strings)) static int linux_szsigcode; static vm_object_t linux_shared_page_obj; static char *linux_shared_page_mapping; extern char _binary_linux_locore_o_start; extern char _binary_linux_locore_o_end; extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); static int linux_fixup(uintptr_t *stack_base, struct image_params *iparams); static int linux_fixup_elf(uintptr_t *stack_base, struct image_params *iparams); static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); static void linux_exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack); static int linux_copyout_strings(struct image_params *imgp, uintptr_t *stack_base); static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel); static void linux_vdso_install(void *param); static void linux_vdso_deinstall(void *param); static int linux_szplatform; const char *linux_kplatform; static eventhandler_tag linux_exit_tag; static eventhandler_tag linux_exec_tag; static eventhandler_tag linux_thread_dtor_tag; #define LINUX_T_UNKNOWN 255 static int _bsd_to_linux_trapcode[] = { LINUX_T_UNKNOWN, /* 0 */ 6, /* 1 T_PRIVINFLT */ LINUX_T_UNKNOWN, /* 2 */ 3, /* 3 T_BPTFLT */ LINUX_T_UNKNOWN, /* 4 */ LINUX_T_UNKNOWN, /* 5 */ 16, /* 6 T_ARITHTRAP */ 254, /* 7 T_ASTFLT */ LINUX_T_UNKNOWN, /* 8 */ 13, /* 9 T_PROTFLT */ 1, /* 10 T_TRCTRAP */ LINUX_T_UNKNOWN, /* 11 */ 14, /* 12 T_PAGEFLT */ LINUX_T_UNKNOWN, /* 13 */ 17, /* 14 T_ALIGNFLT */ LINUX_T_UNKNOWN, /* 15 */ LINUX_T_UNKNOWN, /* 16 */ LINUX_T_UNKNOWN, /* 17 */ 0, /* 18 T_DIVIDE */ 2, /* 19 T_NMI */ 4, /* 20 T_OFLOW */ 5, /* 21 T_BOUND */ 7, /* 22 T_DNA */ 8, /* 23 T_DOUBLEFLT */ 9, /* 24 T_FPOPFLT */ 10, /* 25 T_TSSFLT */ 11, /* 26 T_SEGNPFLT */ 12, /* 27 T_STKFLT */ 18, /* 28 T_MCHK */ 19, /* 29 T_XMMFLT */ 15 /* 30 T_RESERVED */ }; #define bsd_to_linux_trapcode(code) \ ((code)args->argc + 1); base--; suword(base, (intptr_t)envp); base--; suword(base, (intptr_t)argv); base--; suword(base, imgp->args->argc); *stack_base = (uintptr_t)base; return (0); } static int linux_copyout_auxargs(struct image_params *imgp, uintptr_t base) { struct proc *p; Elf32_Auxargs *args; Elf32_Auxinfo *argarray, *pos; Elf32_Addr *uplatform; struct ps_strings *arginfo; int error, issetugid; p = imgp->proc; issetugid = imgp->proc->p_flag & P_SUGID ? 1 : 0; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform); args = (Elf32_Auxargs *)imgp->auxargs; argarray = pos = malloc(LINUX_AT_COUNT * sizeof(*pos), M_TEMP, M_WAITOK | M_ZERO); AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, imgp->proc->p_sysent->sv_shared_page_base); AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, linux_vsyscall); AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature); /* * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0, * as it has appeared in the 2.4.0-rc7 first time. * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK), * glibc falls back to the hard-coded CLK_TCK value when aux entry * is not present. * Also see linux_times() implementation. */ if (linux_kernver(curthread) >= LINUX_KERNVER_2004000) AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz); AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); AUXARGS_ENTRY(pos, AT_PHENT, args->phent); AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); AUXARGS_ENTRY(pos, AT_BASE, args->base); AUXARGS_ENTRY(pos, LINUX_AT_SECURE, issetugid); AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform)); AUXARGS_ENTRY_PTR(pos, LINUX_AT_RANDOM, imgp->canary); if (imgp->execpathp != 0) AUXARGS_ENTRY_PTR(pos, LINUX_AT_EXECFN, imgp->execpathp); if (args->execfd != -1) AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); AUXARGS_ENTRY(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); imgp->auxargs = NULL; KASSERT(pos - argarray <= LINUX_AT_COUNT, ("Too many auxargs")); error = copyout(argarray, (void *)base, sizeof(*argarray) * LINUX_AT_COUNT); free(argarray, M_TEMP); return (error); } static int linux_fixup_elf(uintptr_t *stack_base, struct image_params *imgp) { register_t *base; base = (register_t *)*stack_base; base--; if (suword(base, (register_t)imgp->args->argc) == -1) return (EFAULT); *stack_base = (uintptr_t)base; return (0); } /* * Copied from kern/kern_exec.c */ static int linux_copyout_strings(struct image_params *imgp, uintptr_t *stack_base) { int argc, envc, error; char **vectp; char *stringp; uintptr_t destp, ustringp; struct ps_strings *arginfo; char canary[LINUX_AT_RANDOM_LEN]; size_t execpath_len; struct proc *p; /* Calculate string base and vector table pointers. */ p = imgp->proc; if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; destp = (uintptr_t)arginfo; /* Install LINUX_PLATFORM. */ destp -= linux_szplatform; destp = rounddown2(destp, sizeof(void *)); error = copyout(linux_kplatform, (void *)destp, linux_szplatform); if (error != 0) return (error); if (execpath_len != 0) { destp -= execpath_len; destp = rounddown2(destp, sizeof(void *)); imgp->execpathp = (void *)destp; error = copyout(imgp->execpath, imgp->execpathp, execpath_len); if (error != 0) return (error); } /* Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); destp -= roundup(sizeof(canary), sizeof(void *)); imgp->canary = (void *)destp; error = copyout(canary, imgp->canary, sizeof(canary)); if (error != 0) return (error); /* Allocate room for the argument and environment strings. */ destp -= ARG_MAX - imgp->args->stringspace; destp = rounddown2(destp, sizeof(void *)); ustringp = destp; if (imgp->auxargs) { /* * Allocate room on the stack for the ELF auxargs * array. It has LINUX_AT_COUNT entries. */ destp -= LINUX_AT_COUNT * sizeof(Elf32_Auxinfo); destp = rounddown2(destp, sizeof(void *)); } vectp = (char **)destp; /* * Allocate room for the argv[] and env vectors including the * terminating NULL pointers. */ vectp -= imgp->args->argc + 1 + imgp->args->envc + 1; /* vectp also becomes our initial stack base. */ *stack_base = (uintptr_t)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* Copy out strings - arguments and environment. */ error = copyout(stringp, (void *)ustringp, ARG_MAX - imgp->args->stringspace); if (error != 0) return (error); /* Fill in "ps_strings" struct for ps, w, etc. */ if (suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp) != 0 || suword(&arginfo->ps_nargvstr, argc) != 0) return (EFAULT); /* Fill in argument portion of vector table. */ for (; argc > 0; --argc) { if (suword(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* A null vector table pointer separates the argp's from the envp's. */ if (suword(vectp++, 0) != 0) return (EFAULT); if (suword(&arginfo->ps_envstr, (long)(intptr_t)vectp) != 0 || suword(&arginfo->ps_nenvstr, envc) != 0) return (EFAULT); /* Fill in environment portion of vector table. */ for (; envc > 0; --envc) { if (suword(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* The end of the vector table is a null pointer. */ if (suword(vectp, 0) != 0) return (EFAULT); if (imgp->auxargs) { vectp++; error = imgp->sysent->sv_copyout_auxargs(imgp, (uintptr_t)vectp); if (error != 0) return (error); } return (0); } static void linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td = curthread; struct proc *p = td->td_proc; struct sigacts *psp; struct trapframe *regs; struct l_rt_sigframe *fp, frame; int sig, code; int oonstack; sig = ksi->ksi_signo; code = ksi->ksi_code; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct l_rt_sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); } else fp = (struct l_rt_sigframe *)regs->tf_esp - 1; mtx_unlock(&psp->ps_mtx); /* Build the argument list for the signal handler. */ sig = bsd_to_linux_signal(sig); bzero(&frame, sizeof(frame)); frame.sf_handler = catcher; frame.sf_sig = sig; frame.sf_siginfo = &fp->sf_si; frame.sf_ucontext = &fp->sf_sc; /* Fill in POSIX parts. */ ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); /* Build the signal context to be used by sigreturn. */ frame.sf_sc.uc_flags = 0; /* XXX ??? */ frame.sf_sc.uc_link = NULL; /* XXX ??? */ frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp; frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; PROC_UNLOCK(p); bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__mask; frame.sf_sc.uc_mcontext.sc_gs = rgs(); frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi; frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi; frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp; frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx; frame.sf_sc.uc_mcontext.sc_esp = regs->tf_esp; frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx; frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx; frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax; frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip; frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags; frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp; frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr; frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); if (copyout(&frame, fp, sizeof(frame)) != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ PROC_LOCK(p); sigexit(td, SIGILL); } /* Build context to run handler in. */ regs->tf_esp = (int)fp; regs->tf_eip = linux_rt_sigcode; regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * in u. to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td = curthread; struct proc *p = td->td_proc; struct sigacts *psp; struct trapframe *regs; struct l_sigframe *fp, frame; l_sigset_t lmask; int sig, code; int oonstack; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; sig = ksi->ksi_signo; code = ksi->ksi_code; mtx_assert(&psp->ps_mtx, MA_OWNED); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ linux_rt_sendsig(catcher, ksi, mask); return; } regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct l_sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_sigframe)); } else fp = (struct l_sigframe *)regs->tf_esp - 1; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* Build the argument list for the signal handler. */ sig = bsd_to_linux_signal(sig); bzero(&frame, sizeof(frame)); frame.sf_handler = catcher; frame.sf_sig = sig; bsd_to_linux_sigset(mask, &lmask); /* Build the signal context to be used by sigreturn. */ frame.sf_sc.sc_mask = lmask.__mask; frame.sf_sc.sc_gs = rgs(); frame.sf_sc.sc_fs = regs->tf_fs; frame.sf_sc.sc_es = regs->tf_es; frame.sf_sc.sc_ds = regs->tf_ds; frame.sf_sc.sc_edi = regs->tf_edi; frame.sf_sc.sc_esi = regs->tf_esi; frame.sf_sc.sc_ebp = regs->tf_ebp; frame.sf_sc.sc_ebx = regs->tf_ebx; frame.sf_sc.sc_esp = regs->tf_esp; frame.sf_sc.sc_edx = regs->tf_edx; frame.sf_sc.sc_ecx = regs->tf_ecx; frame.sf_sc.sc_eax = regs->tf_eax; frame.sf_sc.sc_eip = regs->tf_eip; frame.sf_sc.sc_cs = regs->tf_cs; frame.sf_sc.sc_eflags = regs->tf_eflags; frame.sf_sc.sc_esp_at_signal = regs->tf_esp; frame.sf_sc.sc_ss = regs->tf_ss; frame.sf_sc.sc_err = regs->tf_err; frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr; frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno); frame.sf_extramask[0] = lmask.__mask; if (copyout(&frame, fp, sizeof(frame)) != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ PROC_LOCK(p); sigexit(td, SIGILL); } /* Build context to run handler in. */ regs->tf_esp = (int)fp; regs->tf_eip = linux_sigcode; regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * psl to gain improper privileges or to cause * a machine fault. */ int linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) { struct l_sigframe frame; struct trapframe *regs; l_sigset_t lmask; sigset_t bmask; int eflags; ksiginfo_t ksi; regs = td->td_frame; /* * The trampoline code hands us the sigframe. * It is unsafe to keep track of it ourselves, in the event that a * program jumps out of a signal handler. */ if (copyin(args->sfp, &frame, sizeof(frame)) != 0) return (EFAULT); /* Check for security violations. */ #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) eflags = frame.sf_sc.sc_eflags; if (!EFLAGS_SECURE(eflags, regs->tf_eflags)) return (EINVAL); /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(frame.sf_sc.sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } lmask.__mask = frame.sf_sc.sc_mask; linux_to_bsd_sigset(&lmask, &bmask); kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); /* Restore signal context. */ /* %gs was restored by the trampoline. */ regs->tf_fs = frame.sf_sc.sc_fs; regs->tf_es = frame.sf_sc.sc_es; regs->tf_ds = frame.sf_sc.sc_ds; regs->tf_edi = frame.sf_sc.sc_edi; regs->tf_esi = frame.sf_sc.sc_esi; regs->tf_ebp = frame.sf_sc.sc_ebp; regs->tf_ebx = frame.sf_sc.sc_ebx; regs->tf_edx = frame.sf_sc.sc_edx; regs->tf_ecx = frame.sf_sc.sc_ecx; regs->tf_eax = frame.sf_sc.sc_eax; regs->tf_eip = frame.sf_sc.sc_eip; regs->tf_cs = frame.sf_sc.sc_cs; regs->tf_eflags = eflags; regs->tf_esp = frame.sf_sc.sc_esp_at_signal; regs->tf_ss = frame.sf_sc.sc_ss; return (EJUSTRETURN); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by rt_sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * psl to gain improper privileges or to cause * a machine fault. */ int linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) { struct l_ucontext uc; struct l_sigcontext *context; sigset_t bmask; l_stack_t *lss; stack_t ss; struct trapframe *regs; int eflags; ksiginfo_t ksi; regs = td->td_frame; /* * The trampoline code hands us the ucontext. * It is unsafe to keep track of it ourselves, in the event that a * program jumps out of a signal handler. */ if (copyin(args->ucp, &uc, sizeof(uc)) != 0) return (EFAULT); context = &uc.uc_mcontext; /* Check for security violations. */ #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) eflags = context->sc_eflags; if (!EFLAGS_SECURE(eflags, regs->tf_eflags)) return (EINVAL); /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(context->sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } linux_to_bsd_sigset(&uc.uc_sigmask, &bmask); kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); /* Restore signal context. */ /* %gs was restored by the trampoline. */ regs->tf_fs = context->sc_fs; regs->tf_es = context->sc_es; regs->tf_ds = context->sc_ds; regs->tf_edi = context->sc_edi; regs->tf_esi = context->sc_esi; regs->tf_ebp = context->sc_ebp; regs->tf_ebx = context->sc_ebx; regs->tf_edx = context->sc_edx; regs->tf_ecx = context->sc_ecx; regs->tf_eax = context->sc_eax; regs->tf_eip = context->sc_eip; regs->tf_cs = context->sc_cs; regs->tf_eflags = eflags; regs->tf_esp = context->sc_esp_at_signal; regs->tf_ss = context->sc_ss; /* Call sigaltstack & ignore results. */ lss = &uc.uc_stack; ss.ss_sp = lss->ss_sp; ss.ss_size = lss->ss_size; ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); (void)kern_sigaltstack(td, &ss, NULL); return (EJUSTRETURN); } static int linux_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; struct syscall_args *sa; p = td->td_proc; frame = td->td_frame; sa = &td->td_sa; sa->code = frame->tf_eax; sa->args[0] = frame->tf_ebx; sa->args[1] = frame->tf_ecx; sa->args[2] = frame->tf_edx; sa->args[3] = frame->tf_esi; sa->args[4] = frame->tf_edi; sa->args[5] = frame->tf_ebp; /* Unconfirmed */ if (sa->code >= p->p_sysent->sv_size) /* nosys */ sa->callp = &p->p_sysent->sv_table[p->p_sysent->sv_size - 1]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; td->td_retval[0] = 0; td->td_retval[1] = frame->tf_edx; return (0); } static void linux_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame = td->td_frame; cpu_set_syscall_retval(td, error); if (__predict_false(error != 0)) { if (error != ERESTART && error != EJUSTRETURN) - frame->tf_eax = SV_ABI_ERRNO(td->td_proc, error); + frame->tf_eax = linux_to_bsd_errno(error); } } /* * exec_setregs may initialize some registers differently than Linux * does, thus potentially confusing Linux binaries. If necessary, we * override the exec_setregs default(s) here. */ static void linux_exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct pcb *pcb = td->td_pcb; exec_setregs(td, imgp, stack); /* Linux sets %gs to 0, we default to _udatasel. */ pcb->pcb_gs = 0; load_gs(0); pcb->pcb_initial_npxcw = __LINUX_NPXCW__; } static void linux_get_machine(const char **dst) { switch (cpu_class) { case CPUCLASS_686: *dst = "i686"; break; case CPUCLASS_586: *dst = "i586"; break; case CPUCLASS_486: *dst = "i486"; break; default: *dst = "i386"; } } struct sysentvec linux_sysvec = { .sv_size = LINUX_SYS_MAXSYSCALL, .sv_table = linux_sysent, - .sv_errsize = ELAST + 1, - .sv_errtbl = linux_errtbl, .sv_transtrap = linux_translate_traps, .sv_fixup = linux_fixup, .sv_sendsig = linux_sendsig, .sv_sigcode = &_binary_linux_locore_o_start, .sv_szsigcode = &linux_szsigcode, .sv_name = "Linux a.out", .sv_coredump = NULL, .sv_imgact_try = linux_exec_imgact_try, .sv_minsigstksz = LINUX_MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = LINUX_USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = linux_exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32, .sv_set_syscall_retval = linux_set_syscall_retval, .sv_fetch_syscall_args = linux_fetch_syscall_args, .sv_syscallnames = NULL, .sv_shared_page_base = LINUX_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, .sv_thread_detach = linux_thread_detach, .sv_trap = NULL, }; INIT_SYSENTVEC(aout_sysvec, &linux_sysvec); struct sysentvec elf_linux_sysvec = { .sv_size = LINUX_SYS_MAXSYSCALL, .sv_table = linux_sysent, - .sv_errsize = ELAST + 1, - .sv_errtbl = linux_errtbl, .sv_transtrap = linux_translate_traps, .sv_fixup = linux_fixup_elf, .sv_sendsig = linux_sendsig, .sv_sigcode = &_binary_linux_locore_o_start, .sv_szsigcode = &linux_szsigcode, .sv_name = "Linux ELF", .sv_coredump = elf32_coredump, .sv_imgact_try = linux_exec_imgact_try, .sv_minsigstksz = LINUX_MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = LINUX_USRSTACK, .sv_psstrings = LINUX_PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_auxargs = linux_copyout_auxargs, .sv_copyout_strings = linux_copyout_strings, .sv_setregs = linux_exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP, .sv_set_syscall_retval = linux_set_syscall_retval, .sv_fetch_syscall_args = linux_fetch_syscall_args, .sv_syscallnames = NULL, .sv_shared_page_base = LINUX_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, .sv_thread_detach = linux_thread_detach, .sv_trap = NULL, }; static void linux_vdso_install(void *param) { linux_szsigcode = (&_binary_linux_locore_o_end - &_binary_linux_locore_o_start); if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) panic("Linux invalid vdso size\n"); __elfN(linux_vdso_fixup)(&elf_linux_sysvec); linux_shared_page_obj = __elfN(linux_shared_page_init) (&linux_shared_page_mapping); __elfN(linux_vdso_reloc)(&elf_linux_sysvec); bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, linux_szsigcode); elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; } SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, linux_vdso_install, NULL); static void linux_vdso_deinstall(void *param) { __elfN(linux_shared_page_fini)(linux_shared_page_obj); } SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, linux_vdso_deinstall, NULL); static char GNU_ABI_VENDOR[] = "GNU"; static int GNULINUX_ABI_DESC = 0; static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel) { const Elf32_Word *desc; uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); desc = (const Elf32_Word *)p; if (desc[0] != GNULINUX_ABI_DESC) return (false); /* * For Linux we encode osrel using the Linux convention of * (version << 16) | (major << 8) | (minor) * See macro in linux_mib.h */ *osrel = LINUX_KERNVER(desc[1], desc[2], desc[3]); return (true); } static Elf_Brandnote linux_brandnote = { .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), .hdr.n_descsz = 16, /* XXX at least 16 */ .hdr.n_type = 1, .vendor = GNU_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = linux_trans_osrel }; static Elf32_Brandinfo linux_brand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", .emul_path = linux_emul_path, .interp_path = "/lib/ld-linux.so.1", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; static Elf32_Brandinfo linux_glibc2brand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", .emul_path = linux_emul_path, .interp_path = "/lib/ld-linux.so.2", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; static Elf32_Brandinfo linux_muslbrand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", .emul_path = linux_emul_path, .interp_path = "/lib/ld-musl-i386.so.1", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; Elf32_Brandinfo *linux_brandlist[] = { &linux_brand, &linux_glibc2brand, &linux_muslbrand, NULL }; static int linux_elf_modevent(module_t mod, int type, void *data) { Elf32_Brandinfo **brandinfo; int error; struct linux_ioctl_handler **lihp; error = 0; switch(type) { case MOD_LOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_insert_brand_entry(*brandinfo) < 0) error = EINVAL; if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_register_handler(*lihp); LIST_INIT(&futex_list); mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF); linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit, NULL, 1000); linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec, NULL, 1000); linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor, linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY); linux_get_machine(&linux_kplatform); linux_szplatform = roundup(strlen(linux_kplatform) + 1, sizeof(char *)); linux_dev_shm_create(); linux_osd_jail_register(); stclohz = (stathz ? stathz : hz); if (bootverbose) printf("Linux ELF exec handler installed\n"); } else printf("cannot insert Linux ELF brand handler\n"); break; case MOD_UNLOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_brand_inuse(*brandinfo)) error = EBUSY; if (error == 0) { for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_remove_brand_entry(*brandinfo) < 0) error = EINVAL; } if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_unregister_handler(*lihp); mtx_destroy(&futex_mtx); EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag); linux_dev_shm_destroy(); linux_osd_jail_deregister(); if (bootverbose) printf("Linux ELF exec handler removed\n"); } else printf("Could not deinstall ELF interpreter entry\n"); break; default: return (EOPNOTSUPP); } return (error); } static moduledata_t linux_elf_mod = { "linuxelf", linux_elf_modevent, 0 }; DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); FEATURE(linux, "Linux 32bit support"); Index: head/sys/kern/imgact_aout.c =================================================================== --- head/sys/kern/imgact_aout.c (revision 365831) +++ head/sys/kern/imgact_aout.c (revision 365832) @@ -1,348 +1,344 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1993, David Greenman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __amd64__ #include #include #include #include #include #endif static int exec_aout_imgact(struct image_params *imgp); static int aout_fixup(uintptr_t *stack_base, struct image_params *imgp); #define AOUT32_USRSTACK 0xbfc00000 #if defined(__i386__) #define AOUT32_PS_STRINGS (AOUT32_USRSTACK - sizeof(struct ps_strings)) struct sysentvec aout_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, - .sv_errsize = 0, - .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = aout_fixup, .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD a.out", .sv_coredump = NULL, .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = AOUT32_USRSTACK, .sv_usrstack = AOUT32_USRSTACK, .sv_psstrings = AOUT32_PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_AOUT | SV_IA32 | SV_ILP32, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, }; #elif defined(__amd64__) #define AOUT32_PS_STRINGS \ (AOUT32_USRSTACK - sizeof(struct freebsd32_ps_strings)) #define AOUT32_MINUSER FREEBSD32_MINUSER extern const char *freebsd32_syscallnames[]; extern u_long ia32_maxssiz; struct sysentvec aout_sysvec = { .sv_size = FREEBSD32_SYS_MAXSYSCALL, .sv_table = freebsd32_sysent, - .sv_errsize = 0, - .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = aout_fixup, .sv_sendsig = ia32_sendsig, .sv_sigcode = ia32_sigcode, .sv_szsigcode = &sz_ia32_sigcode, .sv_name = "FreeBSD a.out", .sv_coredump = NULL, .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = AOUT32_MINUSER, .sv_maxuser = AOUT32_USRSTACK, .sv_usrstack = AOUT32_USRSTACK, .sv_psstrings = AOUT32_PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = freebsd32_copyout_strings, .sv_setregs = ia32_setregs, .sv_fixlimit = ia32_fixlimit, .sv_maxssiz = &ia32_maxssiz, .sv_flags = SV_ABI_FREEBSD | SV_AOUT | SV_IA32 | SV_ILP32, .sv_set_syscall_retval = ia32_set_syscall_retval, .sv_fetch_syscall_args = ia32_fetch_syscall_args, .sv_syscallnames = freebsd32_syscallnames, }; #else #error "Port me" #endif static int aout_fixup(uintptr_t *stack_base, struct image_params *imgp) { *stack_base -= sizeof(uint32_t); if (suword32((void *)*stack_base, imgp->args->argc) != 0) return (EFAULT); return (0); } static int exec_aout_imgact(struct image_params *imgp) { const struct exec *a_out = (const struct exec *) imgp->image_header; struct vmspace *vmspace; vm_map_t map; vm_object_t object; vm_offset_t text_end, data_end; unsigned long virtual_offset; unsigned long file_offset; unsigned long bss_size; int error; /* * Linux and *BSD binaries look very much alike, * only the machine id is different: * 0x64 for Linux, 0x86 for *BSD, 0x00 for BSDI. * NetBSD is in network byte order.. ugh. */ if (((a_out->a_midmag >> 16) & 0xff) != 0x86 && ((a_out->a_midmag >> 16) & 0xff) != 0 && ((((int)ntohl(a_out->a_midmag)) >> 16) & 0xff) != 0x86) return -1; /* * Set file/virtual offset based on a.out variant. * We do two cases: host byte order and network byte order * (for NetBSD compatibility) */ switch ((int)(a_out->a_midmag & 0xffff)) { case ZMAGIC: virtual_offset = 0; if (a_out->a_text) { file_offset = PAGE_SIZE; } else { /* Bill's "screwball mode" */ file_offset = 0; } break; case QMAGIC: virtual_offset = PAGE_SIZE; file_offset = 0; /* Pass PS_STRINGS for BSD/OS binaries only. */ if (N_GETMID(*a_out) == MID_ZERO) imgp->ps_strings = (void *)aout_sysvec.sv_psstrings; break; default: /* NetBSD compatibility */ switch ((int)(ntohl(a_out->a_midmag) & 0xffff)) { case ZMAGIC: case QMAGIC: virtual_offset = PAGE_SIZE; file_offset = 0; break; default: return (-1); } } bss_size = roundup(a_out->a_bss, PAGE_SIZE); /* * Check various fields in header for validity/bounds. */ if (/* entry point must lay with text region */ a_out->a_entry < virtual_offset || a_out->a_entry >= virtual_offset + a_out->a_text || /* text and data size must each be page rounded */ a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK #ifdef __amd64__ || /* overflows */ virtual_offset + a_out->a_text + a_out->a_data + bss_size > UINT_MAX #endif ) return (-1); /* text + data can't exceed file size */ if (a_out->a_data + a_out->a_text > imgp->attr->va_size) return (EFAULT); /* * text/data/bss must not exceed limits */ PROC_LOCK(imgp->proc); if (/* text can't exceed maximum text size */ a_out->a_text > maxtsiz || /* data + bss can't exceed rlimit */ a_out->a_data + bss_size > lim_cur_proc(imgp->proc, RLIMIT_DATA) || racct_set(imgp->proc, RACCT_DATA, a_out->a_data + bss_size) != 0) { PROC_UNLOCK(imgp->proc); return (ENOMEM); } PROC_UNLOCK(imgp->proc); /* * Avoid a possible deadlock if the current address space is destroyed * and that address space maps the locked vnode. In the common case, * the locked vnode's v_usecount is decremented but remains greater * than zero. Consequently, the vnode lock is not needed by vrele(). * However, in cases where the vnode lock is external, such as nullfs, * v_usecount may become zero. */ VOP_UNLOCK(imgp->vp); /* * Destroy old process VM and create a new one (with a new stack) */ error = exec_new_vmspace(imgp, &aout_sysvec); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); if (error) return (error); /* * The vm space can be changed by exec_new_vmspace */ vmspace = imgp->proc->p_vmspace; object = imgp->object; map = &vmspace->vm_map; vm_map_lock(map); vm_object_reference(object); text_end = virtual_offset + a_out->a_text; error = vm_map_insert(map, object, file_offset, virtual_offset, text_end, VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_ALL, MAP_COPY_ON_WRITE | MAP_PREFAULT | MAP_VN_EXEC); if (error) { vm_map_unlock(map); vm_object_deallocate(object); return (error); } VOP_SET_TEXT_CHECKED(imgp->vp); data_end = text_end + a_out->a_data; if (a_out->a_data) { vm_object_reference(object); error = vm_map_insert(map, object, file_offset + a_out->a_text, text_end, data_end, VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE | MAP_PREFAULT | MAP_VN_EXEC); if (error) { vm_map_unlock(map); vm_object_deallocate(object); return (error); } VOP_SET_TEXT_CHECKED(imgp->vp); } if (bss_size) { error = vm_map_insert(map, NULL, 0, data_end, data_end + bss_size, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) { vm_map_unlock(map); return (error); } } vm_map_unlock(map); /* Fill in process VM information */ vmspace->vm_tsize = a_out->a_text >> PAGE_SHIFT; vmspace->vm_dsize = (a_out->a_data + bss_size) >> PAGE_SHIFT; vmspace->vm_taddr = (caddr_t) (uintptr_t) virtual_offset; vmspace->vm_daddr = (caddr_t) (uintptr_t) (virtual_offset + a_out->a_text); /* Fill in image_params */ imgp->interpreted = 0; imgp->entry_addr = a_out->a_entry; imgp->proc->p_sysent = &aout_sysvec; return (0); } /* * Tell kern_execve.c about it, with a little help from the linker. */ static struct execsw aout_execsw = { .ex_imgact = exec_aout_imgact, .ex_name = "a.out" }; EXEC_SET(aout, aout_execsw); Index: head/sys/kern/init_main.c =================================================================== --- head/sys/kern/init_main.c (revision 365831) +++ head/sys/kern/init_main.c (revision 365832) @@ -1,839 +1,837 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1995 Terrence R. Lambert * All rights reserved. * * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)init_main.c 8.9 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_init_path.h" #include "opt_verbose_sysinit.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include void mi_startup(void); /* Should be elsewhere */ /* Components of the first process -- never freed. */ static struct session session0; static struct pgrp pgrp0; struct proc proc0; struct thread0_storage thread0_st __aligned(32); struct vmspace vmspace0; struct proc *initproc; int linux_alloc_current_noop(struct thread *td __unused, int flags __unused) { return (0); } int (*lkpi_alloc_current)(struct thread *, int) = linux_alloc_current_noop; #ifndef BOOTHOWTO #define BOOTHOWTO 0 #endif int boothowto = BOOTHOWTO; /* initialized so that it can be patched */ SYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0, "Boot control flags, passed from loader"); #ifndef BOOTVERBOSE #define BOOTVERBOSE 0 #endif int bootverbose = BOOTVERBOSE; SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0, "Control the output of verbose kernel messages"); #ifdef VERBOSE_SYSINIT /* * We'll use the defined value of VERBOSE_SYSINIT from the kernel config to * dictate the default VERBOSE_SYSINIT behavior. Significant values for this * option and associated tunable are: * - 0, 'compiled in but silent by default' * - 1, 'compiled in but verbose by default' (default) */ int verbose_sysinit = VERBOSE_SYSINIT; TUNABLE_INT("debug.verbose_sysinit", &verbose_sysinit); #endif #ifdef INVARIANTS FEATURE(invariants, "Kernel compiled with INVARIANTS, may affect performance"); #endif /* * This ensures that there is at least one entry so that the sysinit_set * symbol is not undefined. A sybsystem ID of SI_SUB_DUMMY is never * executed. */ SYSINIT(placeholder, SI_SUB_DUMMY, SI_ORDER_ANY, NULL, NULL); /* * The sysinit table itself. Items are checked off as the are run. * If we want to register new sysinit types, add them to newsysinit. */ SET_DECLARE(sysinit_set, struct sysinit); struct sysinit **sysinit, **sysinit_end; struct sysinit **newsysinit, **newsysinit_end; /* * Merge a new sysinit set into the current set, reallocating it if * necessary. This can only be called after malloc is running. */ void sysinit_add(struct sysinit **set, struct sysinit **set_end) { struct sysinit **newset; struct sysinit **sipp; struct sysinit **xipp; int count; count = set_end - set; if (newsysinit) count += newsysinit_end - newsysinit; else count += sysinit_end - sysinit; newset = malloc(count * sizeof(*sipp), M_TEMP, M_NOWAIT); if (newset == NULL) panic("cannot malloc for sysinit"); xipp = newset; if (newsysinit) for (sipp = newsysinit; sipp < newsysinit_end; sipp++) *xipp++ = *sipp; else for (sipp = sysinit; sipp < sysinit_end; sipp++) *xipp++ = *sipp; for (sipp = set; sipp < set_end; sipp++) *xipp++ = *sipp; if (newsysinit) free(newsysinit, M_TEMP); newsysinit = newset; newsysinit_end = newset + count; } #if defined (DDB) && defined(VERBOSE_SYSINIT) static const char * symbol_name(vm_offset_t va, db_strategy_t strategy) { const char *name; c_db_sym_t sym; db_expr_t offset; if (va == 0) return (NULL); sym = db_search_symbol(va, strategy, &offset); if (offset != 0) return (NULL); db_symbol_values(sym, &name, NULL); return (name); } #endif /* * System startup; initialize the world, create process 0, mount root * filesystem, and fork to create init and pagedaemon. Most of the * hard work is done in the lower-level initialization routines including * startup(), which does memory initialization and autoconfiguration. * * This allows simple addition of new kernel subsystems that require * boot time initialization. It also allows substitution of subsystem * (for instance, a scheduler, kernel profiler, or VM system) by object * module. Finally, it allows for optional "kernel threads". */ void mi_startup(void) { struct sysinit **sipp; /* system initialization*/ struct sysinit **xipp; /* interior loop of sort*/ struct sysinit *save; /* bubble*/ #if defined(VERBOSE_SYSINIT) int last; int verbose; #endif TSENTER(); if (boothowto & RB_VERBOSE) bootverbose++; if (sysinit == NULL) { sysinit = SET_BEGIN(sysinit_set); sysinit_end = SET_LIMIT(sysinit_set); } restart: /* * Perform a bubble sort of the system initialization objects by * their subsystem (primary key) and order (secondary key). */ for (sipp = sysinit; sipp < sysinit_end; sipp++) { for (xipp = sipp + 1; xipp < sysinit_end; xipp++) { if ((*sipp)->subsystem < (*xipp)->subsystem || ((*sipp)->subsystem == (*xipp)->subsystem && (*sipp)->order <= (*xipp)->order)) continue; /* skip*/ save = *sipp; *sipp = *xipp; *xipp = save; } } #if defined(VERBOSE_SYSINIT) last = SI_SUB_COPYRIGHT; verbose = 0; #if !defined(DDB) printf("VERBOSE_SYSINIT: DDB not enabled, symbol lookups disabled.\n"); #endif #endif /* * Traverse the (now) ordered list of system initialization tasks. * Perform each task, and continue on to the next task. */ for (sipp = sysinit; sipp < sysinit_end; sipp++) { if ((*sipp)->subsystem == SI_SUB_DUMMY) continue; /* skip dummy task(s)*/ if ((*sipp)->subsystem == SI_SUB_DONE) continue; #if defined(VERBOSE_SYSINIT) if ((*sipp)->subsystem > last && verbose_sysinit != 0) { verbose = 1; last = (*sipp)->subsystem; printf("subsystem %x\n", last); } if (verbose) { #if defined(DDB) const char *func, *data; func = symbol_name((vm_offset_t)(*sipp)->func, DB_STGY_PROC); data = symbol_name((vm_offset_t)(*sipp)->udata, DB_STGY_ANY); if (func != NULL && data != NULL) printf(" %s(&%s)... ", func, data); else if (func != NULL) printf(" %s(%p)... ", func, (*sipp)->udata); else #endif printf(" %p(%p)... ", (*sipp)->func, (*sipp)->udata); } #endif /* Call function */ (*((*sipp)->func))((*sipp)->udata); #if defined(VERBOSE_SYSINIT) if (verbose) printf("done.\n"); #endif /* Check off the one we're just done */ (*sipp)->subsystem = SI_SUB_DONE; /* Check if we've installed more sysinit items via KLD */ if (newsysinit != NULL) { if (sysinit != SET_BEGIN(sysinit_set)) free(sysinit, M_TEMP); sysinit = newsysinit; sysinit_end = newsysinit_end; newsysinit = NULL; newsysinit_end = NULL; goto restart; } } TSEXIT(); /* Here so we don't overlap with start_init. */ mtx_assert(&Giant, MA_OWNED | MA_NOTRECURSED); mtx_unlock(&Giant); /* * Now hand over this thread to swapper. */ swapper(); /* NOTREACHED*/ } static void print_caddr_t(void *data) { printf("%s", (char *)data); } static void print_version(void *data __unused) { int len; /* Strip a trailing newline from version. */ len = strlen(version); while (len > 0 && version[len - 1] == '\n') len--; printf("%.*s %s\n", len, version, machine); printf("%s\n", compiler_version); } SYSINIT(announce, SI_SUB_COPYRIGHT, SI_ORDER_FIRST, print_caddr_t, copyright); SYSINIT(trademark, SI_SUB_COPYRIGHT, SI_ORDER_SECOND, print_caddr_t, trademark); SYSINIT(version, SI_SUB_COPYRIGHT, SI_ORDER_THIRD, print_version, NULL); #ifdef WITNESS static char wit_warn[] = "WARNING: WITNESS option enabled, expect reduced performance.\n"; SYSINIT(witwarn, SI_SUB_COPYRIGHT, SI_ORDER_FOURTH, print_caddr_t, wit_warn); SYSINIT(witwarn2, SI_SUB_LAST, SI_ORDER_FOURTH, print_caddr_t, wit_warn); #endif #ifdef DIAGNOSTIC static char diag_warn[] = "WARNING: DIAGNOSTIC option enabled, expect reduced performance.\n"; SYSINIT(diagwarn, SI_SUB_COPYRIGHT, SI_ORDER_FIFTH, print_caddr_t, diag_warn); SYSINIT(diagwarn2, SI_SUB_LAST, SI_ORDER_FIFTH, print_caddr_t, diag_warn); #endif static int null_fetch_syscall_args(struct thread *td __unused) { panic("null_fetch_syscall_args"); } static void null_set_syscall_retval(struct thread *td __unused, int error __unused) { panic("null_set_syscall_retval"); } struct sysentvec null_sysvec = { .sv_size = 0, .sv_table = NULL, - .sv_errsize = 0, - .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = NULL, .sv_sendsig = NULL, .sv_sigcode = NULL, .sv_szsigcode = NULL, .sv_name = "null", .sv_coredump = NULL, .sv_imgact_try = NULL, .sv_minsigstksz = 0, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = NULL, .sv_setregs = NULL, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = 0, .sv_set_syscall_retval = null_set_syscall_retval, .sv_fetch_syscall_args = null_fetch_syscall_args, .sv_syscallnames = NULL, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, }; /* * The two following SYSINIT's are proc0 specific glue code. I am not * convinced that they can not be safely combined, but their order of * operation has been maintained as the same as the original init_main.c * for right now. */ /* ARGSUSED*/ static void proc0_init(void *dummy __unused) { struct proc *p; struct thread *td; struct ucred *newcred; struct uidinfo tmpuinfo; struct loginclass tmplc = { .lc_name = "", }; vm_paddr_t pageablemem; int i; GIANT_REQUIRED; p = &proc0; td = &thread0; /* * Initialize magic number and osrel. */ p->p_magic = P_MAGIC; p->p_osrel = osreldate; /* * Initialize thread and process structures. */ procinit(); /* set up proc zone */ threadinit(); /* set up UMA zones */ /* * Initialise scheduler resources. * Add scheduler specific parts to proc, thread as needed. */ schedinit(); /* scheduler gets its house in order */ /* * Create process 0 (the swapper). */ LIST_INSERT_HEAD(&allproc, p, p_list); LIST_INSERT_HEAD(PIDHASH(0), p, p_hash); mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK); p->p_pgrp = &pgrp0; LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash); LIST_INIT(&pgrp0.pg_members); LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist); pgrp0.pg_session = &session0; mtx_init(&session0.s_mtx, "session", NULL, MTX_DEF); refcount_init(&session0.s_count, 1); session0.s_leader = p; p->p_sysent = &null_sysvec; p->p_flag = P_SYSTEM | P_INMEM | P_KPROC; p->p_flag2 = 0; p->p_state = PRS_NORMAL; p->p_klist = knlist_alloc(&p->p_mtx); STAILQ_INIT(&p->p_ktr); p->p_nice = NZERO; /* pid_max cannot be greater than PID_MAX */ td->td_tid = PID_MAX + 1; LIST_INSERT_HEAD(TIDHASH(td->td_tid), td, td_hash); td->td_state = TDS_RUNNING; td->td_pri_class = PRI_TIMESHARE; td->td_user_pri = PUSER; td->td_base_user_pri = PUSER; td->td_lend_user_pri = PRI_MAX; td->td_priority = PVM; td->td_base_pri = PVM; td->td_oncpu = curcpu; td->td_flags = TDF_INMEM; td->td_pflags = TDP_KTHREAD; td->td_cpuset = cpuset_thread0(); td->td_domain.dr_policy = td->td_cpuset->cs_domain; prison0_init(); p->p_peers = 0; p->p_leader = p; p->p_reaper = p; p->p_treeflag |= P_TREE_REAPER; LIST_INIT(&p->p_reaplist); strncpy(p->p_comm, "kernel", sizeof (p->p_comm)); strncpy(td->td_name, "swapper", sizeof (td->td_name)); callout_init_mtx(&p->p_itcallout, &p->p_mtx, 0); callout_init_mtx(&p->p_limco, &p->p_mtx, 0); callout_init(&td->td_slpcallout, 1); /* Create credentials. */ newcred = crget(); newcred->cr_ngroups = 1; /* group 0 */ /* A hack to prevent uifind from tripping over NULL pointers. */ curthread->td_ucred = newcred; tmpuinfo.ui_uid = 1; newcred->cr_uidinfo = newcred->cr_ruidinfo = &tmpuinfo; newcred->cr_uidinfo = uifind(0); newcred->cr_ruidinfo = uifind(0); newcred->cr_loginclass = &tmplc; newcred->cr_loginclass = loginclass_find("default"); /* End hack. creds get properly set later with thread_cow_get_proc */ curthread->td_ucred = NULL; newcred->cr_prison = &prison0; newcred->cr_users++; /* avoid assertion failure */ proc_set_cred_init(p, newcred); newcred->cr_users--; crfree(newcred); #ifdef AUDIT audit_cred_kproc0(newcred); #endif #ifdef MAC mac_cred_create_swapper(newcred); #endif /* Create sigacts. */ p->p_sigacts = sigacts_alloc(); /* Initialize signal state for process 0. */ siginit(&proc0); /* Create the file descriptor table. */ p->p_fd = fdinit(NULL, false, NULL); p->p_fdtol = NULL; /* Create the limits structures. */ p->p_limit = lim_alloc(); for (i = 0; i < RLIM_NLIMITS; i++) p->p_limit->pl_rlimit[i].rlim_cur = p->p_limit->pl_rlimit[i].rlim_max = RLIM_INFINITY; p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_cur = p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles; p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_cur = p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc; p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_cur = dfldsiz; p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_max = maxdsiz; p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_cur = dflssiz; p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_max = maxssiz; /* Cast to avoid overflow on i386/PAE. */ pageablemem = ptoa((vm_paddr_t)vm_free_count()); p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_cur = p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = pageablemem; p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = pageablemem / 3; p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = pageablemem; p->p_cpulimit = RLIM_INFINITY; PROC_LOCK(p); thread_cow_get_proc(td, p); PROC_UNLOCK(p); /* Initialize resource accounting structures. */ racct_create(&p->p_racct); p->p_stats = pstats_alloc(); /* Allocate a prototype map so we have something to fork. */ p->p_vmspace = &vmspace0; vmspace0.vm_refcnt = 1; pmap_pinit0(vmspace_pmap(&vmspace0)); /* * proc0 is not expected to enter usermode, so there is no special * handling for sv_minuser here, like is done for exec_new_vmspace(). */ vm_map_init(&vmspace0.vm_map, vmspace_pmap(&vmspace0), p->p_sysent->sv_minuser, p->p_sysent->sv_maxuser); /* * Call the init and ctor for the new thread and proc. We wait * to do this until all other structures are fairly sane. */ EVENTHANDLER_DIRECT_INVOKE(process_init, p); EVENTHANDLER_DIRECT_INVOKE(thread_init, td); EVENTHANDLER_DIRECT_INVOKE(process_ctor, p); EVENTHANDLER_DIRECT_INVOKE(thread_ctor, td); /* * Charge root for one process. */ (void)chgproccnt(p->p_ucred->cr_ruidinfo, 1, 0); PROC_LOCK(p); racct_add_force(p, RACCT_NPROC, 1); PROC_UNLOCK(p); } SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL); /* ARGSUSED*/ static void proc0_post(void *dummy __unused) { struct proc *p; struct rusage ru; struct thread *td; /* * Now we can look at the time, having had a chance to verify the * time from the filesystem. Pretend that proc0 started now. */ sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NEW) { PROC_UNLOCK(p); continue; } microuptime(&p->p_stats->p_start); PROC_STATLOCK(p); rufetch(p, &ru); /* Clears thread stats */ p->p_rux.rux_runtime = 0; p->p_rux.rux_uticks = 0; p->p_rux.rux_sticks = 0; p->p_rux.rux_iticks = 0; PROC_STATUNLOCK(p); FOREACH_THREAD_IN_PROC(p, td) { td->td_runtime = 0; } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); PCPU_SET(switchtime, cpu_ticks()); PCPU_SET(switchticks, ticks); } SYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL); /* *************************************************************************** **** **** The following SYSINIT's and glue code should be moved to the **** respective files on a per subsystem basis. **** *************************************************************************** */ /* * List of paths to try when searching for "init". */ static char init_path[MAXPATHLEN] = #ifdef INIT_PATH __XSTRING(INIT_PATH); #else "/sbin/init:/sbin/oinit:/sbin/init.bak:/rescue/init"; #endif SYSCTL_STRING(_kern, OID_AUTO, init_path, CTLFLAG_RD, init_path, 0, "Path used to search the init process"); /* * Shutdown timeout of init(8). * Unused within kernel, but used to control init(8), hence do not remove. */ #ifndef INIT_SHUTDOWN_TIMEOUT #define INIT_SHUTDOWN_TIMEOUT 120 #endif static int init_shutdown_timeout = INIT_SHUTDOWN_TIMEOUT; SYSCTL_INT(_kern, OID_AUTO, init_shutdown_timeout, CTLFLAG_RW, &init_shutdown_timeout, 0, "Shutdown timeout of init(8). " "Unused within kernel, but used to control init(8)"); /* * Start the initial user process; try exec'ing each pathname in init_path. * The program is invoked with one argument containing the boot flags. */ static void start_init(void *dummy) { struct image_args args; int error; char *var, *path; char *free_init_path, *tmp_init_path; struct thread *td; struct proc *p; struct vmspace *oldvmspace; TSENTER(); /* Here so we don't overlap with mi_startup. */ td = curthread; p = td->td_proc; vfs_mountroot(); /* Wipe GELI passphrase from the environment. */ kern_unsetenv("kern.geom.eli.passphrase"); if ((var = kern_getenv("init_path")) != NULL) { strlcpy(init_path, var, sizeof(init_path)); freeenv(var); } free_init_path = tmp_init_path = strdup(init_path, M_TEMP); while ((path = strsep(&tmp_init_path, ":")) != NULL) { if (bootverbose) printf("start_init: trying %s\n", path); memset(&args, 0, sizeof(args)); error = exec_alloc_args(&args); if (error != 0) panic("%s: Can't allocate space for init arguments %d", __func__, error); error = exec_args_add_fname(&args, path, UIO_SYSSPACE); if (error != 0) panic("%s: Can't add fname %d", __func__, error); error = exec_args_add_arg(&args, path, UIO_SYSSPACE); if (error != 0) panic("%s: Can't add argv[0] %d", __func__, error); if (boothowto & RB_SINGLE) error = exec_args_add_arg(&args, "-s", UIO_SYSSPACE); if (error != 0) panic("%s: Can't add argv[0] %d", __func__, error); /* * Now try to exec the program. If can't for any reason * other than it doesn't exist, complain. * * Otherwise, return via fork_trampoline() all the way * to user mode as init! */ KASSERT((td->td_pflags & TDP_EXECVMSPC) == 0, ("nested execve")); oldvmspace = td->td_proc->p_vmspace; error = kern_execve(td, &args, NULL); KASSERT(error != 0, ("kern_execve returned success, not EJUSTRETURN")); if (error == EJUSTRETURN) { if ((td->td_pflags & TDP_EXECVMSPC) != 0) { KASSERT(p->p_vmspace != oldvmspace, ("oldvmspace still used")); vmspace_free(oldvmspace); td->td_pflags &= ~TDP_EXECVMSPC; } free(free_init_path, M_TEMP); TSEXIT(); return; } if (error != ENOENT) printf("exec %s: error %d\n", path, error); } free(free_init_path, M_TEMP); printf("init: not found in path %s\n", init_path); panic("no init"); } /* * Like kproc_create(), but runs in its own address space. We do this * early to reserve pid 1. Note special case - do not make it * runnable yet, init execution is started when userspace can be served. */ static void create_init(const void *udata __unused) { struct fork_req fr; struct ucred *newcred, *oldcred; struct thread *td; int error; bzero(&fr, sizeof(fr)); fr.fr_flags = RFFDG | RFPROC | RFSTOPPED; fr.fr_procp = &initproc; error = fork1(&thread0, &fr); if (error) panic("cannot fork init: %d\n", error); KASSERT(initproc->p_pid == 1, ("create_init: initproc->p_pid != 1")); /* divorce init's credentials from the kernel's */ newcred = crget(); sx_xlock(&proctree_lock); PROC_LOCK(initproc); initproc->p_flag |= P_SYSTEM | P_INMEM; initproc->p_treeflag |= P_TREE_REAPER; oldcred = initproc->p_ucred; crcopy(newcred, oldcred); #ifdef MAC mac_cred_create_init(newcred); #endif #ifdef AUDIT audit_cred_proc1(newcred); #endif proc_set_cred(initproc, newcred); td = FIRST_THREAD_IN_PROC(initproc); crcowfree(td); td->td_realucred = crcowget(initproc->p_ucred); td->td_ucred = td->td_realucred; PROC_UNLOCK(initproc); sx_xunlock(&proctree_lock); crfree(oldcred); cpu_fork_kthread_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL); } SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL); /* * Make it runnable now. */ static void kick_init(const void *udata __unused) { struct thread *td; td = FIRST_THREAD_IN_PROC(initproc); thread_lock(td); TD_SET_CAN_RUN(td); sched_add(td, SRQ_BORING); } SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_MIDDLE, kick_init, NULL); Index: head/sys/mips/mips/elf_machdep.c =================================================================== --- head/sys/mips/mips/elf_machdep.c (revision 365831) +++ head/sys/mips/mips/elf_machdep.c (revision 365832) @@ -1,510 +1,508 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * from: src/sys/i386/i386/elf_machdep.c,v 1.20 2004/08/11 02:35:05 marcel */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static struct sysentvec elf_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, - .sv_errsize = 0, - .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, #ifdef __mips_n64 .sv_name = "FreeBSD ELF64", #else .sv_name = "FreeBSD ELF32", #endif .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_auxargs = __elfN(freebsd_copyout_auxargs), .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, #ifdef __mips_n64 .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_ASLR, #else .sv_flags = SV_ABI_FREEBSD | SV_ILP32 | SV_ASLR, #endif .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, }; static __ElfN(Brandinfo) freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_MIPS, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &__elfN(freebsd_brandnote), .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) __elfN(insert_brand_entry), &freebsd_brand_info); void __elfN(dump_thread)(struct thread *td __unused, void *dst __unused, size_t *off __unused) { } /* * The following MIPS relocation code for tracking multiple * consecutive HI32/LO32 entries is because of the following: * * https://dmz-portal.mips.com/wiki/MIPS_relocation_types * * === * * + R_MIPS_HI16 * * An R_MIPS_HI16 must be followed eventually by an associated R_MIPS_LO16 * relocation record in the same SHT_REL section. The contents of the two * fields to be relocated are combined to form a full 32-bit addend AHL. * An R_MIPS_LO16 entry which does not immediately follow a R_MIPS_HI16 is * combined with the most recent one encountered, i.e. multiple R_MIPS_LO16 * entries may be associated with a single R_MIPS_HI16. Use of these * relocation types in a SHT_REL section is discouraged and may be * forbidden to avoid this complication. * * A GNU extension allows multiple R_MIPS_HI16 records to share the same * R_MIPS_LO16 relocation record(s). The association works like this within * a single relocation section: * * + From the beginning of the section moving to the end of the section, * until R_MIPS_LO16 is not found each found R_MIPS_HI16 relocation will * be associated with the first R_MIPS_LO16. * * + Until another R_MIPS_HI16 record is found all found R_MIPS_LO16 * relocations found are associated with the last R_MIPS_HI16. * * === * * This is so gcc can do dead code detection/removal without having to * generate HI/LO pairs even if one of them would be deleted. * * So, the summary is: * * + A HI16 entry must occur before any LO16 entries; * + Multiple consecutive HI16 RELA entries need to be buffered until the * first LO16 RELA entry occurs - and then all HI16 RELA relocations use * the offset in the LOW16 RELA for calculating their offsets; * + The last HI16 RELA entry before a LO16 RELA entry is used (the AHL) * for the first subsequent LO16 calculation; * + If multiple consecutive LO16 RELA entries occur, only the first * LO16 RELA entry triggers an update of buffered HI16 RELA entries; * any subsequent LO16 RELA entry before another HI16 RELA entry will * not cause any further updates to the HI16 RELA entries. * * Additionally, flush out any outstanding HI16 entries that don't have * a LO16 entry in case some garbage entries are left in the file. */ struct mips_tmp_reloc; struct mips_tmp_reloc { struct mips_tmp_reloc *next; Elf_Addr ahl; Elf32_Addr *where_hi16; }; static struct mips_tmp_reloc *ml = NULL; /* * Add a temporary relocation (ie, a HI16 reloc type.) */ static int mips_tmp_reloc_add(Elf_Addr ahl, Elf32_Addr *where_hi16) { struct mips_tmp_reloc *r; r = malloc(sizeof(struct mips_tmp_reloc), M_TEMP, M_NOWAIT); if (r == NULL) { printf("%s: failed to malloc\n", __func__); return (0); } r->ahl = ahl; r->where_hi16 = where_hi16; r->next = ml; ml = r; return (1); } /* * Flush the temporary relocation list. * * This should be done after a file is completely loaded * so no stale relocations exist to confuse the next * load. */ static void mips_tmp_reloc_flush(void) { struct mips_tmp_reloc *r, *rn; r = ml; ml = NULL; while (r != NULL) { rn = r->next; free(r, M_TEMP); r = rn; } } /* * Get an entry from the reloc list; or NULL if we've run out. */ static struct mips_tmp_reloc * mips_tmp_reloc_get(void) { struct mips_tmp_reloc *r; r = ml; if (r == NULL) return (NULL); ml = ml->next; return (r); } /* * Free a relocation entry. */ static void mips_tmp_reloc_free(struct mips_tmp_reloc *r) { free(r, M_TEMP); } bool elf_is_ifunc_reloc(Elf_Size r_info __unused) { return (false); } /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, int local, elf_lookup_fn lookup) { Elf32_Addr *where = (Elf32_Addr *)NULL; Elf_Addr addr; Elf_Addr addend = (Elf_Addr)0; Elf_Word rtype = (Elf_Word)0, symidx; struct mips_tmp_reloc *r; const Elf_Rel *rel = NULL; const Elf_Rela *rela = NULL; int error; /* Store the last seen ahl from a HI16 for LO16 processing */ static Elf_Addr last_ahl; switch (type) { case ELF_RELOC_REL: rel = (const Elf_Rel *)data; where = (Elf32_Addr *) (relocbase + rel->r_offset); rtype = ELF_R_TYPE(rel->r_info); symidx = ELF_R_SYM(rel->r_info); switch (rtype) { case R_MIPS_64: addend = *(Elf64_Addr *)where; break; default: addend = *where; break; } break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf32_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("unknown reloc type %d\n", type); } switch (rtype) { case R_MIPS_NONE: /* none */ break; case R_MIPS_32: /* S + A */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); addr += addend; if (*where != addr) *where = (Elf32_Addr)addr; break; case R_MIPS_26: /* ((A << 2) | (P & 0xf0000000) + S) >> 2 */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); addend &= 0x03ffffff; /* * Addendum for .rela R_MIPS_26 is not shifted right */ if (rela == NULL) addend <<= 2; addr += ((Elf_Addr)where & 0xf0000000) | addend; addr >>= 2; *where &= ~0x03ffffff; *where |= addr & 0x03ffffff; break; case R_MIPS_64: /* S + A */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); addr += addend; if (*(Elf64_Addr*)where != addr) *(Elf64_Addr*)where = addr; break; /* * Handle the two GNU extension cases: * * + Multiple HI16s followed by a LO16, and * + A HI16 followed by multiple LO16s. * * The former is tricky - the HI16 relocations need * to be buffered until a LO16 occurs, at which point * each HI16 is replayed against the LO16 relocation entry * (with the relevant overflow information.) * * The latter should be easy to handle - when the * first LO16 is seen, write out and flush the * HI16 buffer. Any subsequent LO16 entries will * find a blank relocation buffer. * */ case R_MIPS_HI16: /* ((AHL + S) - ((short)(AHL + S)) >> 16 */ if (rela != NULL) { error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); addr += addend; *where &= 0xffff0000; *where |= ((((long long) addr + 0x8000LL) >> 16) & 0xffff); } else { /* * Add a temporary relocation to the list; * will pop it off / free the list when * we've found a suitable HI16. */ if (mips_tmp_reloc_add(addend << 16, where) == 0) return (-1); /* * Track the last seen HI16 AHL for use by * the first LO16 AHL calculation. * * The assumption is any intermediary deleted * LO16's were optimised out, so the last * HI16 before the LO16 is the "true" relocation * entry to use for that LO16 write. */ last_ahl = addend << 16; } break; case R_MIPS_LO16: /* AHL + S */ if (rela != NULL) { error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); addr += addend; *where &= 0xffff0000; *where |= addr & 0xffff; } else { Elf_Addr tmp_ahl; Elf_Addr tmp_addend; tmp_ahl = last_ahl + (int16_t) addend; error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); tmp_addend = addend & 0xffff0000; /* Use the last seen ahl for calculating addend */ tmp_addend |= (uint16_t)(tmp_ahl + addr); *where = tmp_addend; /* * This logic implements the "we saw multiple HI16 * before a LO16" assignment /and/ "we saw multiple * LO16s". * * Multiple LO16s will be handled as a blank * relocation list. * * Multple HI16's are iterated over here. */ while ((r = mips_tmp_reloc_get()) != NULL) { Elf_Addr rahl; /* * We have the ahl from the HI16 entry, so * offset it by the 16 bits of the low ahl. */ rahl = r->ahl; rahl += (int16_t) addend; tmp_addend = *(r->where_hi16); tmp_addend &= 0xffff0000; tmp_addend |= ((rahl + addr) - (int16_t)(rahl + addr)) >> 16; *(r->where_hi16) = tmp_addend; mips_tmp_reloc_free(r); } } break; case R_MIPS_HIGHER: /* %higher(A+S) */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); addr += addend; *where &= 0xffff0000; *where |= (((long long)addr + 0x80008000LL) >> 32) & 0xffff; break; case R_MIPS_HIGHEST: /* %highest(A+S) */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); addr += addend; *where &= 0xffff0000; *where |= (((long long)addr + 0x800080008000LL) >> 48) & 0xffff; break; default: printf("kldload: unexpected relocation type %d\n", rtype); return (-1); } return(0); } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup)); } int elf_cpu_load_file(linker_file_t lf __unused) { /* * Sync the I and D caches to make sure our relocations are visible. */ mips_icache_sync_all(); /* Flush outstanding relocations */ mips_tmp_reloc_flush(); return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } int elf_cpu_parse_dynamic(caddr_t loadbase __unused, Elf_Dyn *dynamic __unused) { return (0); } Index: head/sys/mips/mips/freebsd32_machdep.c =================================================================== --- head/sys/mips/mips/freebsd32_machdep.c (revision 365831) +++ head/sys/mips/mips/freebsd32_machdep.c (revision 365832) @@ -1,475 +1,473 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 Juli Mallett * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Based on nwhitehorn's COMPAT_FREEBSD32 support code for PowerPC64. */ #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int get_mcontext32(struct thread *, mcontext32_t *, int); static int set_mcontext32(struct thread *, mcontext32_t *); static void freebsd32_sendsig(sig_t, ksiginfo_t *, sigset_t *); extern const char *freebsd32_syscallnames[]; struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = freebsd32_sysent, - .sv_errsize = 0, - .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = freebsd32_sendsig, .sv_sigcode = sigcode32, .sv_szsigcode = &szsigcode32, .sv_name = "FreeBSD ELF32", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = ((vm_offset_t)0x80000000), .sv_usrstack = FREEBSD32_USRSTACK, .sv_psstrings = FREEBSD32_PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_auxargs = __elfN(freebsd_copyout_auxargs), .sv_copyout_strings = freebsd32_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_ILP32, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = freebsd32_syscallnames, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); static Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_MIPS, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = "/libexec/ld-elf32.so.1", .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); int set_regs32(struct thread *td, struct reg32 *regs) { struct reg r; unsigned i; for (i = 0; i < NUMSAVEREGS; i++) r.r_regs[i] = regs->r_regs[i]; return (set_regs(td, &r)); } int fill_regs32(struct thread *td, struct reg32 *regs) { struct reg r; unsigned i; int error; error = fill_regs(td, &r); if (error != 0) return (error); for (i = 0; i < NUMSAVEREGS; i++) regs->r_regs[i] = r.r_regs[i]; return (0); } int set_fpregs32(struct thread *td, struct fpreg32 *fpregs) { struct fpreg fp; unsigned i; for (i = 0; i < NUMFPREGS; i++) fp.r_regs[i] = fpregs->r_regs[i]; return (set_fpregs(td, &fp)); } int fill_fpregs32(struct thread *td, struct fpreg32 *fpregs) { struct fpreg fp; unsigned i; int error; error = fill_fpregs(td, &fp); if (error != 0) return (error); for (i = 0; i < NUMFPREGS; i++) fpregs->r_regs[i] = fp.r_regs[i]; return (0); } static int get_mcontext32(struct thread *td, mcontext32_t *mcp, int flags) { mcontext_t mcp64; unsigned i; int error; error = get_mcontext(td, &mcp64, flags); if (error != 0) return (error); mcp->mc_onstack = mcp64.mc_onstack; mcp->mc_pc = mcp64.mc_pc; for (i = 0; i < 32; i++) mcp->mc_regs[i] = mcp64.mc_regs[i]; mcp->sr = mcp64.sr; mcp->mullo = mcp64.mullo; mcp->mulhi = mcp64.mulhi; mcp->mc_fpused = mcp64.mc_fpused; for (i = 0; i < 33; i++) mcp->mc_fpregs[i] = mcp64.mc_fpregs[i]; mcp->mc_fpc_eir = mcp64.mc_fpc_eir; mcp->mc_tls = (int32_t)(intptr_t)mcp64.mc_tls; return (0); } static int set_mcontext32(struct thread *td, mcontext32_t *mcp) { mcontext_t mcp64; unsigned i; mcp64.mc_onstack = mcp->mc_onstack; mcp64.mc_pc = mcp->mc_pc; for (i = 0; i < 32; i++) mcp64.mc_regs[i] = mcp->mc_regs[i]; mcp64.sr = mcp->sr; mcp64.mullo = mcp->mullo; mcp64.mulhi = mcp->mulhi; mcp64.mc_fpused = mcp->mc_fpused; for (i = 0; i < 33; i++) mcp64.mc_fpregs[i] = mcp->mc_fpregs[i]; mcp64.mc_fpc_eir = mcp->mc_fpc_eir; mcp64.mc_tls = (void *)(intptr_t)mcp->mc_tls; return (set_mcontext(td, &mcp64)); } int freebsd32_sigreturn(struct thread *td, struct freebsd32_sigreturn_args *uap) { ucontext32_t uc; int error; CTR2(KTR_SIG, "sigreturn: td=%p ucp=%p", td, uap->sigcntxp); if (copyin(uap->sigcntxp, &uc, sizeof(uc)) != 0) { CTR1(KTR_SIG, "sigreturn: efault td=%p", td); return (EFAULT); } error = set_mcontext32(td, &uc.uc_mcontext); if (error != 0) return (error); kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); #if 0 CTR3(KTR_SIG, "sigreturn: return td=%p pc=%#x sp=%#x", td, uc.uc_mcontext.mc_srr0, uc.uc_mcontext.mc_gpr[1]); #endif return (EJUSTRETURN); } /* * The first two fields of a ucontext_t are the signal mask and the machine * context. The next field is uc_link; we want to avoid destroying the link * when copying out contexts. */ #define UC32_COPY_SIZE offsetof(ucontext32_t, uc_link) int freebsd32_getcontext(struct thread *td, struct freebsd32_getcontext_args *uap) { ucontext32_t uc; int ret; if (uap->ucp == NULL) ret = EINVAL; else { bzero(&uc, sizeof(uc)); get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET); PROC_LOCK(td->td_proc); uc.uc_sigmask = td->td_sigmask; PROC_UNLOCK(td->td_proc); ret = copyout(&uc, uap->ucp, UC32_COPY_SIZE); } return (ret); } int freebsd32_setcontext(struct thread *td, struct freebsd32_setcontext_args *uap) { ucontext32_t uc; int ret; if (uap->ucp == NULL) ret = EINVAL; else { ret = copyin(uap->ucp, &uc, UC32_COPY_SIZE); if (ret == 0) { ret = set_mcontext32(td, &uc.uc_mcontext); if (ret == 0) { kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); } } } return (ret == 0 ? EJUSTRETURN : ret); } int freebsd32_swapcontext(struct thread *td, struct freebsd32_swapcontext_args *uap) { ucontext32_t uc; int ret; if (uap->oucp == NULL || uap->ucp == NULL) ret = EINVAL; else { bzero(&uc, sizeof(uc)); get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET); PROC_LOCK(td->td_proc); uc.uc_sigmask = td->td_sigmask; PROC_UNLOCK(td->td_proc); ret = copyout(&uc, uap->oucp, UC32_COPY_SIZE); if (ret == 0) { ret = copyin(uap->ucp, &uc, UC32_COPY_SIZE); if (ret == 0) { ret = set_mcontext32(td, &uc.uc_mcontext); if (ret == 0) { kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); } } } } return (ret == 0 ? EJUSTRETURN : ret); } #define UCONTEXT_MAGIC 0xACEDBADE /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ static void freebsd32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct proc *p; struct thread *td; struct fpreg32 fpregs; struct reg32 regs; struct sigacts *psp; struct sigframe32 sf, *sfp; int sig; int oonstack; unsigned i; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); fill_regs32(td, ®s); oonstack = sigonstack(td->td_frame->sp); /* save user context */ bzero(&sf, sizeof sf); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack.ss_sp = (int32_t)(intptr_t)td->td_sigstk.ss_sp; sf.sf_uc.uc_stack.ss_size = td->td_sigstk.ss_size; sf.sf_uc.uc_stack.ss_flags = td->td_sigstk.ss_flags; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_pc = regs.r_regs[PC]; sf.sf_uc.uc_mcontext.mullo = regs.r_regs[MULLO]; sf.sf_uc.uc_mcontext.mulhi = regs.r_regs[MULHI]; sf.sf_uc.uc_mcontext.mc_tls = (int32_t)(intptr_t)td->td_md.md_tls; sf.sf_uc.uc_mcontext.mc_regs[0] = UCONTEXT_MAGIC; /* magic number */ for (i = 1; i < 32; i++) sf.sf_uc.uc_mcontext.mc_regs[i] = regs.r_regs[i]; sf.sf_uc.uc_mcontext.mc_fpused = td->td_md.md_flags & MDTD_FPUSED; if (sf.sf_uc.uc_mcontext.mc_fpused) { /* if FPU has current state, save it first */ if (td == PCPU_GET(fpcurthread)) MipsSaveCurFPState(td); fill_fpregs32(td, &fpregs); for (i = 0; i < 33; i++) sf.sf_uc.uc_mcontext.mc_fpregs[i] = fpregs.r_regs[i]; } /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe32 *)(((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe32)) & ~(sizeof(__int64_t) - 1)); } else sfp = (struct sigframe32 *)((vm_offset_t)(td->td_frame->sp - sizeof(struct sigframe32)) & ~(sizeof(__int64_t) - 1)); /* Build the argument list for the signal handler. */ td->td_frame->a0 = sig; td->td_frame->a2 = (register_t)(intptr_t)&sfp->sf_uc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ td->td_frame->a1 = (register_t)(intptr_t)&sfp->sf_si; /* sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; */ /* fill siginfo structure */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = ksi->ksi_code; sf.sf_si.si_addr = td->td_frame->badvaddr; } else { /* Old FreeBSD-style arguments. */ td->td_frame->a1 = ksi->ksi_code; td->td_frame->a3 = td->td_frame->badvaddr; /* sf.sf_ahu.sf_handler = catcher; */ } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(struct sigframe32)) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. */ PROC_LOCK(p); sigexit(td, SIGILL); } td->td_frame->pc = (register_t)(intptr_t)catcher; td->td_frame->t9 = (register_t)(intptr_t)catcher; td->td_frame->sp = (register_t)(intptr_t)sfp; /* * Signal trampoline code is at base of user stack. */ td->td_frame->ra = (register_t)(intptr_t)FREEBSD32_PS_STRINGS - *(p->p_sysent->sv_szsigcode); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } int freebsd32_sysarch(struct thread *td, struct freebsd32_sysarch_args *uap) { int error; int32_t tlsbase; switch (uap->op) { case MIPS_SET_TLS: td->td_md.md_tls = (void *)(intptr_t)uap->parms; return (0); case MIPS_GET_TLS: tlsbase = (int32_t)(intptr_t)td->td_md.md_tls; error = copyout(&tlsbase, uap->parms, sizeof(tlsbase)); return (error); default: break; } return (EINVAL); } void elf32_dump_thread(struct thread *td __unused, void *dst __unused, size_t *off __unused) { } Index: head/sys/powerpc/powerpc/elf32_machdep.c =================================================================== --- head/sys/powerpc/powerpc/elf32_machdep.c (revision 365831) +++ head/sys/powerpc/powerpc/elf32_machdep.c (revision 365832) @@ -1,466 +1,464 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __powerpc64__ #include #include extern const char *freebsd32_syscallnames[]; static void ppc32_fixlimit(struct rlimit *rl, int which); static SYSCTL_NODE(_compat, OID_AUTO, ppc32, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "32-bit mode"); #define PPC32_MAXDSIZ (1024*1024*1024) static u_long ppc32_maxdsiz = PPC32_MAXDSIZ; SYSCTL_ULONG(_compat_ppc32, OID_AUTO, maxdsiz, CTLFLAG_RWTUN, &ppc32_maxdsiz, 0, ""); #define PPC32_MAXSSIZ (64*1024*1024) u_long ppc32_maxssiz = PPC32_MAXSSIZ; SYSCTL_ULONG(_compat_ppc32, OID_AUTO, maxssiz, CTLFLAG_RWTUN, &ppc32_maxssiz, 0, ""); #else static void ppc32_runtime_resolve(void); #endif struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, #ifdef __powerpc64__ .sv_table = freebsd32_sysent, #else .sv_table = sysent, #endif - .sv_errsize = 0, - .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_copyout_auxargs = __elfN(powerpc_copyout_auxargs), .sv_sendsig = sendsig, .sv_sigcode = sigcode32, .sv_szsigcode = &szsigcode32, .sv_name = "FreeBSD ELF32", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_stackprot = VM_PROT_ALL, #ifdef __powerpc64__ .sv_maxuser = VM_MAXUSER_ADDRESS32, .sv_usrstack = FREEBSD32_USRSTACK, .sv_psstrings = FREEBSD32_PS_STRINGS, .sv_copyout_strings = freebsd32_copyout_strings, .sv_setregs = ppc32_setregs, .sv_syscallnames = freebsd32_syscallnames, .sv_fixlimit = ppc32_fixlimit, #else .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_syscallnames = syscallnames, .sv_fixlimit = NULL, #endif .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_ILP32 | SV_SHP | SV_ASLR | SV_TIMEKEEP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_shared_page_base = FREEBSD32_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_hwcap = &cpu_features, .sv_hwcap2 = &cpu_features2, }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); static Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, #ifdef __powerpc64__ .interp_newpath = "/libexec/ld-elf32.so.1", #else .interp_newpath = NULL, #endif .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); static Elf32_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(oelf32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_oinfo); void elf_reloc_self(Elf_Dyn *dynp, Elf_Addr relocbase); void elf32_dump_thread(struct thread *td, void *dst, size_t *off) { size_t len; struct pcb *pcb; uint64_t vshr[32]; uint64_t *vsr_dw1; int vsr_idx; len = 0; pcb = td->td_pcb; if (pcb->pcb_flags & PCB_VEC) { save_vec_nodrop(td); if (dst != NULL) { len += elf32_populate_note(NT_PPC_VMX, &pcb->pcb_vec, (char *)dst + len, sizeof(pcb->pcb_vec), NULL); } else len += elf32_populate_note(NT_PPC_VMX, NULL, NULL, sizeof(pcb->pcb_vec), NULL); } if (pcb->pcb_flags & PCB_VSX) { save_fpu_nodrop(td); if (dst != NULL) { /* * Doubleword 0 of VSR0-VSR31 overlap with FPR0-FPR31 and * VSR32-VSR63 overlap with VR0-VR31, so we only copy * the non-overlapping data, which is doubleword 1 of VSR0-VSR31. */ for (vsr_idx = 0; vsr_idx < nitems(vshr); vsr_idx++) { vsr_dw1 = (uint64_t *)&pcb->pcb_fpu.fpr[vsr_idx].vsr[2]; vshr[vsr_idx] = *vsr_dw1; } len += elf32_populate_note(NT_PPC_VSX, vshr, (char *)dst + len, sizeof(vshr), NULL); } else len += elf32_populate_note(NT_PPC_VSX, NULL, NULL, sizeof(vshr), NULL); } *off = len; } #ifndef __powerpc64__ bool elf_is_ifunc_reloc(Elf_Size r_info) { return (ELF_R_TYPE(r_info) == R_PPC_IRELATIVE); } /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, int local, elf_lookup_fn lookup) { Elf_Addr *where; Elf_Half *hwhere; Elf_Addr addr; Elf_Addr addend, val; Elf_Word rtype, symidx; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: panic("PPC only supports RELA relocations"); break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) ((uintptr_t)relocbase + rela->r_offset); hwhere = (Elf_Half *) ((uintptr_t)relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("elf_reloc: unknown relocation mode %d\n", type); } switch (rtype) { case R_PPC_NONE: break; case R_PPC_ADDR32: /* word32 S + A */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; *where = elf_relocaddr(lf, addr + addend); break; case R_PPC_ADDR16_LO: /* #lo(S) */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; /* * addend values are sometimes relative to sections * (i.e. .rodata) in rela, where in reality they * are relative to relocbase. Detect this condition. */ if (addr > relocbase && addr <= (relocbase + addend)) addr = relocbase; addr = elf_relocaddr(lf, addr + addend); *hwhere = addr & 0xffff; break; case R_PPC_ADDR16_HA: /* #ha(S) */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; /* * addend values are sometimes relative to sections * (i.e. .rodata) in rela, where in reality they * are relative to relocbase. Detect this condition. */ if (addr > relocbase && addr <= (relocbase + addend)) addr = relocbase; addr = elf_relocaddr(lf, addr + addend); *hwhere = ((addr >> 16) + ((addr & 0x8000) ? 1 : 0)) & 0xffff; break; case R_PPC_RELATIVE: /* word32 B + A */ *where = elf_relocaddr(lf, relocbase + addend); break; case R_PPC_JMP_SLOT: /* PLT jump slot entry */ /* * We currently only support Secure-PLT jump slots. * Given that we reject BSS-PLT modules during load, we * don't need to check again. * The method we are using here is equivilent to * LD_BIND_NOW. */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; *where = elf_relocaddr(lf, addr + addend); break; case R_PPC_IRELATIVE: addr = relocbase + addend; val = ((Elf32_Addr (*)(void))addr)(); if (*where != val) *where = val; break; default: printf("kldload: unexpected relocation type %d\n", (int) rtype); return -1; } return(0); } void elf_reloc_self(Elf_Dyn *dynp, Elf_Addr relocbase) { Elf_Rela *rela = NULL, *relalim; Elf_Addr relasz = 0; Elf_Addr *where; /* * Extract the rela/relasz values from the dynamic section */ for (; dynp->d_tag != DT_NULL; dynp++) { switch (dynp->d_tag) { case DT_RELA: rela = (Elf_Rela *)(relocbase+dynp->d_un.d_ptr); break; case DT_RELASZ: relasz = dynp->d_un.d_val; break; } } /* * Relocate these values */ relalim = (Elf_Rela *)((caddr_t)rela + relasz); for (; rela < relalim; rela++) { if (ELF_R_TYPE(rela->r_info) != R_PPC_RELATIVE) continue; where = (Elf_Addr *)(relocbase + rela->r_offset); *where = (Elf_Addr)(relocbase + rela->r_addend); } } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup)); } int elf_cpu_load_file(linker_file_t lf) { /* Only sync the cache for non-kernel modules */ if (lf->id != 1) __syncicache(lf->address, lf->size); return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } static void ppc32_runtime_resolve() { /* * Since we don't support lazy binding, panic immediately if anyone * manages to call the runtime resolver. */ panic("kldload: Runtime resolver was called unexpectedly!"); } int elf_cpu_parse_dynamic(caddr_t loadbase, Elf_Dyn *dynamic) { Elf_Dyn *dp; bool has_plt = false; bool secure_plt = false; Elf_Addr *got; for (dp = dynamic; dp->d_tag != DT_NULL; dp++) { switch (dp->d_tag) { case DT_PPC_GOT: secure_plt = true; got = (Elf_Addr *)(loadbase + dp->d_un.d_ptr); /* Install runtime resolver canary. */ got[1] = (Elf_Addr)ppc32_runtime_resolve; got[2] = (Elf_Addr)0; break; case DT_PLTGOT: has_plt = true; break; } } if (has_plt && !secure_plt) { printf("kldload: BSS-PLT modules are not supported.\n"); return (-1); } return (0); } #endif #ifdef __powerpc64__ static void ppc32_fixlimit(struct rlimit *rl, int which) { switch (which) { case RLIMIT_DATA: if (ppc32_maxdsiz != 0) { if (rl->rlim_cur > ppc32_maxdsiz) rl->rlim_cur = ppc32_maxdsiz; if (rl->rlim_max > ppc32_maxdsiz) rl->rlim_max = ppc32_maxdsiz; } break; case RLIMIT_STACK: if (ppc32_maxssiz != 0) { if (rl->rlim_cur > ppc32_maxssiz) rl->rlim_cur = ppc32_maxssiz; if (rl->rlim_max > ppc32_maxssiz) rl->rlim_max = ppc32_maxssiz; } break; } } #endif Index: head/sys/powerpc/powerpc/elf64_machdep.c =================================================================== --- head/sys/powerpc/powerpc/elf64_machdep.c (revision 365831) +++ head/sys/powerpc/powerpc/elf64_machdep.c (revision 365832) @@ -1,453 +1,449 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void exec_setregs_funcdesc(struct thread *td, struct image_params *imgp, uintptr_t stack); struct sysentvec elf64_freebsd_sysvec_v1 = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, - .sv_errsize = 0, - .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode64, .sv_szsigcode = &szsigcode64, .sv_name = "FreeBSD ELF64", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_auxargs = __elfN(powerpc_copyout_auxargs), .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs_funcdesc, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP | SV_ASLR | SV_TIMEKEEP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_hwcap = &cpu_features, .sv_hwcap2 = &cpu_features2, }; struct sysentvec elf64_freebsd_sysvec_v2 = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, - .sv_errsize = 0, - .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode64, /* Fixed up in ppc64_init_sysvecs(). */ .sv_szsigcode = &szsigcode64, .sv_name = "FreeBSD ELF64 V2", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_auxargs = __elfN(powerpc_copyout_auxargs), .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP | SV_TIMEKEEP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_hwcap = &cpu_features, .sv_hwcap2 = &cpu_features2, }; static boolean_t ppc64_elfv1_header_match(struct image_params *params, int32_t *, uint32_t *); static boolean_t ppc64_elfv2_header_match(struct image_params *params, int32_t *, uint32_t *); static Elf64_Brandinfo freebsd_brand_info_elfv1 = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec_v1, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, .header_supported = &ppc64_elfv1_header_match }; SYSINIT(elf64v1, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_info_elfv1); static Elf64_Brandinfo freebsd_brand_info_elfv2 = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec_v2, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, .header_supported = &ppc64_elfv2_header_match }; SYSINIT(elf64v2, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_info_elfv2); static Elf64_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec_v1, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, .header_supported = &ppc64_elfv1_header_match }; SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_oinfo); void elf_reloc_self(Elf_Dyn *dynp, Elf_Addr relocbase); static void ppc64_init_sysvecs(void *arg) { exec_sysvec_init(&elf64_freebsd_sysvec_v2); exec_sysvec_init_secondary(&elf64_freebsd_sysvec_v2, &elf64_freebsd_sysvec_v1); /* * Adjust elfv2 sigcode after elfv1 sysvec is initialized. * exec_sysvec_init_secondary() assumes secondary sysvecs use * identical signal code, and skips allocating a second copy. * Since the ELFv2 trampoline is a strict subset of the ELFv1 code, * we can work around this by adjusting the base address. This also * avoids two copies of the trampoline code being allocated! */ elf64_freebsd_sysvec_v2.sv_sigcode_base += (uintptr_t)sigcode64_elfv2 - (uintptr_t)&sigcode64; elf64_freebsd_sysvec_v2.sv_szsigcode = &szsigcode64_elfv2; } SYSINIT(elf64_sysvec, SI_SUB_EXEC, SI_ORDER_ANY, ppc64_init_sysvecs, NULL); static boolean_t ppc64_elfv1_header_match(struct image_params *params, int32_t *osrel __unused, uint32_t *fctl0 __unused) { const Elf64_Ehdr *hdr = (const Elf64_Ehdr *)params->image_header; int abi = (hdr->e_flags & 3); return (abi == 0 || abi == 1); } static boolean_t ppc64_elfv2_header_match(struct image_params *params, int32_t *osrel __unused, uint32_t *fctl0 __unused) { const Elf64_Ehdr *hdr = (const Elf64_Ehdr *)params->image_header; int abi = (hdr->e_flags & 3); return (abi == 2); } static void exec_setregs_funcdesc(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct trapframe *tf; register_t entry_desc[3]; tf = trapframe(td); exec_setregs(td, imgp, stack); /* * For 64-bit ELFv1, we need to disentangle the function * descriptor * * 0. entry point * 1. TOC value (r2) * 2. Environment pointer (r11) */ (void)copyin((void *)imgp->entry_addr, entry_desc, sizeof(entry_desc)); tf->srr0 = entry_desc[0] + imgp->reloc_base; tf->fixreg[2] = entry_desc[1] + imgp->reloc_base; tf->fixreg[11] = entry_desc[2] + imgp->reloc_base; } void elf64_dump_thread(struct thread *td, void *dst, size_t *off) { size_t len; struct pcb *pcb; uint64_t vshr[32]; uint64_t *vsr_dw1; int vsr_idx; len = 0; pcb = td->td_pcb; if (pcb->pcb_flags & PCB_VEC) { save_vec_nodrop(td); if (dst != NULL) { len += elf64_populate_note(NT_PPC_VMX, &pcb->pcb_vec, (char *)dst + len, sizeof(pcb->pcb_vec), NULL); } else len += elf64_populate_note(NT_PPC_VMX, NULL, NULL, sizeof(pcb->pcb_vec), NULL); } if (pcb->pcb_flags & PCB_VSX) { save_fpu_nodrop(td); if (dst != NULL) { /* * Doubleword 0 of VSR0-VSR31 overlap with FPR0-FPR31 and * VSR32-VSR63 overlap with VR0-VR31, so we only copy * the non-overlapping data, which is doubleword 1 of VSR0-VSR31. */ for (vsr_idx = 0; vsr_idx < nitems(vshr); vsr_idx++) { vsr_dw1 = (uint64_t *)&pcb->pcb_fpu.fpr[vsr_idx].vsr[2]; vshr[vsr_idx] = *vsr_dw1; } len += elf64_populate_note(NT_PPC_VSX, vshr, (char *)dst + len, sizeof(vshr), NULL); } else len += elf64_populate_note(NT_PPC_VSX, NULL, NULL, sizeof(vshr), NULL); } *off = len; } bool elf_is_ifunc_reloc(Elf_Size r_info) { return (ELF_R_TYPE(r_info) == R_PPC_IRELATIVE); } /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, int local, elf_lookup_fn lookup) { Elf_Addr *where; Elf_Addr addr; Elf_Addr addend, val; Elf_Word rtype, symidx; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: panic("PPC only supports RELA relocations"); break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("elf_reloc: unknown relocation mode %d\n", type); } switch (rtype) { case R_PPC_NONE: break; case R_PPC64_ADDR64: /* doubleword64 S + A */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; addr += addend; *where = addr; break; case R_PPC_RELATIVE: /* doubleword64 B + A */ *where = elf_relocaddr(lf, relocbase + addend); break; case R_PPC_JMP_SLOT: /* function descriptor copy */ lookup(lf, symidx, 1, &addr); #if !defined(_CALL_ELF) || _CALL_ELF == 1 memcpy(where, (Elf_Addr *)addr, 3*sizeof(Elf_Addr)); #else *where = addr; #endif __asm __volatile("dcbst 0,%0; sync" :: "r"(where) : "memory"); break; case R_PPC_IRELATIVE: addr = relocbase + addend; val = ((Elf64_Addr (*)(void))addr)(); if (*where != val) *where = val; break; default: printf("kldload: unexpected relocation type %d\n", (int) rtype); return -1; } return(0); } void elf_reloc_self(Elf_Dyn *dynp, Elf_Addr relocbase) { Elf_Rela *rela = NULL, *relalim; Elf_Addr relasz = 0; Elf_Addr *where; /* * Extract the rela/relasz values from the dynamic section */ for (; dynp->d_tag != DT_NULL; dynp++) { switch (dynp->d_tag) { case DT_RELA: rela = (Elf_Rela *)(relocbase+dynp->d_un.d_ptr); break; case DT_RELASZ: relasz = dynp->d_un.d_val; break; } } /* * Relocate these values */ relalim = (Elf_Rela *)((caddr_t)rela + relasz); for (; rela < relalim; rela++) { if (ELF_R_TYPE(rela->r_info) != R_PPC_RELATIVE) continue; where = (Elf_Addr *)(relocbase + rela->r_offset); *where = (Elf_Addr)(relocbase + rela->r_addend); } } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup)); } int elf_cpu_load_file(linker_file_t lf) { /* Only sync the cache for non-kernel modules */ if (lf->id != 1) __syncicache(lf->address, lf->size); return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } int elf_cpu_parse_dynamic(caddr_t loadbase __unused, Elf_Dyn *dynamic __unused) { return (0); } Index: head/sys/riscv/riscv/elf_machdep.c =================================================================== --- head/sys/riscv/riscv/elf_machdep.c (revision 365831) +++ head/sys/riscv/riscv/elf_machdep.c (revision 365832) @@ -1,526 +1,524 @@ /*- * Copyright 1996-1998 John D. Polstra. * Copyright (c) 2015 Ruslan Bukin * Copyright (c) 2016 Yukishige Shibata * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static const char *riscv_machine_arch(struct proc *p); u_long elf_hwcap; struct sysentvec elf64_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, - .sv_errsize = 0, - .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD ELF64", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, .sv_copyout_auxargs = __elfN(freebsd_copyout_auxargs), .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP | SV_ASLR, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_hwcap = &elf_hwcap, .sv_machine_arch = riscv_machine_arch, }; INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec); static const char * riscv_machine_arch(struct proc *p) { if ((p->p_elf_flags & EF_RISCV_FLOAT_ABI_MASK) == EF_RISCV_FLOAT_ABI_SOFT) return (MACHINE_ARCH "sf"); return (MACHINE_ARCH); } static Elf64_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_RISCV, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_info); static bool debug_kld; SYSCTL_BOOL(_debug, OID_AUTO, kld_reloc, CTLFLAG_RW, &debug_kld, 0, "Activate debug prints in elf_reloc_internal()"); struct type2str_ent { int type; const char *str; }; void elf64_dump_thread(struct thread *td, void *dst, size_t *off) { } /* * Following 4 functions are used to manupilate bits on 32bit interger value. * FIXME: I implemetend for ease-to-understand rather than for well-optimized. */ static uint32_t gen_bitmask(int msb, int lsb) { uint32_t mask; if (msb == sizeof(mask) * 8 - 1) mask = ~0; else mask = (1U << (msb + 1)) - 1; if (lsb > 0) mask &= ~((1U << lsb) - 1); return (mask); } static uint32_t extract_bits(uint32_t x, int msb, int lsb) { uint32_t mask; mask = gen_bitmask(msb, lsb); x &= mask; x >>= lsb; return (x); } static uint32_t insert_bits(uint32_t d, uint32_t s, int msb, int lsb) { uint32_t mask; mask = gen_bitmask(msb, lsb); d &= ~mask; s <<= lsb; s &= mask; return (d | s); } static uint32_t insert_imm(uint32_t insn, uint32_t imm, int imm_msb, int imm_lsb, int insn_lsb) { int insn_msb; uint32_t v; v = extract_bits(imm, imm_msb, imm_lsb); insn_msb = (imm_msb - imm_lsb) + insn_lsb; return (insert_bits(insn, v, insn_msb, insn_lsb)); } /* * The RISC-V ISA is designed so that all of immediate values are * sign-extended. * An immediate value is sometimes generated at runtime by adding * 12bit sign integer and 20bit signed integer. This requests 20bit * immediate value to be ajusted if the MSB of the 12bit immediate * value is asserted (sign-extended value is treated as negative value). * * For example, 0x123800 can be calculated by adding upper 20 bit of * 0x124000 and sign-extended 12bit immediate whose bit pattern is * 0x800 as follows: * 0x123800 * = 0x123000 + 0x800 * = (0x123000 + 0x1000) + (-0x1000 + 0x800) * = (0x123000 + 0x1000) + (0xff...ff800) * = 0x124000 + sign-extention(0x800) */ static uint32_t calc_hi20_imm(uint32_t value) { /* * There is the arithmetical hack that can remove conditional * statement. But I implement it in straightforward way. */ if ((value & 0x800) != 0) value += 0x1000; return (value & ~0xfff); } static const struct type2str_ent t2s[] = { { R_RISCV_NONE, "R_RISCV_NONE" }, { R_RISCV_64, "R_RISCV_64" }, { R_RISCV_JUMP_SLOT, "R_RISCV_JUMP_SLOT" }, { R_RISCV_RELATIVE, "R_RISCV_RELATIVE" }, { R_RISCV_JAL, "R_RISCV_JAL" }, { R_RISCV_CALL, "R_RISCV_CALL" }, { R_RISCV_PCREL_HI20, "R_RISCV_PCREL_HI20" }, { R_RISCV_PCREL_LO12_I, "R_RISCV_PCREL_LO12_I" }, { R_RISCV_PCREL_LO12_S, "R_RISCV_PCREL_LO12_S" }, { R_RISCV_HI20, "R_RISCV_HI20" }, { R_RISCV_LO12_I, "R_RISCV_LO12_I" }, { R_RISCV_LO12_S, "R_RISCV_LO12_S" }, }; static const char * reloctype_to_str(int type) { int i; for (i = 0; i < sizeof(t2s) / sizeof(t2s[0]); ++i) { if (type == t2s[i].type) return t2s[i].str; } return "*unknown*"; } bool elf_is_ifunc_reloc(Elf_Size r_info __unused) { return (false); } /* * Currently kernel loadable module for RISCV is compiled with -fPIC option. * (see also additional CFLAGS definition for RISCV in sys/conf/kmod.mk) * Only R_RISCV_64, R_RISCV_JUMP_SLOT and RISCV_RELATIVE are emitted in * the module. Other relocations will be processed when kernel loadable * modules are built in non-PIC. * * FIXME: only RISCV64 is supported. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, int local, elf_lookup_fn lookup) { Elf_Size rtype, symidx; const Elf_Rela *rela; Elf_Addr val, addr; Elf64_Addr *where; Elf_Addr addend; uint32_t before32_1; uint32_t before32; uint64_t before64; uint32_t *insn32p; uint32_t imm20; int error; switch (type) { case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *)(relocbase + rela->r_offset); insn32p = (uint32_t *)where; addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: printf("%s:%d unknown reloc type %d\n", __FUNCTION__, __LINE__, type); return (-1); } switch (rtype) { case R_RISCV_NONE: break; case R_RISCV_64: case R_RISCV_JUMP_SLOT: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); val = addr; before64 = *where; if (*where != val) *where = val; if (debug_kld) printf("%p %c %-24s %016lx -> %016lx\n", where, (local ? 'l' : 'g'), reloctype_to_str(rtype), before64, *where); break; case R_RISCV_RELATIVE: before64 = *where; *where = elf_relocaddr(lf, relocbase + addend); if (debug_kld) printf("%p %c %-24s %016lx -> %016lx\n", where, (local ? 'l' : 'g'), reloctype_to_str(rtype), before64, *where); break; case R_RISCV_JAL: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); val = addr - (Elf_Addr)where; if (val <= -(1UL << 20) || (1UL << 20) <= val) { printf("kldload: huge offset against R_RISCV_JAL\n"); return (-1); } before32 = *insn32p; *insn32p = insert_imm(*insn32p, val, 20, 20, 31); *insn32p = insert_imm(*insn32p, val, 10, 1, 21); *insn32p = insert_imm(*insn32p, val, 11, 11, 20); *insn32p = insert_imm(*insn32p, val, 19, 12, 12); if (debug_kld) printf("%p %c %-24s %08x -> %08x\n", where, (local ? 'l' : 'g'), reloctype_to_str(rtype), before32, *insn32p); break; case R_RISCV_CALL: /* * R_RISCV_CALL relocates 8-byte region that consists * of the sequence of AUIPC and JALR. */ /* Calculate and check the pc relative offset. */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); val = addr - (Elf_Addr)where; if (val <= -(1UL << 32) || (1UL << 32) <= val) { printf("kldload: huge offset against R_RISCV_CALL\n"); return (-1); } /* Relocate AUIPC. */ before32 = insn32p[0]; imm20 = calc_hi20_imm(val); insn32p[0] = insert_imm(insn32p[0], imm20, 31, 12, 12); /* Relocate JALR. */ before32_1 = insn32p[1]; insn32p[1] = insert_imm(insn32p[1], val, 11, 0, 20); if (debug_kld) printf("%p %c %-24s %08x %08x -> %08x %08x\n", where, (local ? 'l' : 'g'), reloctype_to_str(rtype), before32, insn32p[0], before32_1, insn32p[1]); break; case R_RISCV_PCREL_HI20: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); val = addr - (Elf_Addr)where; insn32p = (uint32_t *)where; before32 = *insn32p; imm20 = calc_hi20_imm(val); *insn32p = insert_imm(*insn32p, imm20, 31, 12, 12); if (debug_kld) printf("%p %c %-24s %08x -> %08x\n", where, (local ? 'l' : 'g'), reloctype_to_str(rtype), before32, *insn32p); break; case R_RISCV_PCREL_LO12_I: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); val = addr - (Elf_Addr)where; insn32p = (uint32_t *)where; before32 = *insn32p; *insn32p = insert_imm(*insn32p, addr, 11, 0, 20); if (debug_kld) printf("%p %c %-24s %08x -> %08x\n", where, (local ? 'l' : 'g'), reloctype_to_str(rtype), before32, *insn32p); break; case R_RISCV_PCREL_LO12_S: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); val = addr - (Elf_Addr)where; insn32p = (uint32_t *)where; before32 = *insn32p; *insn32p = insert_imm(*insn32p, addr, 11, 5, 25); *insn32p = insert_imm(*insn32p, addr, 4, 0, 7); if (debug_kld) printf("%p %c %-24s %08x -> %08x\n", where, (local ? 'l' : 'g'), reloctype_to_str(rtype), before32, *insn32p); break; case R_RISCV_HI20: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); val = addr; insn32p = (uint32_t *)where; before32 = *insn32p; imm20 = calc_hi20_imm(val); *insn32p = insert_imm(*insn32p, imm20, 31, 12, 12); if (debug_kld) printf("%p %c %-24s %08x -> %08x\n", where, (local ? 'l' : 'g'), reloctype_to_str(rtype), before32, *insn32p); break; case R_RISCV_LO12_I: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); val = addr; insn32p = (uint32_t *)where; before32 = *insn32p; *insn32p = insert_imm(*insn32p, addr, 11, 0, 20); if (debug_kld) printf("%p %c %-24s %08x -> %08x\n", where, (local ? 'l' : 'g'), reloctype_to_str(rtype), before32, *insn32p); break; case R_RISCV_LO12_S: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); val = addr; insn32p = (uint32_t *)where; before32 = *insn32p; *insn32p = insert_imm(*insn32p, addr, 11, 5, 25); *insn32p = insert_imm(*insn32p, addr, 4, 0, 7); if (debug_kld) printf("%p %c %-24s %08x -> %08x\n", where, (local ? 'l' : 'g'), reloctype_to_str(rtype), before32, *insn32p); break; default: printf("kldload: unexpected relocation type %ld\n", rtype); return (-1); } return (0); } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup)); } int elf_cpu_load_file(linker_file_t lf __unused) { return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } int elf_cpu_parse_dynamic(caddr_t loadbase __unused, Elf_Dyn *dynamic __unused) { return (0); } Index: head/sys/sys/sysent.h =================================================================== --- head/sys/sys/sysent.h (revision 365831) +++ head/sys/sys/sysent.h (revision 365832) @@ -1,333 +1,329 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1988, 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_SYSENT_H_ #define _SYS_SYSENT_H_ #include struct rlimit; struct sysent; struct thread; struct ksiginfo; struct syscall_args; enum systrace_probe_t { SYSTRACE_ENTRY, SYSTRACE_RETURN, }; typedef int sy_call_t(struct thread *, void *); typedef void (*systrace_probe_func_t)(struct syscall_args *, enum systrace_probe_t, int); typedef void (*systrace_args_func_t)(int, void *, uint64_t *, int *); #ifdef _KERNEL extern systrace_probe_func_t systrace_probe_func; extern bool systrace_enabled; #ifdef KDTRACE_HOOKS #define SYSTRACE_ENABLED() (systrace_enabled) #else #define SYSTRACE_ENABLED() (0) #endif #endif /* _KERNEL */ struct sysent { /* system call table */ int sy_narg; /* number of arguments */ sy_call_t *sy_call; /* implementing function */ au_event_t sy_auevent; /* audit event associated with syscall */ systrace_args_func_t sy_systrace_args_func; /* optional argument conversion function. */ u_int32_t sy_entry; /* DTrace entry ID for systrace. */ u_int32_t sy_return; /* DTrace return ID for systrace. */ u_int32_t sy_flags; /* General flags for system calls. */ u_int32_t sy_thrcnt; }; /* * A system call is permitted in capability mode. */ #define SYF_CAPENABLED 0x00000001 #define SY_THR_FLAGMASK 0x7 #define SY_THR_STATIC 0x1 #define SY_THR_DRAINING 0x2 #define SY_THR_ABSENT 0x4 #define SY_THR_INCR 0x8 #ifdef KLD_MODULE #define SY_THR_STATIC_KLD 0 #else #define SY_THR_STATIC_KLD SY_THR_STATIC #endif struct image_params; struct proc; struct __sigset; struct trapframe; struct vnode; struct sysentvec { int sv_size; /* number of entries */ struct sysent *sv_table; /* pointer to sysent */ - int sv_errsize; /* size of errno translation table */ - const int *sv_errtbl; /* errno translation table */ int (*sv_transtrap)(int, int); /* translate trap-to-signal mapping */ int (*sv_fixup)(uintptr_t *, struct image_params *); /* stack fixup function */ void (*sv_sendsig)(void (*)(int), struct ksiginfo *, struct __sigset *); /* send signal */ char *sv_sigcode; /* start of sigtramp code */ int *sv_szsigcode; /* size of sigtramp code */ char *sv_name; /* name of binary type */ int (*sv_coredump)(struct thread *, struct vnode *, off_t, int); /* function to dump core, or NULL */ int (*sv_imgact_try)(struct image_params *); void (*sv_stackgap)(struct image_params *, uintptr_t *); int (*sv_copyout_auxargs)(struct image_params *, uintptr_t); int sv_minsigstksz; /* minimum signal stack size */ vm_offset_t sv_minuser; /* VM_MIN_ADDRESS */ vm_offset_t sv_maxuser; /* VM_MAXUSER_ADDRESS */ vm_offset_t sv_usrstack; /* USRSTACK */ vm_offset_t sv_psstrings; /* PS_STRINGS */ int sv_stackprot; /* vm protection for stack */ int (*sv_copyout_strings)(struct image_params *, uintptr_t *); void (*sv_setregs)(struct thread *, struct image_params *, uintptr_t); void (*sv_fixlimit)(struct rlimit *, int); u_long *sv_maxssiz; u_int sv_flags; void (*sv_set_syscall_retval)(struct thread *, int); int (*sv_fetch_syscall_args)(struct thread *); const char **sv_syscallnames; vm_offset_t sv_timekeep_base; vm_offset_t sv_shared_page_base; vm_offset_t sv_shared_page_len; vm_offset_t sv_sigcode_base; void *sv_shared_page_obj; void (*sv_schedtail)(struct thread *); void (*sv_thread_detach)(struct thread *); int (*sv_trap)(struct thread *); u_long *sv_hwcap; /* Value passed in AT_HWCAP. */ u_long *sv_hwcap2; /* Value passed in AT_HWCAP2. */ const char *(*sv_machine_arch)(struct proc *); }; #define SV_ILP32 0x000100 /* 32-bit executable. */ #define SV_LP64 0x000200 /* 64-bit executable. */ #define SV_IA32 0x004000 /* Intel 32-bit executable. */ #define SV_AOUT 0x008000 /* a.out executable. */ #define SV_SHP 0x010000 /* Shared page. */ #define SV_CAPSICUM 0x020000 /* Force cap_enter() on startup. */ #define SV_TIMEKEEP 0x040000 /* Shared page timehands. */ #define SV_ASLR 0x080000 /* ASLR allowed. */ #define SV_ABI_MASK 0xff -#define SV_ABI_ERRNO(p, e) ((p)->p_sysent->sv_errsize <= 0 ? e : \ - ((e) >= (p)->p_sysent->sv_errsize ? -1 : (p)->p_sysent->sv_errtbl[e])) #define SV_PROC_FLAG(p, x) ((p)->p_sysent->sv_flags & (x)) #define SV_PROC_ABI(p) ((p)->p_sysent->sv_flags & SV_ABI_MASK) #define SV_CURPROC_FLAG(x) SV_PROC_FLAG(curproc, x) #define SV_CURPROC_ABI() SV_PROC_ABI(curproc) /* same as ELFOSABI_XXX, to prevent header pollution */ #define SV_ABI_LINUX 3 #define SV_ABI_FREEBSD 9 #define SV_ABI_CLOUDABI 17 #define SV_ABI_UNDEF 255 #ifdef _KERNEL extern struct sysentvec aout_sysvec; extern struct sysent sysent[]; extern const char *syscallnames[]; #define NO_SYSCALL (-1) struct module; struct syscall_module_data { int (*chainevh)(struct module *, int, void *); /* next handler */ void *chainarg; /* arg for next event handler */ int *offset; /* offset into sysent */ struct sysent *new_sysent; /* new sysent */ struct sysent old_sysent; /* old sysent */ int flags; /* flags for syscall_register */ }; /* separate initialization vector so it can be used in a substructure */ #define SYSENT_INIT_VALS(_syscallname) { \ .sy_narg = (sizeof(struct _syscallname ## _args ) \ / sizeof(register_t)), \ .sy_call = (sy_call_t *)&sys_##_syscallname, \ .sy_auevent = SYS_AUE_##_syscallname, \ .sy_systrace_args_func = NULL, \ .sy_entry = 0, \ .sy_return = 0, \ .sy_flags = 0, \ .sy_thrcnt = 0 \ } #define MAKE_SYSENT(syscallname) \ static struct sysent syscallname##_sysent = SYSENT_INIT_VALS(syscallname); #define MAKE_SYSENT_COMPAT(syscallname) \ static struct sysent syscallname##_sysent = { \ (sizeof(struct syscallname ## _args ) \ / sizeof(register_t)), \ (sy_call_t *)& syscallname, \ SYS_AUE_##syscallname \ } #define SYSCALL_MODULE(name, offset, new_sysent, evh, arg) \ static struct syscall_module_data name##_syscall_mod = { \ evh, arg, offset, new_sysent, { 0, NULL, AUE_NULL } \ }; \ \ static moduledata_t name##_mod = { \ "sys/" #name, \ syscall_module_handler, \ &name##_syscall_mod \ }; \ DECLARE_MODULE(name, name##_mod, SI_SUB_SYSCALLS, SI_ORDER_MIDDLE) #define SYSCALL_MODULE_HELPER(syscallname) \ static int syscallname##_syscall = SYS_##syscallname; \ MAKE_SYSENT(syscallname); \ SYSCALL_MODULE(syscallname, \ & syscallname##_syscall, & syscallname##_sysent, \ NULL, NULL) #define SYSCALL_MODULE_PRESENT(syscallname) \ (sysent[SYS_##syscallname].sy_call != (sy_call_t *)lkmnosys && \ sysent[SYS_##syscallname].sy_call != (sy_call_t *)lkmressys) /* * Syscall registration helpers with resource allocation handling. */ struct syscall_helper_data { struct sysent new_sysent; struct sysent old_sysent; int syscall_no; int registered; }; #define SYSCALL_INIT_HELPER_F(syscallname, flags) { \ .new_sysent = { \ .sy_narg = (sizeof(struct syscallname ## _args ) \ / sizeof(register_t)), \ .sy_call = (sy_call_t *)& sys_ ## syscallname, \ .sy_auevent = SYS_AUE_##syscallname, \ .sy_flags = (flags) \ }, \ .syscall_no = SYS_##syscallname \ } #define SYSCALL_INIT_HELPER_COMPAT_F(syscallname, flags) { \ .new_sysent = { \ .sy_narg = (sizeof(struct syscallname ## _args ) \ / sizeof(register_t)), \ .sy_call = (sy_call_t *)& syscallname, \ .sy_auevent = SYS_AUE_##syscallname, \ .sy_flags = (flags) \ }, \ .syscall_no = SYS_##syscallname \ } #define SYSCALL_INIT_HELPER(syscallname) \ SYSCALL_INIT_HELPER_F(syscallname, 0) #define SYSCALL_INIT_HELPER_COMPAT(syscallname) \ SYSCALL_INIT_HELPER_COMPAT_F(syscallname, 0) #define SYSCALL_INIT_LAST { \ .syscall_no = NO_SYSCALL \ } int syscall_module_handler(struct module *mod, int what, void *arg); int syscall_helper_register(struct syscall_helper_data *sd, int flags); int syscall_helper_unregister(struct syscall_helper_data *sd); /* Implementation, exposed for COMPAT code */ int kern_syscall_register(struct sysent *sysents, int *offset, struct sysent *new_sysent, struct sysent *old_sysent, int flags); int kern_syscall_deregister(struct sysent *sysents, int offset, const struct sysent *old_sysent); int kern_syscall_module_handler(struct sysent *sysents, struct module *mod, int what, void *arg); int kern_syscall_helper_register(struct sysent *sysents, struct syscall_helper_data *sd, int flags); int kern_syscall_helper_unregister(struct sysent *sysents, struct syscall_helper_data *sd); struct proc; const char *syscallname(struct proc *p, u_int code); /* Special purpose system call functions. */ struct nosys_args; int lkmnosys(struct thread *, struct nosys_args *); int lkmressys(struct thread *, struct nosys_args *); int _syscall_thread_enter(struct thread *td, struct sysent *se); void _syscall_thread_exit(struct thread *td, struct sysent *se); static inline int syscall_thread_enter(struct thread *td, struct sysent *se) { if (__predict_true((se->sy_thrcnt & SY_THR_STATIC) != 0)) return (0); return (_syscall_thread_enter(td, se)); } static inline void syscall_thread_exit(struct thread *td, struct sysent *se) { if (__predict_true((se->sy_thrcnt & SY_THR_STATIC) != 0)) return; _syscall_thread_exit(td, se); } int shared_page_alloc(int size, int align); int shared_page_fill(int size, int align, const void *data); void shared_page_write(int base, int size, const void *data); void exec_sysvec_init(void *param); void exec_sysvec_init_secondary(struct sysentvec *sv, struct sysentvec *sv2); void exec_inittk(void); #define INIT_SYSENTVEC(name, sv) \ SYSINIT(name, SI_SUB_EXEC, SI_ORDER_ANY, \ (sysinit_cfunc_t)exec_sysvec_init, sv); #endif /* _KERNEL */ #endif /* !_SYS_SYSENT_H_ */