diff --git a/sys/amd64/amd64/elf_machdep.c b/sys/amd64/amd64/elf_machdep.c index 340b2bb3b017..b1fd0c033743 100644 --- a/sys/amd64/amd64/elf_machdep.c +++ b/sys/amd64/amd64/elf_machdep.c @@ -1,445 +1,443 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "vdso_offsets.h" extern const char _binary_elf_vdso_so_1_start[]; extern const char _binary_elf_vdso_so_1_end[]; extern char _binary_elf_vdso_so_1_size; struct sysentvec elf64_freebsd_sysvec_la48 = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = _binary_elf_vdso_so_1_start, .sv_szsigcode = (int *)&_binary_elf_vdso_so_1_size, .sv_sigcodeoff = VDSO_SIGCODE_OFFSET, .sv_name = "FreeBSD ELF64", .sv_coredump = __elfN(coredump), .sv_elf_core_osabi = ELFOSABI_FREEBSD, .sv_elf_core_abi_vendor = FREEBSD_ABI_VENDOR, .sv_elf_core_prepare_notes = __elfN(prepare_notes), - .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS_LA48, .sv_usrstack = USRSTACK_LA48, .sv_psstrings = PS_STRINGS_LA48, .sv_psstringssz = sizeof(struct ps_strings), .sv_stackprot = VM_PROT_ALL, .sv_copyout_auxargs = __elfN(freebsd_copyout_auxargs), .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_ASLR | SV_LP64 | SV_SHP | SV_TIMEKEEP | SV_RNG_SEED_VER | SV_DSO_SIG, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE_LA48, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_onexec_old = exec_onexec_old, .sv_onexit = exit_onexit, .sv_set_fork_retval = x86_set_fork_retval, .sv_regset_begin = SET_BEGIN(__elfN(regset)), .sv_regset_end = SET_LIMIT(__elfN(regset)), }; struct sysentvec elf64_freebsd_sysvec_la57 = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = _binary_elf_vdso_so_1_start, .sv_szsigcode = (int *)&_binary_elf_vdso_so_1_size, .sv_sigcodeoff = VDSO_SIGCODE_OFFSET, .sv_name = "FreeBSD ELF64", .sv_coredump = __elfN(coredump), .sv_elf_core_osabi = ELFOSABI_FREEBSD, .sv_elf_core_abi_vendor = FREEBSD_ABI_VENDOR, .sv_elf_core_prepare_notes = __elfN(prepare_notes), - .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS_LA57, .sv_usrstack = USRSTACK_LA57, .sv_psstrings = PS_STRINGS_LA57, .sv_psstringssz = sizeof(struct ps_strings), .sv_stackprot = VM_PROT_ALL, .sv_copyout_auxargs = __elfN(freebsd_copyout_auxargs), .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_ASLR | SV_LP64 | SV_SHP | SV_TIMEKEEP | SV_RNG_SEED_VER | SV_DSO_SIG, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE_LA57, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_onexec_old = exec_onexec_old, .sv_onexit = exit_onexit, .sv_set_fork_retval= x86_set_fork_retval, .sv_regset_begin = SET_BEGIN(__elfN(regset)), .sv_regset_end = SET_LIMIT(__elfN(regset)), }; static void amd64_init_sysvecs(void *arg) { amd64_lower_shared_page(&elf64_freebsd_sysvec_la48); if (la57) { exec_sysvec_init(&elf64_freebsd_sysvec_la57); exec_sysvec_init_secondary(&elf64_freebsd_sysvec_la57, &elf64_freebsd_sysvec_la48); } else { exec_sysvec_init(&elf64_freebsd_sysvec_la48); } } SYSINIT(elf64_sysvec, SI_SUB_EXEC, SI_ORDER_ANY, amd64_init_sysvecs, NULL); void amd64_lower_shared_page(struct sysentvec *sv) { if (hw_lower_amd64_sharedpage != 0) { sv->sv_maxuser -= PAGE_SIZE; sv->sv_shared_page_base -= PAGE_SIZE; sv->sv_usrstack -= PAGE_SIZE; sv->sv_psstrings -= PAGE_SIZE; } } static bool freebsd_brand_info_la57_img_compat(struct image_params *imgp, int32_t *osrel __unused, uint32_t *fctl0) { if ((imgp->proc->p_md.md_flags & P_MD_LA57) != 0) return (true); if (fctl0 == NULL || (*fctl0 & NT_FREEBSD_FCTL_LA48) != 0) return (false); if ((imgp->proc->p_md.md_flags & P_MD_LA48) != 0) return (false); return (true); } static Elf64_Brandinfo freebsd_brand_info_la48 = { .brand = ELFOSABI_FREEBSD, .machine = EM_X86_64, .compat_3_brand = "FreeBSD", .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec_la48, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, }; static Elf64_Brandinfo freebsd_brand_info_la57 = { .brand = ELFOSABI_FREEBSD, .machine = EM_X86_64, .compat_3_brand = "FreeBSD", .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec_la57, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, .header_supported = freebsd_brand_info_la57_img_compat, }; static void sysinit_register_elf64_brand_entries(void *arg __unused) { /* * _57 must go first so it can either claim the image or hand * it to _48. */ if (la57) elf64_insert_brand_entry(&freebsd_brand_info_la57); elf64_insert_brand_entry(&freebsd_brand_info_la48); } SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST, sysinit_register_elf64_brand_entries, NULL); static Elf64_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_X86_64, .compat_3_brand = "FreeBSD", .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec_la48, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_oinfo); static Elf64_Brandinfo kfreebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_X86_64, .compat_3_brand = "FreeBSD", .interp_path = "/lib/ld-kfreebsd-x86-64.so.1", .sysvec = &elf64_freebsd_sysvec_la48, .interp_newpath = NULL, .brand_note = &elf64_kfreebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY }; SYSINIT(kelf64, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t)elf64_insert_brand_entry, &kfreebsd_brand_info); void elf64_dump_thread(struct thread *td, void *dst, size_t *off) { void *buf; size_t len; len = 0; if (use_xsave) { if (dst != NULL) { fpugetregs(td); len += elf64_populate_note(NT_X86_XSTATE, get_pcb_user_save_td(td), dst, cpu_max_ext_state_size, &buf); *(uint64_t *)((char *)buf + X86_XSTATE_XCR0_OFFSET) = xsave_mask; } else len += elf64_populate_note(NT_X86_XSTATE, NULL, NULL, cpu_max_ext_state_size, NULL); } *off = len; } bool elf_is_ifunc_reloc(Elf_Size r_info) { return (ELF_R_TYPE(r_info) == R_X86_64_IRELATIVE); } /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, bool late_ifunc, elf_lookup_fn lookup) { Elf64_Addr *where, val; Elf32_Addr *where32, val32; Elf_Addr addr; Elf_Addr addend; Elf_Size rtype, symidx; const Elf_Rel *rel; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: rel = (const Elf_Rel *)data; where = (Elf_Addr *) (relocbase + rel->r_offset); rtype = ELF_R_TYPE(rel->r_info); symidx = ELF_R_SYM(rel->r_info); /* Addend is 32 bit on 32 bit relocs */ switch (rtype) { case R_X86_64_PC32: case R_X86_64_32S: case R_X86_64_PLT32: addend = *(Elf32_Addr *)where; break; default: addend = *where; break; } break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("unknown reloc type %d\n", type); } if (late_ifunc) { KASSERT(type == ELF_RELOC_RELA, ("Only RELA ifunc relocations are supported")); if (rtype != R_X86_64_IRELATIVE) return (0); } switch (rtype) { case R_X86_64_NONE: /* none */ break; case R_X86_64_64: /* S + A */ error = lookup(lf, symidx, 1, &addr); val = addr + addend; if (error != 0) return (-1); if (*where != val) *where = val; break; case R_X86_64_PC32: /* S + A - P */ case R_X86_64_PLT32: /* L + A - P, L is PLT location for the symbol, which we treat as S */ error = lookup(lf, symidx, 1, &addr); where32 = (Elf32_Addr *)where; val32 = (Elf32_Addr)(addr + addend - (Elf_Addr)where); if (error != 0) return (-1); if (*where32 != val32) *where32 = val32; break; case R_X86_64_32S: /* S + A sign extend */ error = lookup(lf, symidx, 1, &addr); val32 = (Elf32_Addr)(addr + addend); where32 = (Elf32_Addr *)where; if (error != 0) return (-1); if (*where32 != val32) *where32 = val32; break; case R_X86_64_COPY: /* none */ /* * There shouldn't be copy relocations in kernel * objects. */ printf("kldload: unexpected R_COPY relocation, " "symbol index %ld\n", symidx); return (-1); case R_X86_64_GLOB_DAT: /* S */ case R_X86_64_JMP_SLOT: /* XXX need addend + offset */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); if (*where != addr) *where = addr; break; case R_X86_64_RELATIVE: /* B + A */ addr = elf_relocaddr(lf, relocbase + addend); val = addr; if (*where != val) *where = val; break; case R_X86_64_IRELATIVE: addr = relocbase + addend; val = ((Elf64_Addr (*)(void))addr)(); if (*where != val) *where = val; break; default: printf("kldload: unexpected relocation type %ld, " "symbol index %ld\n", rtype, symidx); return (-1); } return (0); } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, false, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, false, lookup)); } int elf_reloc_late(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, true, lookup)); } int elf_cpu_load_file(linker_file_t lf __unused) { return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } int elf_cpu_parse_dynamic(caddr_t loadbase __unused, Elf_Dyn *dynamic __unused) { return (0); } diff --git a/sys/arm/arm/elf_machdep.c b/sys/arm/arm/elf_machdep.c index c0175bda02a3..c77fde315e78 100644 --- a/sys/arm/arm/elf_machdep.c +++ b/sys/arm/arm/elf_machdep.c @@ -1,333 +1,332 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VFP #include #endif #include "opt_ddb.h" /* for OPT_DDB */ #include "opt_global.h" /* for OPT_KDTRACE_HOOKS */ #include "opt_stack.h" /* for OPT_STACK */ static bool elf32_arm_abi_supported(struct image_params *, int32_t *, uint32_t *); u_long elf_hwcap; u_long elf_hwcap2; struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD ELF32", .sv_coredump = __elfN(coredump), .sv_elf_core_osabi = ELFOSABI_FREEBSD, .sv_elf_core_abi_vendor = FREEBSD_ABI_VENDOR, .sv_elf_core_prepare_notes = __elfN(prepare_notes), - .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_psstringssz = sizeof(struct ps_strings), .sv_stackprot = VM_PROT_ALL, .sv_copyout_auxargs = __elfN(freebsd_copyout_auxargs), .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ASLR | SV_SHP | SV_TIMEKEEP | SV_RNG_SEED_VER | SV_ABI_FREEBSD | SV_ILP32, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_hwcap = &elf_hwcap, .sv_hwcap2 = &elf_hwcap2, .sv_onexec_old = exec_onexec_old, .sv_onexit = exit_onexit, .sv_regset_begin = SET_BEGIN(__elfN(regset)), .sv_regset_end = SET_LIMIT(__elfN(regset)), }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); static Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_ARM, .compat_3_brand = "FreeBSD", .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, .header_supported= elf32_arm_abi_supported, }; SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); static bool elf32_arm_abi_supported(struct image_params *imgp, int32_t *osrel __unused, uint32_t *fctl0 __unused) { const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header; /* * When configured for EABI, FreeBSD supports EABI vesions 4 and 5. */ if (EF_ARM_EABI_VERSION(hdr->e_flags) < EF_ARM_EABI_FREEBSD_MIN) { if (bootverbose) uprintf("Attempting to execute non EABI binary (rev %d) image %s", EF_ARM_EABI_VERSION(hdr->e_flags), imgp->args->fname); return (false); } return (true); } void elf32_dump_thread(struct thread *td __unused, void *dst __unused, size_t *off __unused) { } bool elf_is_ifunc_reloc(Elf_Size r_info __unused) { return (false); } /* * It is possible for the compiler to emit relocations for unaligned data. * We handle this situation with these inlines. */ #define RELOC_ALIGNED_P(x) \ (((uintptr_t)(x) & (sizeof(void *) - 1)) == 0) static __inline Elf_Addr load_ptr(Elf_Addr *where) { Elf_Addr res; if (RELOC_ALIGNED_P(where)) return *where; memcpy(&res, where, sizeof(res)); return (res); } static __inline void store_ptr(Elf_Addr *where, Elf_Addr val) { if (RELOC_ALIGNED_P(where)) *where = val; else memcpy(where, &val, sizeof(val)); } #undef RELOC_ALIGNED_P /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, int local, elf_lookup_fn lookup) { Elf_Addr *where; Elf_Addr addr; Elf_Addr addend; Elf_Word rtype, symidx; const Elf_Rel *rel; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: rel = (const Elf_Rel *)data; where = (Elf_Addr *) (relocbase + rel->r_offset); addend = load_ptr(where); rtype = ELF_R_TYPE(rel->r_info); symidx = ELF_R_SYM(rel->r_info); break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("unknown reloc type %d\n", type); } if (local) { if (rtype == R_ARM_RELATIVE) { /* A + B */ addr = elf_relocaddr(lf, relocbase + addend); if (load_ptr(where) != addr) store_ptr(where, addr); } return (0); } switch (rtype) { case R_ARM_NONE: /* none */ break; case R_ARM_ABS32: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); store_ptr(where, addr + load_ptr(where)); break; case R_ARM_COPY: /* none */ /* * There shouldn't be copy relocations in kernel * objects. */ printf("kldload: unexpected R_COPY relocation, " "symbol index %d\n", symidx); return (-1); break; case R_ARM_JUMP_SLOT: error = lookup(lf, symidx, 1, &addr); if (error == 0) { store_ptr(where, addr); return (0); } return (-1); case R_ARM_RELATIVE: break; default: printf("kldload: unexpected relocation type %d, " "symbol index %d\n", rtype, symidx); return (-1); } return(0); } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup)); } int elf_cpu_load_file(linker_file_t lf) { /* * The pmap code does not do an icache sync upon establishing executable * mappings in the kernel pmap. It's an optimization based on the fact * that kernel memory allocations always have EXECUTABLE protection even * when the memory isn't going to hold executable code. The only time * kernel memory holding instructions does need a sync is after loading * a kernel module, and that's when this function gets called. * * This syncs data and instruction caches after loading a module. We * don't worry about the kernel itself (lf->id is 1) as locore.S did * that on entry. Even if data cache maintenance was done by IO code, * the relocation fixup process creates dirty cache entries that we must * write back before doing icache sync. The instruction cache sync also * invalidates the branch predictor cache on platforms that have one. */ if (lf->id == 1) return (0); dcache_wb_pou((vm_offset_t)lf->address, (vm_size_t)lf->size); icache_inv_all(); #if defined(DDB) || defined(KDTRACE_HOOKS) || defined(STACK) /* * Inform the stack(9) code of the new module, so it can acquire its * per-module unwind data. */ unwind_module_loaded(lf); #endif return (0); } int elf_cpu_parse_dynamic(caddr_t loadbase __unused, Elf_Dyn *dynamic __unused) { return (0); } int elf_cpu_unload_file(linker_file_t lf) { #if defined(DDB) || defined(KDTRACE_HOOKS) || defined(STACK) /* Inform the stack(9) code that this module is gone. */ unwind_module_unloaded(lf); #endif return (0); } diff --git a/sys/arm64/arm64/elf32_machdep.c b/sys/arm64/arm64/elf32_machdep.c index 4a0366df99c4..7fa636628f64 100644 --- a/sys/arm64/arm64/elf32_machdep.c +++ b/sys/arm64/arm64/elf32_machdep.c @@ -1,289 +1,288 @@ /*- * Copyright (c) 2014, 2015 The FreeBSD Foundation. * Copyright (c) 2014, 2017 Andrew Turner. * Copyright (c) 2018 Olivier Houchard * All rights reserved. * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VFP #include #endif #include #define FREEBSD32_MINUSER 0x00001000 #define FREEBSD32_MAXUSER ((1ul << 32) - PAGE_SIZE) #define FREEBSD32_SHAREDPAGE (FREEBSD32_MAXUSER - PAGE_SIZE) #define FREEBSD32_USRSTACK FREEBSD32_SHAREDPAGE extern const char *freebsd32_syscallnames[]; extern char aarch32_sigcode[]; extern int sz_aarch32_sigcode; static int freebsd32_fetch_syscall_args(struct thread *td); static void freebsd32_setregs(struct thread *td, struct image_params *imgp, u_long stack); static void freebsd32_set_syscall_retval(struct thread *, int); static bool elf32_arm_abi_supported(struct image_params *, int32_t *, uint32_t *); extern void freebsd32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); u_long __read_frequently elf32_hwcap; u_long __read_frequently elf32_hwcap2; static struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = freebsd32_sysent, .sv_fixup = elf32_freebsd_fixup, .sv_sendsig = freebsd32_sendsig, .sv_sigcode = aarch32_sigcode, .sv_szsigcode = &sz_aarch32_sigcode, .sv_name = "FreeBSD ELF32", .sv_coredump = elf32_coredump, .sv_elf_core_osabi = ELFOSABI_FREEBSD, .sv_elf_core_abi_vendor = FREEBSD_ABI_VENDOR, .sv_elf_core_prepare_notes = elf32_prepare_notes, - .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = FREEBSD32_MINUSER, .sv_maxuser = FREEBSD32_MAXUSER, .sv_usrstack = FREEBSD32_USRSTACK, .sv_psstrings = FREEBSD32_PS_STRINGS, .sv_psstringssz = sizeof(struct freebsd32_ps_strings), .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, .sv_copyout_auxargs = elf32_freebsd_copyout_auxargs, .sv_copyout_strings = freebsd32_copyout_strings, .sv_setregs = freebsd32_setregs, .sv_fixlimit = NULL, // XXX .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_ILP32 | SV_SHP | SV_TIMEKEEP | SV_RNG_SEED_VER, .sv_set_syscall_retval = freebsd32_set_syscall_retval, .sv_fetch_syscall_args = freebsd32_fetch_syscall_args, .sv_syscallnames = freebsd32_syscallnames, .sv_shared_page_base = FREEBSD32_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_hwcap = &elf32_hwcap, .sv_hwcap2 = &elf32_hwcap2, .sv_onexec_old = exec_onexec_old, .sv_onexit = exit_onexit, .sv_regset_begin = SET_BEGIN(__elfN(regset)), .sv_regset_end = SET_LIMIT(__elfN(regset)), }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); static Elf32_Brandinfo freebsd32_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_ARM, .compat_3_brand = "FreeBSD", .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = "/libexec/ld-elf32.so.1", .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, .header_supported= elf32_arm_abi_supported, }; SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)elf32_insert_brand_entry, &freebsd32_brand_info); static bool elf32_arm_abi_supported(struct image_params *imgp, int32_t *osrel __unused, uint32_t *fctl0 __unused) { const Elf32_Ehdr *hdr; /* Check if we support AArch32 */ if (ID_AA64PFR0_EL0_VAL(READ_SPECIALREG(id_aa64pfr0_el1)) != ID_AA64PFR0_EL0_64_32) return (false); #define EF_ARM_EABI_FREEBSD_MIN EF_ARM_EABI_VER4 hdr = (const Elf32_Ehdr *)imgp->image_header; if (EF_ARM_EABI_VERSION(hdr->e_flags) < EF_ARM_EABI_FREEBSD_MIN) { if (bootverbose) uprintf("Attempting to execute non EABI binary " "(rev %d) image %s", EF_ARM_EABI_VERSION(hdr->e_flags), imgp->args->fname); return (false); } return (true); } static int freebsd32_fetch_syscall_args(struct thread *td) { struct proc *p; register_t *ap; struct syscall_args *sa; int error, i, nap, narg; unsigned int args[4]; nap = 4; p = td->td_proc; ap = td->td_frame->tf_x; sa = &td->td_sa; /* r7 is the syscall id */ sa->code = td->td_frame->tf_x[7]; sa->original_code = sa->code; if (sa->code == SYS_syscall) { sa->code = *ap++; nap--; } else if (sa->code == SYS___syscall) { sa->code = ap[1]; nap -= 2; ap += 2; } if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; narg = sa->callp->sy_narg; for (i = 0; i < nap; i++) sa->args[i] = ap[i]; if (narg > nap) { if (narg - nap > nitems(args)) panic("Too many system call arguiments"); error = copyin((void *)td->td_frame->tf_x[13], args, (narg - nap) * sizeof(int)); if (error != 0) return (error); for (i = 0; i < (narg - nap); i++) sa->args[i + nap] = args[i]; } td->td_retval[0] = 0; td->td_retval[1] = 0; return (0); } static void freebsd32_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame; frame = td->td_frame; switch (error) { case 0: frame->tf_x[0] = td->td_retval[0]; frame->tf_x[1] = td->td_retval[1]; frame->tf_spsr &= ~PSR_C; break; case ERESTART: /* * Reconstruct the pc to point at the swi. */ if ((frame->tf_spsr & PSR_T) != 0) frame->tf_elr -= 2; //THUMB_INSN_SIZE; else frame->tf_elr -= 4; //INSN_SIZE; break; case EJUSTRETURN: /* nothing to do */ break; default: frame->tf_x[0] = error; frame->tf_spsr |= PSR_C; break; } } static void freebsd32_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct trapframe *tf = td->td_frame; struct pcb *pcb = td->td_pcb; memset(tf, 0, sizeof(struct trapframe)); /* * We need to set x0 for init as it doesn't call * cpu_set_syscall_retval to copy the value. We also * need to set td_retval for the cases where we do. */ tf->tf_x[0] = stack; /* SP_usr is mapped to x13 */ tf->tf_x[13] = stack; /* LR_usr is mapped to x14 */ tf->tf_x[14] = imgp->entry_addr; tf->tf_elr = imgp->entry_addr; tf->tf_spsr = PSR_M_32; if ((uint32_t)imgp->entry_addr & 1) tf->tf_spsr |= PSR_T; #ifdef VFP vfp_reset_state(td, pcb); #endif /* * Clear debug register state. It is not applicable to the new process. */ bzero(&pcb->pcb_dbg_regs, sizeof(pcb->pcb_dbg_regs)); } void elf32_dump_thread(struct thread *td, void *dst, size_t *off) { } diff --git a/sys/arm64/arm64/elf_machdep.c b/sys/arm64/arm64/elf_machdep.c index ba55e7a0a597..6a9dc708cf6e 100644 --- a/sys/arm64/arm64/elf_machdep.c +++ b/sys/arm64/arm64/elf_machdep.c @@ -1,314 +1,313 @@ /*- * Copyright (c) 2014, 2015 The FreeBSD Foundation. * Copyright (c) 2014 Andrew Turner. * All rights reserved. * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "linker_if.h" u_long __read_frequently elf_hwcap; u_long __read_frequently elf_hwcap2; struct arm64_addr_mask elf64_addr_mask; static struct sysentvec elf64_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD ELF64", .sv_coredump = __elfN(coredump), .sv_elf_core_osabi = ELFOSABI_FREEBSD, .sv_elf_core_abi_vendor = FREEBSD_ABI_VENDOR, .sv_elf_core_prepare_notes = __elfN(prepare_notes), - .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_psstringssz = sizeof(struct ps_strings), .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, .sv_copyout_auxargs = __elfN(freebsd_copyout_auxargs), .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_SHP | SV_TIMEKEEP | SV_ABI_FREEBSD | SV_LP64 | SV_ASLR | SV_RNG_SEED_VER, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_hwcap = &elf_hwcap, .sv_hwcap2 = &elf_hwcap2, .sv_onexec_old = exec_onexec_old, .sv_onexit = exit_onexit, .sv_regset_begin = SET_BEGIN(__elfN(regset)), .sv_regset_end = SET_LIMIT(__elfN(regset)), }; INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec); static Elf64_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_AARCH64, .compat_3_brand = "FreeBSD", .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_info); static bool get_arm64_addr_mask(struct regset *rs, struct thread *td, void *buf, size_t *sizep) { if (buf != NULL) { KASSERT(*sizep == sizeof(elf64_addr_mask), ("%s: invalid size", __func__)); memcpy(buf, &elf64_addr_mask, sizeof(elf64_addr_mask)); } *sizep = sizeof(elf64_addr_mask); return (true); } static struct regset regset_arm64_addr_mask = { .note = NT_ARM_ADDR_MASK, .size = sizeof(struct arm64_addr_mask), .get = get_arm64_addr_mask, }; ELF_REGSET(regset_arm64_addr_mask); void elf64_dump_thread(struct thread *td __unused, void *dst __unused, size_t *off __unused) { } bool elf_is_ifunc_reloc(Elf_Size r_info __unused) { return (ELF_R_TYPE(r_info) == R_AARCH64_IRELATIVE); } static int reloc_instr_imm(Elf32_Addr *where, Elf_Addr val, u_int msb, u_int lsb) { /* Check bounds: upper bits must be all ones or all zeros. */ if ((uint64_t)((int64_t)val >> (msb + 1)) + 1 > 1) return (-1); val >>= lsb; val &= (1 << (msb - lsb + 1)) - 1; *where |= (Elf32_Addr)val; return (0); } /* * Process a relocation. Support for some static relocations is required * in order for the -zifunc-noplt optimization to work. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, int flags, elf_lookup_fn lookup) { #define ARM64_ELF_RELOC_LOCAL (1 << 0) #define ARM64_ELF_RELOC_LATE_IFUNC (1 << 1) Elf_Addr *where, addr, addend, val; Elf_Word rtype, symidx; const Elf_Rel *rel; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: rel = (const Elf_Rel *)data; where = (Elf_Addr *) (relocbase + rel->r_offset); addend = *where; rtype = ELF_R_TYPE(rel->r_info); symidx = ELF_R_SYM(rel->r_info); break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("unknown reloc type %d\n", type); } if ((flags & ARM64_ELF_RELOC_LATE_IFUNC) != 0) { KASSERT(type == ELF_RELOC_RELA, ("Only RELA ifunc relocations are supported")); if (rtype != R_AARCH64_IRELATIVE) return (0); } if ((flags & ARM64_ELF_RELOC_LOCAL) != 0) { if (rtype == R_AARCH64_RELATIVE) *where = elf_relocaddr(lf, relocbase + addend); return (0); } error = 0; switch (rtype) { case R_AARCH64_NONE: case R_AARCH64_RELATIVE: break; case R_AARCH64_TSTBR14: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); error = reloc_instr_imm((Elf32_Addr *)where, addr + addend - (Elf_Addr)where, 15, 2); break; case R_AARCH64_CONDBR19: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); error = reloc_instr_imm((Elf32_Addr *)where, addr + addend - (Elf_Addr)where, 20, 2); break; case R_AARCH64_JUMP26: case R_AARCH64_CALL26: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); error = reloc_instr_imm((Elf32_Addr *)where, addr + addend - (Elf_Addr)where, 27, 2); break; case R_AARCH64_ABS64: case R_AARCH64_GLOB_DAT: case R_AARCH64_JUMP_SLOT: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); *where = addr + addend; break; case R_AARCH64_IRELATIVE: addr = relocbase + addend; val = ((Elf64_Addr (*)(void))addr)(); if (*where != val) *where = val; break; default: printf("kldload: unexpected relocation type %d, " "symbol index %d\n", rtype, symidx); return (-1); } return (error); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, ARM64_ELF_RELOC_LOCAL, lookup)); } /* Process one elf relocation with addend. */ int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); } int elf_reloc_late(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, ARM64_ELF_RELOC_LATE_IFUNC, lookup)); } int elf_cpu_load_file(linker_file_t lf) { if (lf->id != 1) cpu_icache_sync_range((vm_offset_t)lf->address, lf->size); return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } int elf_cpu_parse_dynamic(caddr_t loadbase __unused, Elf_Dyn *dynamic __unused) { return (0); } diff --git a/sys/compat/ia32/ia32_sysvec.c b/sys/compat/ia32/ia32_sysvec.c index f5aa07dec265..83911c47438d 100644 --- a/sys/compat/ia32/ia32_sysvec.c +++ b/sys/compat/ia32/ia32_sysvec.c @@ -1,246 +1,245 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2002 Doug Rabson * Copyright (c) 2003 Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include CTASSERT(sizeof(struct ia32_mcontext) == 640); CTASSERT(sizeof(struct ia32_ucontext) == 704); CTASSERT(sizeof(struct ia32_sigframe) == 800); CTASSERT(sizeof(struct siginfo32) == 64); #ifdef COMPAT_FREEBSD4 CTASSERT(sizeof(struct ia32_freebsd4_mcontext) == 260); CTASSERT(sizeof(struct ia32_freebsd4_ucontext) == 324); CTASSERT(sizeof(struct ia32_freebsd4_sigframe) == 408); #endif #include "vdso_ia32_offsets.h" extern const char _binary_elf_vdso32_so_1_start[]; extern const char _binary_elf_vdso32_so_1_end[]; extern char _binary_elf_vdso32_so_1_size; extern const char *freebsd32_syscallnames[]; static SYSCTL_NODE(_compat, OID_AUTO, ia32, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "ia32 mode"); static u_long ia32_maxdsiz = IA32_MAXDSIZ; SYSCTL_ULONG(_compat_ia32, OID_AUTO, maxdsiz, CTLFLAG_RWTUN, &ia32_maxdsiz, 0, ""); u_long ia32_maxssiz = IA32_MAXSSIZ; SYSCTL_ULONG(_compat_ia32, OID_AUTO, maxssiz, CTLFLAG_RWTUN, &ia32_maxssiz, 0, ""); static u_long ia32_maxvmem = IA32_MAXVMEM; SYSCTL_ULONG(_compat_ia32, OID_AUTO, maxvmem, CTLFLAG_RWTUN, &ia32_maxvmem, 0, ""); struct sysentvec ia32_freebsd_sysvec = { .sv_size = FREEBSD32_SYS_MAXSYSCALL, .sv_table = freebsd32_sysent, .sv_fixup = elf32_freebsd_fixup, .sv_sendsig = ia32_sendsig, .sv_sigcode = _binary_elf_vdso32_so_1_start, .sv_szsigcode = (int *)&_binary_elf_vdso32_so_1_size, .sv_sigcodeoff = VDSO_IA32_SIGCODE_OFFSET, .sv_name = "FreeBSD ELF32", .sv_coredump = elf32_coredump, .sv_elf_core_osabi = ELFOSABI_FREEBSD, .sv_elf_core_abi_vendor = FREEBSD_ABI_VENDOR, .sv_elf_core_prepare_notes = elf32_prepare_notes, - .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = FREEBSD32_MINUSER, .sv_maxuser = FREEBSD32_MAXUSER, .sv_usrstack = FREEBSD32_USRSTACK, .sv_psstrings = FREEBSD32_PS_STRINGS, .sv_psstringssz = sizeof(struct freebsd32_ps_strings), .sv_stackprot = VM_PROT_ALL, .sv_copyout_auxargs = elf32_freebsd_copyout_auxargs, .sv_copyout_strings = freebsd32_copyout_strings, .sv_setregs = ia32_setregs, .sv_fixlimit = ia32_fixlimit, .sv_maxssiz = &ia32_maxssiz, .sv_flags = SV_ABI_FREEBSD | SV_ASLR | SV_IA32 | SV_ILP32 | SV_SHP | SV_TIMEKEEP | SV_RNG_SEED_VER | SV_DSO_SIG, .sv_set_syscall_retval = ia32_set_syscall_retval, .sv_fetch_syscall_args = ia32_fetch_syscall_args, .sv_syscallnames = freebsd32_syscallnames, .sv_shared_page_base = FREEBSD32_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_onexec_old = exec_onexec_old, .sv_onexit = exit_onexit, .sv_set_fork_retval = x86_set_fork_retval, .sv_regset_begin = SET_BEGIN(__elfN(regset)), .sv_regset_end = SET_LIMIT(__elfN(regset)), }; INIT_SYSENTVEC(elf_ia32_sysvec, &ia32_freebsd_sysvec); static Elf32_Brandinfo ia32_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .interp_path = "/libexec/ld-elf.so.1", .sysvec = &ia32_freebsd_sysvec, .interp_newpath = "/libexec/ld-elf32.so.1", .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(ia32, SI_SUB_EXEC, SI_ORDER_MIDDLE, (sysinit_cfunc_t) elf32_insert_brand_entry, &ia32_brand_info); static Elf32_Brandinfo ia32_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &ia32_freebsd_sysvec, .interp_newpath = "/libexec/ld-elf32.so.1", .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(oia32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &ia32_brand_oinfo); static Elf32_Brandinfo kia32_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .interp_path = "/lib/ld.so.1", .sysvec = &ia32_freebsd_sysvec, .brand_note = &elf32_kfreebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY }; SYSINIT(kia32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &kia32_brand_info); void elf32_dump_thread(struct thread *td, void *dst, size_t *off) { void *buf; size_t len; len = 0; if (use_xsave) { if (dst != NULL) { fpugetregs(td); len += elf32_populate_note(NT_X86_XSTATE, get_pcb_user_save_td(td), dst, cpu_max_ext_state_size, &buf); *(uint64_t *)((char *)buf + X86_XSTATE_XCR0_OFFSET) = xsave_mask; } else len += elf32_populate_note(NT_X86_XSTATE, NULL, NULL, cpu_max_ext_state_size, NULL); } *off = len; } void ia32_fixlimit(struct rlimit *rl, int which) { switch (which) { case RLIMIT_DATA: if (ia32_maxdsiz != 0) { if (rl->rlim_cur > ia32_maxdsiz) rl->rlim_cur = ia32_maxdsiz; if (rl->rlim_max > ia32_maxdsiz) rl->rlim_max = ia32_maxdsiz; } break; case RLIMIT_STACK: if (ia32_maxssiz != 0) { if (rl->rlim_cur > ia32_maxssiz) rl->rlim_cur = ia32_maxssiz; if (rl->rlim_max > ia32_maxssiz) rl->rlim_max = ia32_maxssiz; } break; case RLIMIT_VMEM: if (ia32_maxvmem != 0) { if (rl->rlim_cur > ia32_maxvmem) rl->rlim_cur = ia32_maxvmem; if (rl->rlim_max > ia32_maxvmem) rl->rlim_max = ia32_maxvmem; } break; } } diff --git a/sys/i386/i386/elf_machdep.c b/sys/i386/i386/elf_machdep.c index fbbb3bb3dd46..d8b7ab9c586b 100644 --- a/sys/i386/i386/elf_machdep.c +++ b/sys/i386/i386/elf_machdep.c @@ -1,309 +1,308 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD ELF32", .sv_coredump = __elfN(coredump), .sv_elf_core_osabi = ELFOSABI_FREEBSD, .sv_elf_core_abi_vendor = FREEBSD_ABI_VENDOR, .sv_elf_core_prepare_notes = __elfN(prepare_notes), - .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_psstringssz = sizeof(struct ps_strings), .sv_stackprot = VM_PROT_ALL, .sv_copyout_auxargs = __elfN(freebsd_copyout_auxargs), .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_ASLR | SV_IA32 | SV_ILP32 | SV_SHP | SV_TIMEKEEP | SV_RNG_SEED_VER, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_onexec_old = exec_onexec_old, .sv_onexit = exit_onexit, .sv_set_fork_retval = x86_set_fork_retval, .sv_regset_begin = SET_BEGIN(__elfN(regset)), .sv_regset_end = SET_LIMIT(__elfN(regset)), }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); static Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); static Elf32_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(oelf32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_oinfo); static Elf32_Brandinfo kfreebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .interp_path = "/lib/ld.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_kfreebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY }; SYSINIT(kelf32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &kfreebsd_brand_info); void elf32_dump_thread(struct thread *td, void *dst, size_t *off) { void *buf; size_t len; len = 0; if (use_xsave) { if (dst != NULL) { npxgetregs(td); len += elf32_populate_note(NT_X86_XSTATE, get_pcb_user_save_td(td), dst, cpu_max_ext_state_size, &buf); *(uint64_t *)((char *)buf + X86_XSTATE_XCR0_OFFSET) = xsave_mask; } else len += elf32_populate_note(NT_X86_XSTATE, NULL, NULL, cpu_max_ext_state_size, NULL); } *off = len; } bool elf_is_ifunc_reloc(Elf_Size r_info) { return (ELF_R_TYPE(r_info) == R_386_IRELATIVE); } #define ERI_LOCAL 0x0001 /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup, int flags) { Elf_Addr *where; Elf_Addr addr; Elf_Addr addend; Elf_Word rtype, symidx; const Elf_Rel *rel; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: rel = (const Elf_Rel *)data; where = (Elf_Addr *) (relocbase + rel->r_offset); addend = *where; rtype = ELF_R_TYPE(rel->r_info); symidx = ELF_R_SYM(rel->r_info); break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("unknown reloc type %d\n", type); } if ((flags & ERI_LOCAL) != 0) { if (rtype == R_386_RELATIVE) { /* A + B */ addr = elf_relocaddr(lf, relocbase + addend); if (*where != addr) *where = addr; } return (0); } switch (rtype) { case R_386_NONE: /* none */ break; case R_386_32: /* S + A */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); addr += addend; if (*where != addr) *where = addr; break; case R_386_PC32: /* S + A - P */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); addr += addend - (Elf_Addr)where; if (*where != addr) *where = addr; break; case R_386_COPY: /* none */ /* * There shouldn't be copy relocations in kernel * objects. */ printf("kldload: unexpected R_COPY relocation, " "symbol index %d\n", symidx); return (-1); break; case R_386_GLOB_DAT: /* S */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); if (*where != addr) *where = addr; break; case R_386_RELATIVE: break; case R_386_IRELATIVE: addr = relocbase + addend; addr = ((Elf_Addr (*)(void))addr)(); if (*where != addr) *where = addr; break; default: printf("kldload: unexpected relocation type %d, " "symbol index %d\n", rtype, symidx); return (-1); } return(0); } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, lookup, 0)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, lookup, ERI_LOCAL)); } int elf_cpu_load_file(linker_file_t lf __unused) { return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } int elf_cpu_parse_dynamic(caddr_t loadbase __unused, Elf_Dyn *dynamic __unused) { return (0); } diff --git a/sys/kern/imgact_aout.c b/sys/kern/imgact_aout.c index 275fc4e8f3e1..bda6bfeda4c5 100644 --- a/sys/kern/imgact_aout.c +++ b/sys/kern/imgact_aout.c @@ -1,371 +1,369 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 1993, David Greenman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __amd64__ #include #include #include #include #include #endif static int exec_aout_imgact(struct image_params *imgp); static int aout_fixup(uintptr_t *stack_base, struct image_params *imgp); #define AOUT32_USRSTACK 0xbfc00000 #if defined(__i386__) #define AOUT32_PS_STRINGS (AOUT32_USRSTACK - sizeof(struct ps_strings)) struct sysentvec aout_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, .sv_fixup = aout_fixup, .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD a.out", .sv_coredump = NULL, - .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = AOUT32_USRSTACK, .sv_usrstack = AOUT32_USRSTACK, .sv_psstrings = AOUT32_PS_STRINGS, .sv_psstringssz = sizeof(struct ps_strings), .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_AOUT | SV_IA32 | SV_ILP32, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_onexec_old = exec_onexec_old, .sv_onexit = exit_onexit, .sv_set_fork_retval = x86_set_fork_retval, }; #elif defined(__amd64__) #include "vdso_ia32_offsets.h" extern const char _binary_elf_vdso32_so_1_start[]; extern const char _binary_elf_vdso32_so_1_end[]; extern char _binary_elf_vdso32_so_1_size; #define AOUT32_PS_STRINGS \ (AOUT32_USRSTACK - sizeof(struct freebsd32_ps_strings)) #define AOUT32_MINUSER FREEBSD32_MINUSER extern const char *freebsd32_syscallnames[]; extern u_long ia32_maxssiz; static int aout_szsigcode; struct sysentvec aout_sysvec = { .sv_size = FREEBSD32_SYS_MAXSYSCALL, .sv_table = freebsd32_sysent, .sv_fixup = aout_fixup, .sv_sendsig = ia32_sendsig, .sv_sigcode = _binary_elf_vdso32_so_1_start, .sv_szsigcode = &aout_szsigcode, .sv_name = "FreeBSD a.out", .sv_coredump = NULL, - .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = AOUT32_MINUSER, .sv_maxuser = AOUT32_USRSTACK, .sv_usrstack = AOUT32_USRSTACK, .sv_psstrings = AOUT32_PS_STRINGS, .sv_psstringssz = sizeof(struct freebsd32_ps_strings), .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = freebsd32_copyout_strings, .sv_setregs = ia32_setregs, .sv_fixlimit = ia32_fixlimit, .sv_maxssiz = &ia32_maxssiz, .sv_flags = SV_ABI_FREEBSD | SV_AOUT | SV_IA32 | SV_ILP32, .sv_set_syscall_retval = ia32_set_syscall_retval, .sv_fetch_syscall_args = ia32_fetch_syscall_args, .sv_syscallnames = freebsd32_syscallnames, .sv_onexec_old = exec_onexec_old, .sv_onexit = exit_onexit, .sv_set_fork_retval = x86_set_fork_retval, }; static void aout_sysent(void *arg __unused) { aout_szsigcode = (int)(uintptr_t)&_binary_elf_vdso32_so_1_size; } SYSINIT(aout_sysent, SI_SUB_EXEC, SI_ORDER_ANY, aout_sysent, NULL); #else #error "Only ia32 arch is supported" #endif static int aout_fixup(uintptr_t *stack_base, struct image_params *imgp) { *stack_base -= sizeof(uint32_t); if (suword32((void *)*stack_base, imgp->args->argc) != 0) return (EFAULT); return (0); } static int exec_aout_imgact(struct image_params *imgp) { const struct exec *a_out; struct vmspace *vmspace; vm_map_t map; vm_object_t object; vm_offset_t text_end, data_end; unsigned long virtual_offset; unsigned long file_offset; unsigned long bss_size; int error; a_out = (const struct exec *)imgp->image_header; /* * Linux and *BSD binaries look very much alike, * only the machine id is different: * 0x64 for Linux, 0x86 for *BSD, 0x00 for BSDI. * NetBSD is in network byte order.. ugh. */ if (((a_out->a_midmag >> 16) & 0xff) != 0x86 && ((a_out->a_midmag >> 16) & 0xff) != 0 && ((((int)ntohl(a_out->a_midmag)) >> 16) & 0xff) != 0x86) return (-1); /* * Set file/virtual offset based on a.out variant. * We do two cases: host byte order and network byte order * (for NetBSD compatibility) */ switch ((int)(a_out->a_midmag & 0xffff)) { case ZMAGIC: virtual_offset = 0; if (a_out->a_text) { file_offset = PAGE_SIZE; } else { /* Bill's "screwball mode" */ file_offset = 0; } break; case QMAGIC: virtual_offset = PAGE_SIZE; file_offset = 0; /* Pass PS_STRINGS for BSD/OS binaries only. */ if (N_GETMID(*a_out) == MID_ZERO) imgp->ps_strings = (void *)aout_sysvec.sv_psstrings; break; default: /* NetBSD compatibility */ switch ((int)(ntohl(a_out->a_midmag) & 0xffff)) { case ZMAGIC: case QMAGIC: virtual_offset = PAGE_SIZE; file_offset = 0; break; default: return (-1); } } bss_size = roundup(a_out->a_bss, PAGE_SIZE); /* * Check various fields in header for validity/bounds. */ if (/* entry point must lay with text region */ a_out->a_entry < virtual_offset || a_out->a_entry >= virtual_offset + a_out->a_text || /* text and data size must each be page rounded */ a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK #ifdef __amd64__ || /* overflows */ virtual_offset + a_out->a_text + a_out->a_data + bss_size > UINT_MAX #endif ) return (-1); /* text + data can't exceed file size */ if (a_out->a_data + a_out->a_text > imgp->attr->va_size) return (EFAULT); /* * text/data/bss must not exceed limits */ PROC_LOCK(imgp->proc); if (/* text can't exceed maximum text size */ a_out->a_text > maxtsiz || /* data + bss can't exceed rlimit */ a_out->a_data + bss_size > lim_cur_proc(imgp->proc, RLIMIT_DATA) || racct_set(imgp->proc, RACCT_DATA, a_out->a_data + bss_size) != 0) { PROC_UNLOCK(imgp->proc); return (ENOMEM); } PROC_UNLOCK(imgp->proc); /* * Avoid a possible deadlock if the current address space is destroyed * and that address space maps the locked vnode. In the common case, * the locked vnode's v_usecount is decremented but remains greater * than zero. Consequently, the vnode lock is not needed by vrele(). * However, in cases where the vnode lock is external, such as nullfs, * v_usecount may become zero. */ VOP_UNLOCK(imgp->vp); /* * Destroy old process VM and create a new one (with a new stack) */ error = exec_new_vmspace(imgp, &aout_sysvec); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); if (error) return (error); /* * The vm space can be changed by exec_new_vmspace */ vmspace = imgp->proc->p_vmspace; object = imgp->object; map = &vmspace->vm_map; vm_map_lock(map); vm_object_reference(object); text_end = virtual_offset + a_out->a_text; error = vm_map_insert(map, object, file_offset, virtual_offset, text_end, VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_ALL, MAP_COPY_ON_WRITE | MAP_PREFAULT | MAP_VN_EXEC); if (error) { vm_map_unlock(map); vm_object_deallocate(object); return (error); } VOP_SET_TEXT_CHECKED(imgp->vp); data_end = text_end + a_out->a_data; if (a_out->a_data) { vm_object_reference(object); error = vm_map_insert(map, object, file_offset + a_out->a_text, text_end, data_end, VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE | MAP_PREFAULT | MAP_VN_EXEC); if (error) { vm_map_unlock(map); vm_object_deallocate(object); return (error); } VOP_SET_TEXT_CHECKED(imgp->vp); } if (bss_size) { error = vm_map_insert(map, NULL, 0, data_end, data_end + bss_size, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) { vm_map_unlock(map); return (error); } } vm_map_unlock(map); /* Fill in process VM information */ vmspace->vm_tsize = a_out->a_text >> PAGE_SHIFT; vmspace->vm_dsize = (a_out->a_data + bss_size) >> PAGE_SHIFT; vmspace->vm_taddr = (caddr_t) (uintptr_t) virtual_offset; vmspace->vm_daddr = (caddr_t) (uintptr_t) (virtual_offset + a_out->a_text); error = exec_map_stack(imgp); if (error != 0) return (error); /* Fill in image_params */ imgp->interpreted = 0; imgp->entry_addr = a_out->a_entry; imgp->proc->p_sysent = &aout_sysvec; return (0); } /* * Tell kern_execve.c about it, with a little help from the linker. */ static struct execsw aout_execsw = { .ex_imgact = exec_aout_imgact, .ex_name = "a.out" }; EXEC_SET(aout, aout_execsw); diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index e4cb501bc57b..77a0f5478eb6 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -1,911 +1,910 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1995 Terrence R. Lambert * All rights reserved. * * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)init_main.c 8.9 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_kdb.h" #include "opt_init_path.h" #include "opt_verbose_sysinit.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include void mi_startup(void); /* Should be elsewhere */ /* Components of the first process -- never freed. */ static struct session session0; static struct pgrp pgrp0; struct proc proc0; struct thread0_storage thread0_st __aligned(32); struct vmspace vmspace0; struct proc *initproc; int linux_alloc_current_noop(struct thread *td __unused, int flags __unused) { return (0); } int (*lkpi_alloc_current)(struct thread *, int) = linux_alloc_current_noop; #ifndef BOOTHOWTO #define BOOTHOWTO 0 #endif int boothowto = BOOTHOWTO; /* initialized so that it can be patched */ SYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0, "Boot control flags, passed from loader"); #ifndef BOOTVERBOSE #define BOOTVERBOSE 0 #endif int bootverbose = BOOTVERBOSE; SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0, "Control the output of verbose kernel messages"); #ifdef VERBOSE_SYSINIT /* * We'll use the defined value of VERBOSE_SYSINIT from the kernel config to * dictate the default VERBOSE_SYSINIT behavior. Significant values for this * option and associated tunable are: * - 0, 'compiled in but silent by default' * - 1, 'compiled in but verbose by default' (default) */ int verbose_sysinit = VERBOSE_SYSINIT; TUNABLE_INT("debug.verbose_sysinit", &verbose_sysinit); #endif #ifdef INVARIANTS FEATURE(invariants, "Kernel compiled with INVARIANTS, may affect performance"); #endif /* * This ensures that there is at least one entry so that the sysinit_set * symbol is not undefined. A sybsystem ID of SI_SUB_DUMMY is never * executed. */ SYSINIT(placeholder, SI_SUB_DUMMY, SI_ORDER_ANY, NULL, NULL); /* * The sysinit table itself. Items are checked off as the are run. * If we want to register new sysinit types, add them to newsysinit. */ SET_DECLARE(sysinit_set, struct sysinit); struct sysinit **sysinit, **sysinit_end; struct sysinit **newsysinit, **newsysinit_end; /* * Merge a new sysinit set into the current set, reallocating it if * necessary. This can only be called after malloc is running. */ void sysinit_add(struct sysinit **set, struct sysinit **set_end) { struct sysinit **newset; struct sysinit **sipp; struct sysinit **xipp; int count; count = set_end - set; if (newsysinit) count += newsysinit_end - newsysinit; else count += sysinit_end - sysinit; newset = malloc(count * sizeof(*sipp), M_TEMP, M_NOWAIT); if (newset == NULL) panic("cannot malloc for sysinit"); xipp = newset; if (newsysinit) for (sipp = newsysinit; sipp < newsysinit_end; sipp++) *xipp++ = *sipp; else for (sipp = sysinit; sipp < sysinit_end; sipp++) *xipp++ = *sipp; for (sipp = set; sipp < set_end; sipp++) *xipp++ = *sipp; if (newsysinit) free(newsysinit, M_TEMP); newsysinit = newset; newsysinit_end = newset + count; } #if defined (DDB) && defined(VERBOSE_SYSINIT) static const char * symbol_name(vm_offset_t va, db_strategy_t strategy) { const char *name; c_db_sym_t sym; db_expr_t offset; if (va == 0) return (NULL); sym = db_search_symbol(va, strategy, &offset); if (offset != 0) return (NULL); db_symbol_values(sym, &name, NULL); return (name); } #endif /* * System startup; initialize the world, create process 0, mount root * filesystem, and fork to create init and pagedaemon. Most of the * hard work is done in the lower-level initialization routines including * startup(), which does memory initialization and autoconfiguration. * * This allows simple addition of new kernel subsystems that require * boot time initialization. It also allows substitution of subsystem * (for instance, a scheduler, kernel profiler, or VM system) by object * module. Finally, it allows for optional "kernel threads". */ void mi_startup(void) { struct sysinit **sipp; /* system initialization*/ struct sysinit **xipp; /* interior loop of sort*/ struct sysinit *save; /* bubble*/ int last; #if defined(VERBOSE_SYSINIT) int verbose; #endif TSENTER(); if (boothowto & RB_VERBOSE) bootverbose++; if (sysinit == NULL) { sysinit = SET_BEGIN(sysinit_set); sysinit_end = SET_LIMIT(sysinit_set); } restart: /* * Perform a bubble sort of the system initialization objects by * their subsystem (primary key) and order (secondary key). */ TSENTER2("bubblesort"); for (sipp = sysinit; sipp < sysinit_end; sipp++) { for (xipp = sipp + 1; xipp < sysinit_end; xipp++) { if ((*sipp)->subsystem < (*xipp)->subsystem || ((*sipp)->subsystem == (*xipp)->subsystem && (*sipp)->order <= (*xipp)->order)) continue; /* skip*/ save = *sipp; *sipp = *xipp; *xipp = save; } } TSEXIT2("bubblesort"); last = SI_SUB_COPYRIGHT; #if defined(VERBOSE_SYSINIT) verbose = 0; #if !defined(DDB) printf("VERBOSE_SYSINIT: DDB not enabled, symbol lookups disabled.\n"); #endif #endif /* * Traverse the (now) ordered list of system initialization tasks. * Perform each task, and continue on to the next task. */ for (sipp = sysinit; sipp < sysinit_end; sipp++) { if ((*sipp)->subsystem == SI_SUB_DUMMY) continue; /* skip dummy task(s)*/ if ((*sipp)->subsystem == SI_SUB_DONE) continue; if ((*sipp)->subsystem > last) BOOTTRACE_INIT("sysinit 0x%7x", (*sipp)->subsystem); #if defined(VERBOSE_SYSINIT) if ((*sipp)->subsystem > last && verbose_sysinit != 0) { verbose = 1; printf("subsystem %x\n", last); } if (verbose) { #if defined(DDB) const char *func, *data; func = symbol_name((vm_offset_t)(*sipp)->func, DB_STGY_PROC); data = symbol_name((vm_offset_t)(*sipp)->udata, DB_STGY_ANY); if (func != NULL && data != NULL) printf(" %s(&%s)... ", func, data); else if (func != NULL) printf(" %s(%p)... ", func, (*sipp)->udata); else #endif printf(" %p(%p)... ", (*sipp)->func, (*sipp)->udata); } #endif /* Call function */ (*((*sipp)->func))((*sipp)->udata); #if defined(VERBOSE_SYSINIT) if (verbose) printf("done.\n"); #endif /* Check off the one we're just done */ last = (*sipp)->subsystem; (*sipp)->subsystem = SI_SUB_DONE; /* Check if we've installed more sysinit items via KLD */ if (newsysinit != NULL) { if (sysinit != SET_BEGIN(sysinit_set)) free(sysinit, M_TEMP); sysinit = newsysinit; sysinit_end = newsysinit_end; newsysinit = NULL; newsysinit_end = NULL; goto restart; } } TSEXIT(); /* Here so we don't overlap with start_init. */ BOOTTRACE("mi_startup done"); mtx_assert(&Giant, MA_OWNED | MA_NOTRECURSED); mtx_unlock(&Giant); /* * Now hand over this thread to swapper. */ swapper(); /* NOTREACHED*/ } static void print_caddr_t(void *data) { printf("%s", (char *)data); } static void print_version(void *data __unused) { int len; /* Strip a trailing newline from version. */ len = strlen(version); while (len > 0 && version[len - 1] == '\n') len--; printf("%.*s %s\n", len, version, machine); printf("%s\n", compiler_version); } SYSINIT(announce, SI_SUB_COPYRIGHT, SI_ORDER_FIRST, print_caddr_t, copyright); SYSINIT(trademark, SI_SUB_COPYRIGHT, SI_ORDER_SECOND, print_caddr_t, trademark); SYSINIT(version, SI_SUB_COPYRIGHT, SI_ORDER_THIRD, print_version, NULL); #ifdef WITNESS static char wit_warn[] = "WARNING: WITNESS option enabled, expect reduced performance.\n"; SYSINIT(witwarn, SI_SUB_COPYRIGHT, SI_ORDER_FOURTH, print_caddr_t, wit_warn); SYSINIT(witwarn2, SI_SUB_LAST, SI_ORDER_FOURTH, print_caddr_t, wit_warn); #endif #ifdef DIAGNOSTIC static char diag_warn[] = "WARNING: DIAGNOSTIC option enabled, expect reduced performance.\n"; SYSINIT(diagwarn, SI_SUB_COPYRIGHT, SI_ORDER_FIFTH, print_caddr_t, diag_warn); SYSINIT(diagwarn2, SI_SUB_LAST, SI_ORDER_FIFTH, print_caddr_t, diag_warn); #endif static int null_fetch_syscall_args(struct thread *td __unused) { panic("null_fetch_syscall_args"); } static void null_set_syscall_retval(struct thread *td __unused, int error __unused) { panic("null_set_syscall_retval"); } static void null_set_fork_retval(struct thread *td __unused) { } struct sysentvec null_sysvec = { .sv_size = 0, .sv_table = NULL, .sv_fixup = NULL, .sv_sendsig = NULL, .sv_sigcode = NULL, .sv_szsigcode = NULL, .sv_name = "null", .sv_coredump = NULL, - .sv_imgact_try = NULL, .sv_minsigstksz = 0, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_psstringssz = sizeof(struct ps_strings), .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = NULL, .sv_setregs = NULL, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = 0, .sv_set_syscall_retval = null_set_syscall_retval, .sv_fetch_syscall_args = null_fetch_syscall_args, .sv_syscallnames = NULL, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_set_fork_retval = null_set_fork_retval, .sv_regset_begin = NULL, .sv_regset_end = NULL, }; /* * The two following SYSINIT's are proc0 specific glue code. I am not * convinced that they can not be safely combined, but their order of * operation has been maintained as the same as the original init_main.c * for right now. */ /* ARGSUSED*/ static void proc0_init(void *dummy __unused) { struct proc *p; struct thread *td; struct ucred *newcred; struct uidinfo tmpuinfo; struct loginclass tmplc = { .lc_name = "", }; vm_paddr_t pageablemem; int i; GIANT_REQUIRED; p = &proc0; td = &thread0; /* * Initialize magic number and osrel. */ p->p_magic = P_MAGIC; p->p_osrel = osreldate; /* * Initialize thread and process structures. */ procinit(); /* set up proc zone */ threadinit(); /* set up UMA zones */ /* * Initialise scheduler resources. * Add scheduler specific parts to proc, thread as needed. */ schedinit(); /* scheduler gets its house in order */ /* * Create process 0 (the swapper). */ LIST_INSERT_HEAD(&allproc, p, p_list); LIST_INSERT_HEAD(PIDHASH(0), p, p_hash); mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK); p->p_pgrp = &pgrp0; LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash); LIST_INIT(&pgrp0.pg_members); LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist); pgrp0.pg_session = &session0; mtx_init(&session0.s_mtx, "session", NULL, MTX_DEF); refcount_init(&session0.s_count, 1); session0.s_leader = p; p->p_sysent = &null_sysvec; p->p_flag = P_SYSTEM | P_INMEM | P_KPROC; p->p_flag2 = 0; p->p_state = PRS_NORMAL; p->p_klist = knlist_alloc(&p->p_mtx); STAILQ_INIT(&p->p_ktr); p->p_nice = NZERO; td->td_tid = THREAD0_TID; tidhash_add(td); TD_SET_STATE(td, TDS_RUNNING); td->td_pri_class = PRI_TIMESHARE; td->td_user_pri = PUSER; td->td_base_user_pri = PUSER; td->td_lend_user_pri = PRI_MAX; td->td_priority = PVM; td->td_base_pri = PVM; td->td_oncpu = curcpu; td->td_flags = TDF_INMEM; td->td_pflags = TDP_KTHREAD; td->td_cpuset = cpuset_thread0(); td->td_domain.dr_policy = td->td_cpuset->cs_domain; prison0_init(); p->p_peers = 0; p->p_leader = p; p->p_reaper = p; p->p_treeflag |= P_TREE_REAPER; LIST_INIT(&p->p_reaplist); strncpy(p->p_comm, "kernel", sizeof (p->p_comm)); strncpy(td->td_name, "swapper", sizeof (td->td_name)); callout_init_mtx(&p->p_itcallout, &p->p_mtx, 0); callout_init_mtx(&p->p_limco, &p->p_mtx, 0); callout_init(&td->td_slpcallout, 1); TAILQ_INIT(&p->p_kqtim_stop); /* Create credentials. */ newcred = crget(); newcred->cr_ngroups = 1; /* group 0 */ /* A hack to prevent uifind from tripping over NULL pointers. */ curthread->td_ucred = newcred; tmpuinfo.ui_uid = 1; newcred->cr_uidinfo = newcred->cr_ruidinfo = &tmpuinfo; newcred->cr_uidinfo = uifind(0); newcred->cr_ruidinfo = uifind(0); newcred->cr_loginclass = &tmplc; newcred->cr_loginclass = loginclass_find("default"); /* End hack. creds get properly set later with thread_cow_get_proc */ curthread->td_ucred = NULL; newcred->cr_prison = &prison0; newcred->cr_users++; /* avoid assertion failure */ proc_set_cred_init(p, newcred); newcred->cr_users--; crfree(newcred); #ifdef AUDIT audit_cred_kproc0(newcred); #endif #ifdef MAC mac_cred_create_swapper(newcred); #endif /* Create sigacts. */ p->p_sigacts = sigacts_alloc(); /* Initialize signal state for process 0. */ siginit(&proc0); /* Create the file descriptor table. */ p->p_pd = pdinit(NULL, false); p->p_fd = fdinit(); p->p_fdtol = NULL; /* Create the limits structures. */ p->p_limit = lim_alloc(); for (i = 0; i < RLIM_NLIMITS; i++) p->p_limit->pl_rlimit[i].rlim_cur = p->p_limit->pl_rlimit[i].rlim_max = RLIM_INFINITY; p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_cur = p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles; p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_cur = p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc; p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_cur = dfldsiz; p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_max = maxdsiz; p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_cur = dflssiz; p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_max = maxssiz; /* Cast to avoid overflow on i386/PAE. */ pageablemem = ptoa((vm_paddr_t)vm_free_count()); p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_cur = p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = pageablemem; p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = pageablemem / 3; p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = pageablemem; p->p_cpulimit = RLIM_INFINITY; PROC_LOCK(p); thread_cow_get_proc(td, p); PROC_UNLOCK(p); /* Initialize resource accounting structures. */ racct_create(&p->p_racct); p->p_stats = pstats_alloc(); /* Allocate a prototype map so we have something to fork. */ p->p_vmspace = &vmspace0; refcount_init(&vmspace0.vm_refcnt, 1); pmap_pinit0(vmspace_pmap(&vmspace0)); /* * proc0 is not expected to enter usermode, so there is no special * handling for sv_minuser here, like is done for exec_new_vmspace(). */ vm_map_init(&vmspace0.vm_map, vmspace_pmap(&vmspace0), p->p_sysent->sv_minuser, p->p_sysent->sv_maxuser); /* * Call the init and ctor for the new thread and proc. We wait * to do this until all other structures are fairly sane. */ EVENTHANDLER_DIRECT_INVOKE(process_init, p); EVENTHANDLER_DIRECT_INVOKE(thread_init, td); #ifdef KDTRACE_HOOKS kdtrace_proc_ctor(p); kdtrace_thread_ctor(td); #endif EVENTHANDLER_DIRECT_INVOKE(process_ctor, p); EVENTHANDLER_DIRECT_INVOKE(thread_ctor, td); /* * Charge root for one process. */ (void)chgproccnt(p->p_ucred->cr_ruidinfo, 1, 0); PROC_LOCK(p); racct_add_force(p, RACCT_NPROC, 1); PROC_UNLOCK(p); } SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL); /* ARGSUSED*/ static void proc0_post(void *dummy __unused) { struct proc *p; struct rusage ru; struct thread *td; /* * Now we can look at the time, having had a chance to verify the * time from the filesystem. Pretend that proc0 started now. */ sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NEW) { PROC_UNLOCK(p); continue; } microuptime(&p->p_stats->p_start); PROC_STATLOCK(p); rufetch(p, &ru); /* Clears thread stats */ p->p_rux.rux_runtime = 0; p->p_rux.rux_uticks = 0; p->p_rux.rux_sticks = 0; p->p_rux.rux_iticks = 0; PROC_STATUNLOCK(p); FOREACH_THREAD_IN_PROC(p, td) { td->td_runtime = 0; } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); PCPU_SET(switchtime, cpu_ticks()); PCPU_SET(switchticks, ticks); } SYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL); /* *************************************************************************** **** **** The following SYSINIT's and glue code should be moved to the **** respective files on a per subsystem basis. **** *************************************************************************** */ /* * List of paths to try when searching for "init". */ static char init_path[MAXPATHLEN] = #ifdef INIT_PATH __XSTRING(INIT_PATH); #else "/sbin/init:/sbin/oinit:/sbin/init.bak:/rescue/init"; #endif SYSCTL_STRING(_kern, OID_AUTO, init_path, CTLFLAG_RD, init_path, 0, "Path used to search the init process"); /* * Shutdown timeout of init(8). * Unused within kernel, but used to control init(8), hence do not remove. */ #ifndef INIT_SHUTDOWN_TIMEOUT #define INIT_SHUTDOWN_TIMEOUT 120 #endif static int init_shutdown_timeout = INIT_SHUTDOWN_TIMEOUT; SYSCTL_INT(_kern, OID_AUTO, init_shutdown_timeout, CTLFLAG_RW, &init_shutdown_timeout, 0, "Shutdown timeout of init(8). " "Unused within kernel, but used to control init(8)"); /* * Start the initial user process; try exec'ing each pathname in init_path. * The program is invoked with one argument containing the boot flags. */ static void start_init(void *dummy) { struct image_args args; int error; char *var, *path; char *free_init_path, *tmp_init_path; struct thread *td; struct proc *p; struct vmspace *oldvmspace; TSENTER(); /* Here so we don't overlap with mi_startup. */ td = curthread; p = td->td_proc; vfs_mountroot(); /* Wipe GELI passphrase from the environment. */ kern_unsetenv("kern.geom.eli.passphrase"); /* For Multicons, report which console is primary to both */ if (boothowto & RB_MULTIPLE) { if (boothowto & RB_SERIAL) printf("Dual Console: Serial Primary, Video Secondary\n"); else printf("Dual Console: Video Primary, Serial Secondary\n"); } if ((var = kern_getenv("init_path")) != NULL) { strlcpy(init_path, var, sizeof(init_path)); freeenv(var); } free_init_path = tmp_init_path = strdup(init_path, M_TEMP); while ((path = strsep(&tmp_init_path, ":")) != NULL) { if (bootverbose) printf("start_init: trying %s\n", path); memset(&args, 0, sizeof(args)); error = exec_alloc_args(&args); if (error != 0) panic("%s: Can't allocate space for init arguments %d", __func__, error); error = exec_args_add_fname(&args, path, UIO_SYSSPACE); if (error != 0) panic("%s: Can't add fname %d", __func__, error); error = exec_args_add_arg(&args, path, UIO_SYSSPACE); if (error != 0) panic("%s: Can't add argv[0] %d", __func__, error); if (boothowto & RB_SINGLE) error = exec_args_add_arg(&args, "-s", UIO_SYSSPACE); if (error != 0) panic("%s: Can't add argv[0] %d", __func__, error); /* * Now try to exec the program. If can't for any reason * other than it doesn't exist, complain. * * Otherwise, return via fork_trampoline() all the way * to user mode as init! */ KASSERT((td->td_pflags & TDP_EXECVMSPC) == 0, ("nested execve")); oldvmspace = p->p_vmspace; error = kern_execve(td, &args, NULL, oldvmspace); KASSERT(error != 0, ("kern_execve returned success, not EJUSTRETURN")); if (error == EJUSTRETURN) { exec_cleanup(td, oldvmspace); free(free_init_path, M_TEMP); TSEXIT(); return; } if (error != ENOENT) printf("exec %s: error %d\n", path, error); } free(free_init_path, M_TEMP); printf("init: not found in path %s\n", init_path); panic("no init"); } /* * Like kproc_create(), but runs in its own address space. We do this * early to reserve pid 1. Note special case - do not make it * runnable yet, init execution is started when userspace can be served. */ static void create_init(const void *udata __unused) { struct fork_req fr; struct ucred *newcred, *oldcred; struct thread *td; int error; bzero(&fr, sizeof(fr)); fr.fr_flags = RFFDG | RFPROC | RFSTOPPED; fr.fr_procp = &initproc; error = fork1(&thread0, &fr); if (error) panic("cannot fork init: %d\n", error); KASSERT(initproc->p_pid == 1, ("create_init: initproc->p_pid != 1")); /* divorce init's credentials from the kernel's */ newcred = crget(); sx_xlock(&proctree_lock); PROC_LOCK(initproc); initproc->p_flag |= P_SYSTEM | P_INMEM; initproc->p_treeflag |= P_TREE_REAPER; oldcred = initproc->p_ucred; crcopy(newcred, oldcred); #ifdef MAC mac_cred_create_init(newcred); #endif #ifdef AUDIT audit_cred_proc1(newcred); #endif proc_set_cred(initproc, newcred); td = FIRST_THREAD_IN_PROC(initproc); crcowfree(td); td->td_realucred = crcowget(initproc->p_ucred); td->td_ucred = td->td_realucred; PROC_UNLOCK(initproc); sx_xunlock(&proctree_lock); crfree(oldcred); cpu_fork_kthread_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL); } SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL); /* * Make it runnable now. */ static void kick_init(const void *udata __unused) { struct thread *td; td = FIRST_THREAD_IN_PROC(initproc); thread_lock(td); TD_SET_CAN_RUN(td); sched_add(td, SRQ_BORING); } SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_MIDDLE, kick_init, NULL); /* * DDB(4). */ #ifdef DDB static void db_show_print_syinit(struct sysinit *sip, bool ddb) { const char *sname, *funcname; c_db_sym_t sym; db_expr_t offset; #define xprint(...) \ if (ddb) \ db_printf(__VA_ARGS__); \ else \ printf(__VA_ARGS__) if (sip == NULL) { xprint("%s: no sysinit * given\n", __func__); return; } sym = db_search_symbol((vm_offset_t)sip, DB_STGY_ANY, &offset); db_symbol_values(sym, &sname, NULL); sym = db_search_symbol((vm_offset_t)sip->func, DB_STGY_PROC, &offset); db_symbol_values(sym, &funcname, NULL); xprint("%s(%p)\n", (sname != NULL) ? sname : "", sip); xprint(" %#08x %#08x\n", sip->subsystem, sip->order); xprint(" %p(%s)(%p)\n", sip->func, (funcname != NULL) ? funcname : "", sip->udata); #undef xprint } DB_SHOW_COMMAND_FLAGS(sysinit, db_show_sysinit, DB_CMD_MEMSAFE) { struct sysinit **sipp; db_printf("SYSINIT vs Name(Ptr)\n"); db_printf(" Subsystem Order\n"); db_printf(" Function(Name)(Arg)\n"); for (sipp = sysinit; sipp < sysinit_end; sipp++) { db_show_print_syinit(*sipp, true); if (db_pager_quit) break; } } #endif /* DDB */ diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 6d273dab3af6..14aac3f374d2 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -1,2102 +1,2091 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 1993, David Greenman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_hwpmc_hooks.h" #include "opt_ktrace.h" #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include #endif #include #include #ifdef KDTRACE_HOOKS #include dtrace_execexit_func_t dtrace_fasttrap_exec; #endif SDT_PROVIDER_DECLARE(proc); SDT_PROBE_DEFINE1(proc, , , exec, "char *"); SDT_PROBE_DEFINE1(proc, , , exec__failure, "int"); SDT_PROBE_DEFINE1(proc, , , exec__success, "char *"); MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments"); int coredump_pack_fileinfo = 1; SYSCTL_INT(_kern, OID_AUTO, coredump_pack_fileinfo, CTLFLAG_RWTUN, &coredump_pack_fileinfo, 0, "Enable file path packing in 'procstat -f' coredump notes"); int coredump_pack_vmmapinfo = 1; SYSCTL_INT(_kern, OID_AUTO, coredump_pack_vmmapinfo, CTLFLAG_RWTUN, &coredump_pack_vmmapinfo, 0, "Enable file path packing in 'procstat -v' coredump notes"); static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS); static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS); static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS); static int do_execve(struct thread *td, struct image_args *args, struct mac *mac_p, struct vmspace *oldvmspace); /* XXX This should be vm_size_t. */ SYSCTL_PROC(_kern, KERN_PS_STRINGS, ps_strings, CTLTYPE_ULONG|CTLFLAG_RD| CTLFLAG_CAPRD|CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_ps_strings, "LU", "Location of process' ps_strings structure"); /* XXX This should be vm_size_t. */ SYSCTL_PROC(_kern, KERN_USRSTACK, usrstack, CTLTYPE_ULONG|CTLFLAG_RD| CTLFLAG_CAPRD|CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_usrstack, "LU", "Top of process stack"); SYSCTL_PROC(_kern, OID_AUTO, stackprot, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_stackprot, "I", "Stack memory permissions"); u_long ps_arg_cache_limit = PAGE_SIZE / 16; SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW, &ps_arg_cache_limit, 0, "Process' command line characters cache limit"); static int disallow_high_osrel; SYSCTL_INT(_kern, OID_AUTO, disallow_high_osrel, CTLFLAG_RW, &disallow_high_osrel, 0, "Disallow execution of binaries built for higher version of the world"); static int map_at_zero = 0; SYSCTL_INT(_security_bsd, OID_AUTO, map_at_zero, CTLFLAG_RWTUN, &map_at_zero, 0, "Permit processes to map an object at virtual address 0."); static int core_dump_can_intr = 1; SYSCTL_INT(_kern, OID_AUTO, core_dump_can_intr, CTLFLAG_RWTUN, &core_dump_can_intr, 0, "Core dumping interruptible with SIGKILL"); static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS) { struct proc *p; vm_offset_t ps_strings; p = curproc; #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { unsigned int val; val = (unsigned int)PROC_PS_STRINGS(p); return (SYSCTL_OUT(req, &val, sizeof(val))); } #endif ps_strings = PROC_PS_STRINGS(p); return (SYSCTL_OUT(req, &ps_strings, sizeof(ps_strings))); } static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS) { struct proc *p; vm_offset_t val; p = curproc; #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { unsigned int val32; val32 = round_page((unsigned int)p->p_vmspace->vm_stacktop); return (SYSCTL_OUT(req, &val32, sizeof(val32))); } #endif val = round_page(p->p_vmspace->vm_stacktop); return (SYSCTL_OUT(req, &val, sizeof(val))); } static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS) { struct proc *p; p = curproc; return (SYSCTL_OUT(req, &p->p_sysent->sv_stackprot, sizeof(p->p_sysent->sv_stackprot))); } /* * Each of the items is a pointer to a `const struct execsw', hence the * double pointer here. */ static const struct execsw **execsw; #ifndef _SYS_SYSPROTO_H_ struct execve_args { char *fname; char **argv; char **envv; }; #endif int sys_execve(struct thread *td, struct execve_args *uap) { struct image_args args; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &args, NULL, oldvmspace); post_execve(td, error, oldvmspace); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); return (error); } #ifndef _SYS_SYSPROTO_H_ struct fexecve_args { int fd; char **argv; char **envv; }; #endif int sys_fexecve(struct thread *td, struct fexecve_args *uap) { struct image_args args; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = exec_copyin_args(&args, NULL, UIO_SYSSPACE, uap->argv, uap->envv); if (error == 0) { args.fd = uap->fd; error = kern_execve(td, &args, NULL, oldvmspace); } post_execve(td, error, oldvmspace); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); return (error); } #ifndef _SYS_SYSPROTO_H_ struct __mac_execve_args { char *fname; char **argv; char **envv; struct mac *mac_p; }; #endif int sys___mac_execve(struct thread *td, struct __mac_execve_args *uap) { #ifdef MAC struct image_args args; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &args, uap->mac_p, oldvmspace); post_execve(td, error, oldvmspace); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); return (error); #else return (ENOSYS); #endif } int pre_execve(struct thread *td, struct vmspace **oldvmspace) { struct proc *p; int error; KASSERT(td == curthread, ("non-current thread %p", td)); error = 0; p = td->td_proc; if ((p->p_flag & P_HADTHREADS) != 0) { PROC_LOCK(p); if (thread_single(p, SINGLE_BOUNDARY) != 0) error = ERESTART; PROC_UNLOCK(p); } KASSERT(error != 0 || (td->td_pflags & TDP_EXECVMSPC) == 0, ("nested execve")); *oldvmspace = p->p_vmspace; return (error); } void post_execve(struct thread *td, int error, struct vmspace *oldvmspace) { struct proc *p; KASSERT(td == curthread, ("non-current thread %p", td)); p = td->td_proc; if ((p->p_flag & P_HADTHREADS) != 0) { PROC_LOCK(p); /* * If success, we upgrade to SINGLE_EXIT state to * force other threads to suicide. */ if (error == EJUSTRETURN) thread_single(p, SINGLE_EXIT); else thread_single_end(p, SINGLE_BOUNDARY); PROC_UNLOCK(p); } exec_cleanup(td, oldvmspace); } /* * kern_execve() has the astonishing property of not always returning to * the caller. If sufficiently bad things happen during the call to * do_execve(), it can end up calling exit1(); as a result, callers must * avoid doing anything which they might need to undo (e.g., allocating * memory). */ int kern_execve(struct thread *td, struct image_args *args, struct mac *mac_p, struct vmspace *oldvmspace) { TSEXEC(td->td_proc->p_pid, args->begin_argv); AUDIT_ARG_ARGV(args->begin_argv, args->argc, exec_args_get_begin_envv(args) - args->begin_argv); AUDIT_ARG_ENVV(exec_args_get_begin_envv(args), args->envc, args->endp - exec_args_get_begin_envv(args)); /* Must have at least one argument. */ if (args->argc == 0) { exec_free_args(args); return (EINVAL); } return (do_execve(td, args, mac_p, oldvmspace)); } static void execve_nosetid(struct image_params *imgp) { imgp->credential_setid = false; if (imgp->newcred != NULL) { crfree(imgp->newcred); imgp->newcred = NULL; } } /* * In-kernel implementation of execve(). All arguments are assumed to be * userspace pointers from the passed thread. */ static int do_execve(struct thread *td, struct image_args *args, struct mac *mac_p, struct vmspace *oldvmspace) { struct proc *p = td->td_proc; struct nameidata nd; struct ucred *oldcred; struct uidinfo *euip = NULL; uintptr_t stack_base; struct image_params image_params, *imgp; struct vattr attr; - int (*img_first)(struct image_params *); struct pargs *oldargs = NULL, *newargs = NULL; struct sigacts *oldsigacts = NULL, *newsigacts = NULL; #ifdef KTRACE struct ktr_io_params *kiop; #endif struct vnode *oldtextvp, *newtextvp; struct vnode *oldtextdvp, *newtextdvp; char *oldbinname, *newbinname; bool credential_changing; #ifdef MAC struct label *interpvplabel = NULL; bool will_transition; #endif #ifdef HWPMC_HOOKS struct pmckern_procexec pe; #endif int error, i, orig_osrel; uint32_t orig_fctl0; Elf_Brandinfo *orig_brandinfo; size_t freepath_size; static const char fexecv_proc_title[] = "(fexecv)"; imgp = &image_params; oldtextvp = oldtextdvp = NULL; newtextvp = newtextdvp = NULL; newbinname = oldbinname = NULL; #ifdef KTRACE kiop = NULL; #endif /* * Lock the process and set the P_INEXEC flag to indicate that * it should be left alone until we're done here. This is * necessary to avoid race conditions - e.g. in ptrace() - * that might allow a local user to illicitly obtain elevated * privileges. */ PROC_LOCK(p); KASSERT((p->p_flag & P_INEXEC) == 0, ("%s(): process already has P_INEXEC flag", __func__)); p->p_flag |= P_INEXEC; PROC_UNLOCK(p); /* * Initialize part of the common data */ bzero(imgp, sizeof(*imgp)); imgp->proc = p; imgp->attr = &attr; imgp->args = args; oldcred = p->p_ucred; orig_osrel = p->p_osrel; orig_fctl0 = p->p_fctl0; orig_brandinfo = p->p_elf_brandinfo; #ifdef MAC error = mac_execve_enter(imgp, mac_p); if (error) goto exec_fail; #endif SDT_PROBE1(proc, , , exec, args->fname); interpret: if (args->fname != NULL) { #ifdef CAPABILITY_MODE /* * While capability mode can't reach this point via direct * path arguments to execve(), we also don't allow * interpreters to be used in capability mode (for now). * Catch indirect lookups and return a permissions error. */ if (IN_CAPABILITY_MODE(td)) { error = ECAPMODE; goto exec_fail; } #endif /* * Translate the file name. namei() returns a vnode * pointer in ni_vp among other things. */ NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | LOCKSHARED | FOLLOW | AUDITVNODE1 | WANTPARENT, UIO_SYSSPACE, args->fname); error = namei(&nd); if (error) goto exec_fail; newtextvp = nd.ni_vp; newtextdvp = nd.ni_dvp; nd.ni_dvp = NULL; newbinname = malloc(nd.ni_cnd.cn_namelen + 1, M_PARGS, M_WAITOK); memcpy(newbinname, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen); newbinname[nd.ni_cnd.cn_namelen] = '\0'; imgp->vp = newtextvp; /* * Do the best to calculate the full path to the image file. */ if (args->fname[0] == '/') { imgp->execpath = args->fname; } else { VOP_UNLOCK(imgp->vp); freepath_size = MAXPATHLEN; if (vn_fullpath_hardlink(newtextvp, newtextdvp, newbinname, nd.ni_cnd.cn_namelen, &imgp->execpath, &imgp->freepath, &freepath_size) != 0) imgp->execpath = args->fname; vn_lock(imgp->vp, LK_SHARED | LK_RETRY); } } else if (imgp->interpreter_vp) { /* * An image activator has already provided an open vnode */ newtextvp = imgp->interpreter_vp; imgp->interpreter_vp = NULL; if (vn_fullpath(newtextvp, &imgp->execpath, &imgp->freepath) != 0) imgp->execpath = args->fname; vn_lock(newtextvp, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(newtextvp); imgp->vp = newtextvp; } else { AUDIT_ARG_FD(args->fd); /* * If the descriptors was not opened with O_PATH, then * we require that it was opened with O_EXEC or * O_RDONLY. In either case, exec_check_permissions() * below checks _current_ file access mode regardless * of the permissions additionally checked at the * open(2). */ error = fgetvp_exec(td, args->fd, &cap_fexecve_rights, &newtextvp); if (error != 0) goto exec_fail; if (vn_fullpath(newtextvp, &imgp->execpath, &imgp->freepath) != 0) imgp->execpath = args->fname; vn_lock(newtextvp, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(newtextvp); imgp->vp = newtextvp; } /* * Check file permissions. Also 'opens' file and sets its vnode to * text mode. */ error = exec_check_permissions(imgp); if (error) goto exec_fail_dealloc; imgp->object = imgp->vp->v_object; if (imgp->object != NULL) vm_object_reference(imgp->object); error = exec_map_first_page(imgp); if (error) goto exec_fail_dealloc; imgp->proc->p_osrel = 0; imgp->proc->p_fctl0 = 0; imgp->proc->p_elf_brandinfo = NULL; /* * Implement image setuid/setgid. * * Determine new credentials before attempting image activators * so that it can be used by process_exec handlers to determine * credential/setid changes. * * Don't honor setuid/setgid if the filesystem prohibits it or if * the process is being traced. * * We disable setuid/setgid/etc in capability mode on the basis * that most setugid applications are not written with that * environment in mind, and will therefore almost certainly operate * incorrectly. In principle there's no reason that setugid * applications might not be useful in capability mode, so we may want * to reconsider this conservative design choice in the future. * * XXXMAC: For the time being, use NOSUID to also prohibit * transitions on the file system. */ credential_changing = false; credential_changing |= (attr.va_mode & S_ISUID) && oldcred->cr_uid != attr.va_uid; credential_changing |= (attr.va_mode & S_ISGID) && oldcred->cr_gid != attr.va_gid; #ifdef MAC will_transition = mac_vnode_execve_will_transition(oldcred, imgp->vp, interpvplabel, imgp) != 0; credential_changing |= will_transition; #endif /* Don't inherit PROC_PDEATHSIG_CTL value if setuid/setgid. */ if (credential_changing) imgp->proc->p_pdeathsig = 0; if (credential_changing && #ifdef CAPABILITY_MODE ((oldcred->cr_flags & CRED_FLAG_CAPMODE) == 0) && #endif (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 && (p->p_flag & P_TRACED) == 0) { imgp->credential_setid = true; VOP_UNLOCK(imgp->vp); imgp->newcred = crdup(oldcred); if (attr.va_mode & S_ISUID) { euip = uifind(attr.va_uid); change_euid(imgp->newcred, euip); } vn_lock(imgp->vp, LK_SHARED | LK_RETRY); if (attr.va_mode & S_ISGID) change_egid(imgp->newcred, attr.va_gid); /* * Implement correct POSIX saved-id behavior. * * XXXMAC: Note that the current logic will save the * uid and gid if a MAC domain transition occurs, even * though maybe it shouldn't. */ change_svuid(imgp->newcred, imgp->newcred->cr_uid); change_svgid(imgp->newcred, imgp->newcred->cr_gid); } else { /* * Implement correct POSIX saved-id behavior. * * XXX: It's not clear that the existing behavior is * POSIX-compliant. A number of sources indicate that the * saved uid/gid should only be updated if the new ruid is * not equal to the old ruid, or the new euid is not equal * to the old euid and the new euid is not equal to the old * ruid. The FreeBSD code always updates the saved uid/gid. * Also, this code uses the new (replaced) euid and egid as * the source, which may or may not be the right ones to use. */ if (oldcred->cr_svuid != oldcred->cr_uid || oldcred->cr_svgid != oldcred->cr_gid) { VOP_UNLOCK(imgp->vp); imgp->newcred = crdup(oldcred); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); change_svuid(imgp->newcred, imgp->newcred->cr_uid); change_svgid(imgp->newcred, imgp->newcred->cr_gid); } } /* The new credentials are installed into the process later. */ - /* - * If the current process has a special image activator it - * wants to try first, call it. For example, emulating shell - * scripts differently. - */ - error = -1; - if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL) - error = img_first(imgp); - /* * Loop through the list of image activators, calling each one. * An activator returns -1 if there is no match, 0 on success, * and an error otherwise. */ + error = -1; for (i = 0; error == -1 && execsw[i]; ++i) { - if (execsw[i]->ex_imgact == NULL || - execsw[i]->ex_imgact == img_first) { + if (execsw[i]->ex_imgact == NULL) continue; - } error = (*execsw[i]->ex_imgact)(imgp); } if (error) { if (error == -1) error = ENOEXEC; goto exec_fail_dealloc; } /* * Special interpreter operation, cleanup and loop up to try to * activate the interpreter. */ if (imgp->interpreted) { exec_unmap_first_page(imgp); /* * The text reference needs to be removed for scripts. * There is a short period before we determine that * something is a script where text reference is active. * The vnode lock is held over this entire period * so nothing should illegitimately be blocked. */ MPASS(imgp->textset); VOP_UNSET_TEXT_CHECKED(newtextvp); imgp->textset = false; /* free name buffer and old vnode */ #ifdef MAC mac_execve_interpreter_enter(newtextvp, &interpvplabel); #endif if (imgp->opened) { VOP_CLOSE(newtextvp, FREAD, td->td_ucred, td); imgp->opened = false; } vput(newtextvp); imgp->vp = newtextvp = NULL; if (args->fname != NULL) { if (newtextdvp != NULL) { vrele(newtextdvp); newtextdvp = NULL; } NDFREE_PNBUF(&nd); free(newbinname, M_PARGS); newbinname = NULL; } vm_object_deallocate(imgp->object); imgp->object = NULL; execve_nosetid(imgp); imgp->execpath = NULL; free(imgp->freepath, M_TEMP); imgp->freepath = NULL; /* set new name to that of the interpreter */ if (imgp->interpreter_vp) { args->fname = NULL; } else { args->fname = imgp->interpreter_name; } goto interpret; } /* * NB: We unlock the vnode here because it is believed that none * of the sv_copyout_strings/sv_fixup operations require the vnode. */ VOP_UNLOCK(imgp->vp); if (disallow_high_osrel && P_OSREL_MAJOR(p->p_osrel) > P_OSREL_MAJOR(__FreeBSD_version)) { error = ENOEXEC; uprintf("Osrel %d for image %s too high\n", p->p_osrel, imgp->execpath != NULL ? imgp->execpath : ""); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); goto exec_fail_dealloc; } /* * Copy out strings (args and env) and initialize stack base. */ error = (*p->p_sysent->sv_copyout_strings)(imgp, &stack_base); if (error != 0) { vn_lock(imgp->vp, LK_SHARED | LK_RETRY); goto exec_fail_dealloc; } /* * Stack setup. */ error = (*p->p_sysent->sv_fixup)(&stack_base, imgp); if (error != 0) { vn_lock(imgp->vp, LK_SHARED | LK_RETRY); goto exec_fail_dealloc; } /* * For security and other reasons, the file descriptor table cannot be * shared after an exec. */ fdunshare(td); pdunshare(td); /* close files on exec */ fdcloseexec(td); /* * Malloc things before we need locks. */ i = exec_args_get_begin_envv(imgp->args) - imgp->args->begin_argv; /* Cache arguments if they fit inside our allowance */ if (ps_arg_cache_limit >= i + sizeof(struct pargs)) { newargs = pargs_alloc(i); bcopy(imgp->args->begin_argv, newargs->ar_args, i); } /* * For security and other reasons, signal handlers cannot * be shared after an exec. The new process gets a copy of the old * handlers. In execsigs(), the new process will have its signals * reset. */ if (sigacts_shared(p->p_sigacts)) { oldsigacts = p->p_sigacts; newsigacts = sigacts_alloc(); sigacts_copy(newsigacts, oldsigacts); } vn_lock(imgp->vp, LK_SHARED | LK_RETRY); PROC_LOCK(p); if (oldsigacts) p->p_sigacts = newsigacts; /* Stop profiling */ stopprofclock(p); /* reset caught signals */ execsigs(p); /* name this process - nameiexec(p, ndp) */ bzero(p->p_comm, sizeof(p->p_comm)); if (args->fname) bcopy(nd.ni_cnd.cn_nameptr, p->p_comm, min(nd.ni_cnd.cn_namelen, MAXCOMLEN)); else if (vn_commname(newtextvp, p->p_comm, sizeof(p->p_comm)) != 0) bcopy(fexecv_proc_title, p->p_comm, sizeof(fexecv_proc_title)); bcopy(p->p_comm, td->td_name, sizeof(td->td_name)); #ifdef KTR sched_clear_tdname(td); #endif /* * mark as execed, wakeup the process that vforked (if any) and tell * it that it now has its own resources back */ p->p_flag |= P_EXEC; if ((p->p_flag2 & P2_NOTRACE_EXEC) == 0) p->p_flag2 &= ~P2_NOTRACE; if ((p->p_flag2 & P2_STKGAP_DISABLE_EXEC) == 0) p->p_flag2 &= ~P2_STKGAP_DISABLE; if (p->p_flag & P_PPWAIT) { p->p_flag &= ~(P_PPWAIT | P_PPTRACE); cv_broadcast(&p->p_pwait); /* STOPs are no longer ignored, arrange for AST */ signotify(td); } if ((imgp->sysent->sv_setid_allowed != NULL && !(*imgp->sysent->sv_setid_allowed)(td, imgp)) || (p->p_flag2 & P2_NO_NEW_PRIVS) != 0) execve_nosetid(imgp); /* * Implement image setuid/setgid installation. */ if (imgp->credential_setid) { /* * Turn off syscall tracing for set-id programs, except for * root. Record any set-id flags first to make sure that * we do not regain any tracing during a possible block. */ setsugid(p); #ifdef KTRACE kiop = ktrprocexec(p); #endif /* * Close any file descriptors 0..2 that reference procfs, * then make sure file descriptors 0..2 are in use. * * Both fdsetugidsafety() and fdcheckstd() may call functions * taking sleepable locks, so temporarily drop our locks. */ PROC_UNLOCK(p); VOP_UNLOCK(imgp->vp); fdsetugidsafety(td); error = fdcheckstd(td); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); if (error != 0) goto exec_fail_dealloc; PROC_LOCK(p); #ifdef MAC if (will_transition) { mac_vnode_execve_transition(oldcred, imgp->newcred, imgp->vp, interpvplabel, imgp); } #endif } else { if (oldcred->cr_uid == oldcred->cr_ruid && oldcred->cr_gid == oldcred->cr_rgid) p->p_flag &= ~P_SUGID; } /* * Set the new credentials. */ if (imgp->newcred != NULL) { proc_set_cred(p, imgp->newcred); crfree(oldcred); oldcred = NULL; } /* * Store the vp for use in kern.proc.pathname. This vnode was * referenced by namei() or by fexecve variant of fname handling. */ oldtextvp = p->p_textvp; p->p_textvp = newtextvp; oldtextdvp = p->p_textdvp; p->p_textdvp = newtextdvp; newtextdvp = NULL; oldbinname = p->p_binname; p->p_binname = newbinname; newbinname = NULL; #ifdef KDTRACE_HOOKS /* * Tell the DTrace fasttrap provider about the exec if it * has declared an interest. */ if (dtrace_fasttrap_exec) dtrace_fasttrap_exec(p); #endif /* * Notify others that we exec'd, and clear the P_INEXEC flag * as we're now a bona fide freshly-execed process. */ KNOTE_LOCKED(p->p_klist, NOTE_EXEC); p->p_flag &= ~P_INEXEC; /* clear "fork but no exec" flag, as we _are_ execing */ p->p_acflag &= ~AFORK; /* * Free any previous argument cache and replace it with * the new argument cache, if any. */ oldargs = p->p_args; p->p_args = newargs; newargs = NULL; PROC_UNLOCK(p); #ifdef HWPMC_HOOKS /* * Check if system-wide sampling is in effect or if the * current process is using PMCs. If so, do exec() time * processing. This processing needs to happen AFTER the * P_INEXEC flag is cleared. */ if (PMC_SYSTEM_SAMPLING_ACTIVE() || PMC_PROC_IS_USING_PMCS(p)) { VOP_UNLOCK(imgp->vp); pe.pm_credentialschanged = credential_changing; pe.pm_entryaddr = imgp->entry_addr; PMC_CALL_HOOK_X(td, PMC_FN_PROCESS_EXEC, (void *) &pe); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); } #endif /* Set values passed into the program in registers. */ (*p->p_sysent->sv_setregs)(td, imgp, stack_base); VOP_MMAPPED(imgp->vp); SDT_PROBE1(proc, , , exec__success, args->fname); exec_fail_dealloc: if (error != 0) { p->p_osrel = orig_osrel; p->p_fctl0 = orig_fctl0; p->p_elf_brandinfo = orig_brandinfo; } if (imgp->firstpage != NULL) exec_unmap_first_page(imgp); if (imgp->vp != NULL) { if (imgp->opened) VOP_CLOSE(imgp->vp, FREAD, td->td_ucred, td); if (imgp->textset) VOP_UNSET_TEXT_CHECKED(imgp->vp); if (error != 0) vput(imgp->vp); else VOP_UNLOCK(imgp->vp); if (args->fname != NULL) NDFREE_PNBUF(&nd); if (newtextdvp != NULL) vrele(newtextdvp); free(newbinname, M_PARGS); } if (imgp->object != NULL) vm_object_deallocate(imgp->object); free(imgp->freepath, M_TEMP); if (error == 0) { if (p->p_ptevents & PTRACE_EXEC) { PROC_LOCK(p); if (p->p_ptevents & PTRACE_EXEC) td->td_dbgflags |= TDB_EXEC; PROC_UNLOCK(p); } } else { exec_fail: /* we're done here, clear P_INEXEC */ PROC_LOCK(p); p->p_flag &= ~P_INEXEC; PROC_UNLOCK(p); SDT_PROBE1(proc, , , exec__failure, error); } if (imgp->newcred != NULL && oldcred != NULL) crfree(imgp->newcred); #ifdef MAC mac_execve_exit(imgp); mac_execve_interpreter_exit(interpvplabel); #endif exec_free_args(args); /* * Handle deferred decrement of ref counts. */ if (oldtextvp != NULL) vrele(oldtextvp); if (oldtextdvp != NULL) vrele(oldtextdvp); free(oldbinname, M_PARGS); #ifdef KTRACE ktr_io_params_free(kiop); #endif pargs_drop(oldargs); pargs_drop(newargs); if (oldsigacts != NULL) sigacts_free(oldsigacts); if (euip != NULL) uifree(euip); if (error && imgp->vmspace_destroyed) { /* sorry, no more process anymore. exit gracefully */ exec_cleanup(td, oldvmspace); exit1(td, 0, SIGABRT); /* NOT REACHED */ } #ifdef KTRACE if (error == 0) ktrprocctor(p); #endif /* * We don't want cpu_set_syscall_retval() to overwrite any of * the register values put in place by exec_setregs(). * Implementations of cpu_set_syscall_retval() will leave * registers unmodified when returning EJUSTRETURN. */ return (error == 0 ? EJUSTRETURN : error); } void exec_cleanup(struct thread *td, struct vmspace *oldvmspace) { if ((td->td_pflags & TDP_EXECVMSPC) != 0) { KASSERT(td->td_proc->p_vmspace != oldvmspace, ("oldvmspace still used")); vmspace_free(oldvmspace); td->td_pflags &= ~TDP_EXECVMSPC; } } int exec_map_first_page(struct image_params *imgp) { vm_object_t object; vm_page_t m; int error; if (imgp->firstpage != NULL) exec_unmap_first_page(imgp); object = imgp->vp->v_object; if (object == NULL) return (EACCES); #if VM_NRESERVLEVEL > 0 if ((object->flags & OBJ_COLORED) == 0) { VM_OBJECT_WLOCK(object); vm_object_color(object, 0); VM_OBJECT_WUNLOCK(object); } #endif error = vm_page_grab_valid_unlocked(&m, object, 0, VM_ALLOC_COUNT(VM_INITIAL_PAGEIN) | VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED); if (error != VM_PAGER_OK) return (EIO); imgp->firstpage = sf_buf_alloc(m, 0); imgp->image_header = (char *)sf_buf_kva(imgp->firstpage); return (0); } void exec_unmap_first_page(struct image_params *imgp) { vm_page_t m; if (imgp->firstpage != NULL) { m = sf_buf_page(imgp->firstpage); sf_buf_free(imgp->firstpage); imgp->firstpage = NULL; vm_page_unwire(m, PQ_ACTIVE); } } void exec_onexec_old(struct thread *td) { sigfastblock_clear(td); umtx_exec(td->td_proc); } /* * This is an optimization which removes the unmanaged shared page * mapping. In combination with pmap_remove_pages(), which cleans all * managed mappings in the process' vmspace pmap, no work will be left * for pmap_remove(min, max). */ void exec_free_abi_mappings(struct proc *p) { struct vmspace *vmspace; vmspace = p->p_vmspace; if (refcount_load(&vmspace->vm_refcnt) != 1) return; if (!PROC_HAS_SHP(p)) return; pmap_remove(vmspace_pmap(vmspace), vmspace->vm_shp_base, vmspace->vm_shp_base + p->p_sysent->sv_shared_page_len); } /* * Run down the current address space and install a new one. */ int exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv) { int error; struct proc *p = imgp->proc; struct vmspace *vmspace = p->p_vmspace; struct thread *td = curthread; vm_offset_t sv_minuser; vm_map_t map; imgp->vmspace_destroyed = true; imgp->sysent = sv; if (p->p_sysent->sv_onexec_old != NULL) p->p_sysent->sv_onexec_old(td); itimers_exec(p); EVENTHANDLER_DIRECT_INVOKE(process_exec, p, imgp); /* * Blow away entire process VM, if address space not shared, * otherwise, create a new VM space so that other threads are * not disrupted */ map = &vmspace->vm_map; if (map_at_zero) sv_minuser = sv->sv_minuser; else sv_minuser = MAX(sv->sv_minuser, PAGE_SIZE); if (refcount_load(&vmspace->vm_refcnt) == 1 && vm_map_min(map) == sv_minuser && vm_map_max(map) == sv->sv_maxuser && cpu_exec_vmspace_reuse(p, map)) { exec_free_abi_mappings(p); shmexit(vmspace); pmap_remove_pages(vmspace_pmap(vmspace)); vm_map_remove(map, vm_map_min(map), vm_map_max(map)); /* * An exec terminates mlockall(MCL_FUTURE). * ASLR and W^X states must be re-evaluated. */ vm_map_lock(map); vm_map_modflags(map, 0, MAP_WIREFUTURE | MAP_ASLR | MAP_ASLR_IGNSTART | MAP_ASLR_STACK | MAP_WXORX); vm_map_unlock(map); } else { error = vmspace_exec(p, sv_minuser, sv->sv_maxuser); if (error) return (error); vmspace = p->p_vmspace; map = &vmspace->vm_map; } map->flags |= imgp->map_flags; return (sv->sv_onexec != NULL ? sv->sv_onexec(p, imgp) : 0); } /* * Compute the stack size limit and map the main process stack. * Map the shared page. */ int exec_map_stack(struct image_params *imgp) { struct rlimit rlim_stack; struct sysentvec *sv; struct proc *p; vm_map_t map; struct vmspace *vmspace; vm_offset_t stack_addr, stack_top; vm_offset_t sharedpage_addr; u_long ssiz; int error, find_space, stack_off; vm_prot_t stack_prot; vm_object_t obj; p = imgp->proc; sv = p->p_sysent; if (imgp->stack_sz != 0) { ssiz = trunc_page(imgp->stack_sz); PROC_LOCK(p); lim_rlimit_proc(p, RLIMIT_STACK, &rlim_stack); PROC_UNLOCK(p); if (ssiz > rlim_stack.rlim_max) ssiz = rlim_stack.rlim_max; if (ssiz > rlim_stack.rlim_cur) { rlim_stack.rlim_cur = ssiz; kern_setrlimit(curthread, RLIMIT_STACK, &rlim_stack); } } else if (sv->sv_maxssiz != NULL) { ssiz = *sv->sv_maxssiz; } else { ssiz = maxssiz; } vmspace = p->p_vmspace; map = &vmspace->vm_map; stack_prot = sv->sv_shared_page_obj != NULL && imgp->stack_prot != 0 ? imgp->stack_prot : sv->sv_stackprot; if ((map->flags & MAP_ASLR_STACK) != 0) { stack_addr = round_page((vm_offset_t)p->p_vmspace->vm_daddr + lim_max(curthread, RLIMIT_DATA)); find_space = VMFS_ANY_SPACE; } else { stack_addr = sv->sv_usrstack - ssiz; find_space = VMFS_NO_SPACE; } error = vm_map_find(map, NULL, 0, &stack_addr, (vm_size_t)ssiz, sv->sv_usrstack, find_space, stack_prot, VM_PROT_ALL, MAP_STACK_GROWS_DOWN); if (error != KERN_SUCCESS) { uprintf("exec_new_vmspace: mapping stack size %#jx prot %#x " "failed, mach error %d errno %d\n", (uintmax_t)ssiz, stack_prot, error, vm_mmap_to_errno(error)); return (vm_mmap_to_errno(error)); } stack_top = stack_addr + ssiz; if ((map->flags & MAP_ASLR_STACK) != 0) { /* Randomize within the first page of the stack. */ arc4rand(&stack_off, sizeof(stack_off), 0); stack_top -= rounddown2(stack_off & PAGE_MASK, sizeof(void *)); } /* Map a shared page */ obj = sv->sv_shared_page_obj; if (obj == NULL) { sharedpage_addr = 0; goto out; } /* * If randomization is disabled then the shared page will * be mapped at address specified in sysentvec. * Otherwise any address above .data section can be selected. * Same logic is used for stack address randomization. * If the address randomization is applied map a guard page * at the top of UVA. */ vm_object_reference(obj); if ((imgp->imgp_flags & IMGP_ASLR_SHARED_PAGE) != 0) { sharedpage_addr = round_page((vm_offset_t)p->p_vmspace->vm_daddr + lim_max(curthread, RLIMIT_DATA)); error = vm_map_fixed(map, NULL, 0, sv->sv_maxuser - PAGE_SIZE, PAGE_SIZE, VM_PROT_NONE, VM_PROT_NONE, MAP_CREATE_GUARD); if (error != KERN_SUCCESS) { /* * This is not fatal, so let's just print a warning * and continue. */ uprintf("%s: Mapping guard page at the top of UVA failed" " mach error %d errno %d", __func__, error, vm_mmap_to_errno(error)); } error = vm_map_find(map, obj, 0, &sharedpage_addr, sv->sv_shared_page_len, sv->sv_maxuser, VMFS_ANY_SPACE, VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_READ | VM_PROT_EXECUTE, MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE); } else { sharedpage_addr = sv->sv_shared_page_base; vm_map_fixed(map, obj, 0, sharedpage_addr, sv->sv_shared_page_len, VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_READ | VM_PROT_EXECUTE, MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE); } if (error != KERN_SUCCESS) { uprintf("%s: mapping shared page at addr: %p" "failed, mach error %d errno %d\n", __func__, (void *)sharedpage_addr, error, vm_mmap_to_errno(error)); vm_object_deallocate(obj); return (vm_mmap_to_errno(error)); } out: /* * vm_ssize and vm_maxsaddr are somewhat antiquated concepts, but they * are still used to enforce the stack rlimit on the process stack. */ vmspace->vm_maxsaddr = (char *)stack_addr; vmspace->vm_stacktop = stack_top; vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT; vmspace->vm_shp_base = sharedpage_addr; return (0); } /* * Copy out argument and environment strings from the old process address * space into the temporary string buffer. */ int exec_copyin_args(struct image_args *args, const char *fname, enum uio_seg segflg, char **argv, char **envv) { u_long arg, env; int error; bzero(args, sizeof(*args)); if (argv == NULL) return (EFAULT); /* * Allocate demand-paged memory for the file name, argument, and * environment strings. */ error = exec_alloc_args(args); if (error != 0) return (error); /* * Copy the file name. */ error = exec_args_add_fname(args, fname, segflg); if (error != 0) goto err_exit; /* * extract arguments first */ for (;;) { error = fueword(argv++, &arg); if (error == -1) { error = EFAULT; goto err_exit; } if (arg == 0) break; error = exec_args_add_arg(args, (char *)(uintptr_t)arg, UIO_USERSPACE); if (error != 0) goto err_exit; } /* * extract environment strings */ if (envv) { for (;;) { error = fueword(envv++, &env); if (error == -1) { error = EFAULT; goto err_exit; } if (env == 0) break; error = exec_args_add_env(args, (char *)(uintptr_t)env, UIO_USERSPACE); if (error != 0) goto err_exit; } } return (0); err_exit: exec_free_args(args); return (error); } struct exec_args_kva { vm_offset_t addr; u_int gen; SLIST_ENTRY(exec_args_kva) next; }; DPCPU_DEFINE_STATIC(struct exec_args_kva *, exec_args_kva); static SLIST_HEAD(, exec_args_kva) exec_args_kva_freelist; static struct mtx exec_args_kva_mtx; static u_int exec_args_gen; static void exec_prealloc_args_kva(void *arg __unused) { struct exec_args_kva *argkva; u_int i; SLIST_INIT(&exec_args_kva_freelist); mtx_init(&exec_args_kva_mtx, "exec args kva", NULL, MTX_DEF); for (i = 0; i < exec_map_entries; i++) { argkva = malloc(sizeof(*argkva), M_PARGS, M_WAITOK); argkva->addr = kmap_alloc_wait(exec_map, exec_map_entry_size); argkva->gen = exec_args_gen; SLIST_INSERT_HEAD(&exec_args_kva_freelist, argkva, next); } } SYSINIT(exec_args_kva, SI_SUB_EXEC, SI_ORDER_ANY, exec_prealloc_args_kva, NULL); static vm_offset_t exec_alloc_args_kva(void **cookie) { struct exec_args_kva *argkva; argkva = (void *)atomic_readandclear_ptr( (uintptr_t *)DPCPU_PTR(exec_args_kva)); if (argkva == NULL) { mtx_lock(&exec_args_kva_mtx); while ((argkva = SLIST_FIRST(&exec_args_kva_freelist)) == NULL) (void)mtx_sleep(&exec_args_kva_freelist, &exec_args_kva_mtx, 0, "execkva", 0); SLIST_REMOVE_HEAD(&exec_args_kva_freelist, next); mtx_unlock(&exec_args_kva_mtx); } kasan_mark((void *)argkva->addr, exec_map_entry_size, exec_map_entry_size, 0); *(struct exec_args_kva **)cookie = argkva; return (argkva->addr); } static void exec_release_args_kva(struct exec_args_kva *argkva, u_int gen) { vm_offset_t base; base = argkva->addr; kasan_mark((void *)argkva->addr, 0, exec_map_entry_size, KASAN_EXEC_ARGS_FREED); if (argkva->gen != gen) { (void)vm_map_madvise(exec_map, base, base + exec_map_entry_size, MADV_FREE); argkva->gen = gen; } if (!atomic_cmpset_ptr((uintptr_t *)DPCPU_PTR(exec_args_kva), (uintptr_t)NULL, (uintptr_t)argkva)) { mtx_lock(&exec_args_kva_mtx); SLIST_INSERT_HEAD(&exec_args_kva_freelist, argkva, next); wakeup_one(&exec_args_kva_freelist); mtx_unlock(&exec_args_kva_mtx); } } static void exec_free_args_kva(void *cookie) { exec_release_args_kva(cookie, exec_args_gen); } static void exec_args_kva_lowmem(void *arg __unused) { SLIST_HEAD(, exec_args_kva) head; struct exec_args_kva *argkva; u_int gen; int i; gen = atomic_fetchadd_int(&exec_args_gen, 1) + 1; /* * Force an madvise of each KVA range. Any currently allocated ranges * will have MADV_FREE applied once they are freed. */ SLIST_INIT(&head); mtx_lock(&exec_args_kva_mtx); SLIST_SWAP(&head, &exec_args_kva_freelist, exec_args_kva); mtx_unlock(&exec_args_kva_mtx); while ((argkva = SLIST_FIRST(&head)) != NULL) { SLIST_REMOVE_HEAD(&head, next); exec_release_args_kva(argkva, gen); } CPU_FOREACH(i) { argkva = (void *)atomic_readandclear_ptr( (uintptr_t *)DPCPU_ID_PTR(i, exec_args_kva)); if (argkva != NULL) exec_release_args_kva(argkva, gen); } } EVENTHANDLER_DEFINE(vm_lowmem, exec_args_kva_lowmem, NULL, EVENTHANDLER_PRI_ANY); /* * Allocate temporary demand-paged, zero-filled memory for the file name, * argument, and environment strings. */ int exec_alloc_args(struct image_args *args) { args->buf = (char *)exec_alloc_args_kva(&args->bufkva); return (0); } void exec_free_args(struct image_args *args) { if (args->buf != NULL) { exec_free_args_kva(args->bufkva); args->buf = NULL; } if (args->fname_buf != NULL) { free(args->fname_buf, M_TEMP); args->fname_buf = NULL; } } /* * A set to functions to fill struct image args. * * NOTE: exec_args_add_fname() must be called (possibly with a NULL * fname) before the other functions. All exec_args_add_arg() calls must * be made before any exec_args_add_env() calls. exec_args_adjust_args() * may be called any time after exec_args_add_fname(). * * exec_args_add_fname() - install path to be executed * exec_args_add_arg() - append an argument string * exec_args_add_env() - append an env string * exec_args_adjust_args() - adjust location of the argument list to * allow new arguments to be prepended */ int exec_args_add_fname(struct image_args *args, const char *fname, enum uio_seg segflg) { int error; size_t length; KASSERT(args->fname == NULL, ("fname already appended")); KASSERT(args->endp == NULL, ("already appending to args")); if (fname != NULL) { args->fname = args->buf; error = segflg == UIO_SYSSPACE ? copystr(fname, args->fname, PATH_MAX, &length) : copyinstr(fname, args->fname, PATH_MAX, &length); if (error != 0) return (error == ENAMETOOLONG ? E2BIG : error); } else length = 0; /* Set up for _arg_*()/_env_*() */ args->endp = args->buf + length; /* begin_argv must be set and kept updated */ args->begin_argv = args->endp; KASSERT(exec_map_entry_size - length >= ARG_MAX, ("too little space remaining for arguments %zu < %zu", exec_map_entry_size - length, (size_t)ARG_MAX)); args->stringspace = ARG_MAX; return (0); } static int exec_args_add_str(struct image_args *args, const char *str, enum uio_seg segflg, int *countp) { int error; size_t length; KASSERT(args->endp != NULL, ("endp not initialized")); KASSERT(args->begin_argv != NULL, ("begin_argp not initialized")); error = (segflg == UIO_SYSSPACE) ? copystr(str, args->endp, args->stringspace, &length) : copyinstr(str, args->endp, args->stringspace, &length); if (error != 0) return (error == ENAMETOOLONG ? E2BIG : error); args->stringspace -= length; args->endp += length; (*countp)++; return (0); } int exec_args_add_arg(struct image_args *args, const char *argp, enum uio_seg segflg) { KASSERT(args->envc == 0, ("appending args after env")); return (exec_args_add_str(args, argp, segflg, &args->argc)); } int exec_args_add_env(struct image_args *args, const char *envp, enum uio_seg segflg) { if (args->envc == 0) args->begin_envv = args->endp; return (exec_args_add_str(args, envp, segflg, &args->envc)); } int exec_args_adjust_args(struct image_args *args, size_t consume, ssize_t extend) { ssize_t offset; KASSERT(args->endp != NULL, ("endp not initialized")); KASSERT(args->begin_argv != NULL, ("begin_argp not initialized")); offset = extend - consume; if (args->stringspace < offset) return (E2BIG); memmove(args->begin_argv + extend, args->begin_argv + consume, args->endp - args->begin_argv + consume); if (args->envc > 0) args->begin_envv += offset; args->endp += offset; args->stringspace -= offset; return (0); } char * exec_args_get_begin_envv(struct image_args *args) { KASSERT(args->endp != NULL, ("endp not initialized")); if (args->envc > 0) return (args->begin_envv); return (args->endp); } /* * Copy strings out to the new process address space, constructing new arg * and env vector tables. Return a pointer to the base so that it can be used * as the initial stack pointer. */ int exec_copyout_strings(struct image_params *imgp, uintptr_t *stack_base) { int argc, envc; char **vectp; char *stringp; uintptr_t destp, ustringp; struct ps_strings *arginfo; struct proc *p; struct sysentvec *sysent; size_t execpath_len; int error, szsigcode; char canary[sizeof(long) * 8]; p = imgp->proc; sysent = p->p_sysent; destp = PROC_PS_STRINGS(p); arginfo = imgp->ps_strings = (void *)destp; /* * Install sigcode. */ if (sysent->sv_shared_page_base == 0 && sysent->sv_szsigcode != NULL) { szsigcode = *(sysent->sv_szsigcode); destp -= szsigcode; destp = rounddown2(destp, sizeof(void *)); error = copyout(sysent->sv_sigcode, (void *)destp, szsigcode); if (error != 0) return (error); } /* * Copy the image path for the rtld. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) { execpath_len = strlen(imgp->execpath) + 1; destp -= execpath_len; destp = rounddown2(destp, sizeof(void *)); imgp->execpathp = (void *)destp; error = copyout(imgp->execpath, imgp->execpathp, execpath_len); if (error != 0) return (error); } /* * Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); destp -= sizeof(canary); imgp->canary = (void *)destp; error = copyout(canary, imgp->canary, sizeof(canary)); if (error != 0) return (error); imgp->canarylen = sizeof(canary); /* * Prepare the pagesizes array. */ imgp->pagesizeslen = sizeof(pagesizes[0]) * MAXPAGESIZES; destp -= imgp->pagesizeslen; destp = rounddown2(destp, sizeof(void *)); imgp->pagesizes = (void *)destp; error = copyout(pagesizes, imgp->pagesizes, imgp->pagesizeslen); if (error != 0) return (error); /* * Allocate room for the argument and environment strings. */ destp -= ARG_MAX - imgp->args->stringspace; destp = rounddown2(destp, sizeof(void *)); ustringp = destp; if (imgp->auxargs) { /* * Allocate room on the stack for the ELF auxargs * array. It has up to AT_COUNT entries. */ destp -= AT_COUNT * sizeof(Elf_Auxinfo); destp = rounddown2(destp, sizeof(void *)); } vectp = (char **)destp; /* * Allocate room for the argv[] and env vectors including the * terminating NULL pointers. */ vectp -= imgp->args->argc + 1 + imgp->args->envc + 1; /* * vectp also becomes our initial stack base */ *stack_base = (uintptr_t)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* * Copy out strings - arguments and environment. */ error = copyout(stringp, (void *)ustringp, ARG_MAX - imgp->args->stringspace); if (error != 0) return (error); /* * Fill in "ps_strings" struct for ps, w, etc. */ imgp->argv = vectp; if (suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp) != 0 || suword32(&arginfo->ps_nargvstr, argc) != 0) return (EFAULT); /* * Fill in argument portion of vector table. */ for (; argc > 0; --argc) { if (suword(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* a null vector table pointer separates the argp's from the envp's */ if (suword(vectp++, 0) != 0) return (EFAULT); imgp->envv = vectp; if (suword(&arginfo->ps_envstr, (long)(intptr_t)vectp) != 0 || suword32(&arginfo->ps_nenvstr, envc) != 0) return (EFAULT); /* * Fill in environment portion of vector table. */ for (; envc > 0; --envc) { if (suword(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* end of vector table is a null pointer */ if (suword(vectp, 0) != 0) return (EFAULT); if (imgp->auxargs) { vectp++; error = imgp->sysent->sv_copyout_auxargs(imgp, (uintptr_t)vectp); if (error != 0) return (error); } return (0); } /* * Check permissions of file to execute. * Called with imgp->vp locked. * Return 0 for success or error code on failure. */ int exec_check_permissions(struct image_params *imgp) { struct vnode *vp = imgp->vp; struct vattr *attr = imgp->attr; struct thread *td; int error; td = curthread; /* Get file attributes */ error = VOP_GETATTR(vp, attr, td->td_ucred); if (error) return (error); #ifdef MAC error = mac_vnode_check_exec(td->td_ucred, imgp->vp, imgp); if (error) return (error); #endif /* * 1) Check if file execution is disabled for the filesystem that * this file resides on. * 2) Ensure that at least one execute bit is on. Otherwise, a * privileged user will always succeed, and we don't want this * to happen unless the file really is executable. * 3) Ensure that the file is a regular file. */ if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || (attr->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0 || (attr->va_type != VREG)) return (EACCES); /* * Zero length files can't be exec'd */ if (attr->va_size == 0) return (ENOEXEC); /* * Check for execute permission to file based on current credentials. */ error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); if (error) return (error); /* * Check number of open-for-writes on the file and deny execution * if there are any. * * Add a text reference now so no one can write to the * executable while we're activating it. * * Remember if this was set before and unset it in case this is not * actually an executable image. */ error = VOP_SET_TEXT(vp); if (error != 0) return (error); imgp->textset = true; /* * Call filesystem specific open routine (which does nothing in the * general case). */ error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); if (error == 0) imgp->opened = true; return (error); } /* * Exec handler registration */ int exec_register(const struct execsw *execsw_arg) { const struct execsw **es, **xs, **newexecsw; u_int count = 2; /* New slot and trailing NULL */ if (execsw) for (es = execsw; *es; es++) count++; newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); xs = newexecsw; if (execsw) for (es = execsw; *es; es++) *xs++ = *es; *xs++ = execsw_arg; *xs = NULL; if (execsw) free(execsw, M_TEMP); execsw = newexecsw; return (0); } int exec_unregister(const struct execsw *execsw_arg) { const struct execsw **es, **xs, **newexecsw; int count = 1; if (execsw == NULL) panic("unregister with no handlers left?\n"); for (es = execsw; *es; es++) { if (*es == execsw_arg) break; } if (*es == NULL) return (ENOENT); for (es = execsw; *es; es++) if (*es != execsw_arg) count++; newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); xs = newexecsw; for (es = execsw; *es; es++) if (*es != execsw_arg) *xs++ = *es; *xs = NULL; if (execsw) free(execsw, M_TEMP); execsw = newexecsw; return (0); } /* * Write out a core segment to the compression stream. */ static int compress_chunk(struct coredump_params *cp, char *base, char *buf, size_t len) { size_t chunk_len; int error; while (len > 0) { chunk_len = MIN(len, CORE_BUF_SIZE); /* * We can get EFAULT error here. * In that case zero out the current chunk of the segment. */ error = copyin(base, buf, chunk_len); if (error != 0) bzero(buf, chunk_len); error = compressor_write(cp->comp, buf, chunk_len); if (error != 0) break; base += chunk_len; len -= chunk_len; } return (error); } int core_write(struct coredump_params *cp, const void *base, size_t len, off_t offset, enum uio_seg seg, size_t *resid) { return (vn_rdwr_inchunks(UIO_WRITE, cp->vp, __DECONST(void *, base), len, offset, seg, IO_UNIT | IO_DIRECT | IO_RANGELOCKED, cp->active_cred, cp->file_cred, resid, cp->td)); } int core_output(char *base, size_t len, off_t offset, struct coredump_params *cp, void *tmpbuf) { vm_map_t map; struct mount *mp; size_t resid, runlen; int error; bool success; KASSERT((uintptr_t)base % PAGE_SIZE == 0, ("%s: user address %p is not page-aligned", __func__, base)); if (cp->comp != NULL) return (compress_chunk(cp, base, tmpbuf, len)); map = &cp->td->td_proc->p_vmspace->vm_map; for (; len > 0; base += runlen, offset += runlen, len -= runlen) { /* * Attempt to page in all virtual pages in the range. If a * virtual page is not backed by the pager, it is represented as * a hole in the file. This can occur with zero-filled * anonymous memory or truncated files, for example. */ for (runlen = 0; runlen < len; runlen += PAGE_SIZE) { if (core_dump_can_intr && curproc_sigkilled()) return (EINTR); error = vm_fault(map, (uintptr_t)base + runlen, VM_PROT_READ, VM_FAULT_NOFILL, NULL); if (runlen == 0) success = error == KERN_SUCCESS; else if ((error == KERN_SUCCESS) != success) break; } if (success) { error = core_write(cp, base, runlen, offset, UIO_USERSPACE, &resid); if (error != 0) { if (error != EFAULT) break; /* * EFAULT may be returned if the user mapping * could not be accessed, e.g., because a mapped * file has been truncated. Skip the page if no * progress was made, to protect against a * hypothetical scenario where vm_fault() was * successful but core_write() returns EFAULT * anyway. */ runlen -= resid; if (runlen == 0) { success = false; runlen = PAGE_SIZE; } } } if (!success) { error = vn_start_write(cp->vp, &mp, V_WAIT); if (error != 0) break; vn_lock(cp->vp, LK_EXCLUSIVE | LK_RETRY); error = vn_truncate_locked(cp->vp, offset + runlen, false, cp->td->td_ucred); VOP_UNLOCK(cp->vp); vn_finished_write(mp); if (error != 0) break; } } return (error); } /* * Drain into a core file. */ int sbuf_drain_core_output(void *arg, const char *data, int len) { struct coredump_params *cp; struct proc *p; int error, locked; cp = arg; p = cp->td->td_proc; /* * Some kern_proc out routines that print to this sbuf may * call us with the process lock held. Draining with the * non-sleepable lock held is unsafe. The lock is needed for * those routines when dumping a live process. In our case we * can safely release the lock before draining and acquire * again after. */ locked = PROC_LOCKED(p); if (locked) PROC_UNLOCK(p); if (cp->comp != NULL) error = compressor_write(cp->comp, __DECONST(char *, data), len); else error = core_write(cp, __DECONST(void *, data), len, cp->offset, UIO_SYSSPACE, NULL); if (locked) PROC_LOCK(p); if (error != 0) return (-error); cp->offset += len; return (len); } diff --git a/sys/powerpc/powerpc/elf32_machdep.c b/sys/powerpc/powerpc/elf32_machdep.c index af8c3abccb5d..0471a8a849fa 100644 --- a/sys/powerpc/powerpc/elf32_machdep.c +++ b/sys/powerpc/powerpc/elf32_machdep.c @@ -1,472 +1,471 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __powerpc64__ #include #include extern const char *freebsd32_syscallnames[]; static void ppc32_fixlimit(struct rlimit *rl, int which); static SYSCTL_NODE(_compat, OID_AUTO, ppc32, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "32-bit mode"); #define PPC32_MAXDSIZ (1024*1024*1024) static u_long ppc32_maxdsiz = PPC32_MAXDSIZ; SYSCTL_ULONG(_compat_ppc32, OID_AUTO, maxdsiz, CTLFLAG_RWTUN, &ppc32_maxdsiz, 0, ""); #define PPC32_MAXSSIZ (64*1024*1024) u_long ppc32_maxssiz = PPC32_MAXSSIZ; SYSCTL_ULONG(_compat_ppc32, OID_AUTO, maxssiz, CTLFLAG_RWTUN, &ppc32_maxssiz, 0, ""); #else static void ppc32_runtime_resolve(void); #endif struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, #ifdef __powerpc64__ .sv_table = freebsd32_sysent, #else .sv_table = sysent, #endif .sv_fixup = __elfN(freebsd_fixup), .sv_copyout_auxargs = __elfN(powerpc_copyout_auxargs), .sv_sendsig = sendsig, .sv_sigcode = sigcode32, .sv_szsigcode = &szsigcode32, .sv_name = "FreeBSD ELF32", .sv_coredump = __elfN(coredump), .sv_elf_core_osabi = ELFOSABI_FREEBSD, .sv_elf_core_abi_vendor = FREEBSD_ABI_VENDOR, .sv_elf_core_prepare_notes = __elfN(prepare_notes), - .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_stackprot = VM_PROT_ALL, #ifdef __powerpc64__ .sv_maxuser = VM_MAXUSER_ADDRESS32, .sv_usrstack = FREEBSD32_USRSTACK, .sv_psstrings = FREEBSD32_PS_STRINGS, .sv_psstringssz = sizeof(struct freebsd32_ps_strings), .sv_copyout_strings = freebsd32_copyout_strings, .sv_setregs = ppc32_setregs, .sv_syscallnames = freebsd32_syscallnames, .sv_fixlimit = ppc32_fixlimit, #else .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_psstringssz = sizeof(struct ps_strings), .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_syscallnames = syscallnames, .sv_fixlimit = NULL, #endif .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_ILP32 | SV_SHP | SV_ASLR | SV_TIMEKEEP | SV_RNG_SEED_VER, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_shared_page_base = FREEBSD32_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_hwcap = &cpu_features, .sv_hwcap2 = &cpu_features2, .sv_onexec_old = exec_onexec_old, .sv_onexit = exit_onexit, .sv_regset_begin = SET_BEGIN(__elfN(regset)), .sv_regset_end = SET_LIMIT(__elfN(regset)), }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); static Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC, .compat_3_brand = "FreeBSD", .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, #ifdef __powerpc64__ .interp_newpath = "/libexec/ld-elf32.so.1", #else .interp_newpath = NULL, #endif .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); static Elf32_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC, .compat_3_brand = "FreeBSD", .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(oelf32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_oinfo); void elf_reloc_self(Elf_Dyn *dynp, Elf_Addr relocbase); void elf32_dump_thread(struct thread *td, void *dst, size_t *off) { size_t len; struct pcb *pcb; uint64_t vshr[32]; uint64_t *vsr_dw1; int vsr_idx; len = 0; pcb = td->td_pcb; if (pcb->pcb_flags & PCB_VEC) { save_vec_nodrop(td); if (dst != NULL) { len += elf32_populate_note(NT_PPC_VMX, &pcb->pcb_vec, (char *)dst + len, sizeof(pcb->pcb_vec), NULL); } else len += elf32_populate_note(NT_PPC_VMX, NULL, NULL, sizeof(pcb->pcb_vec), NULL); } if (pcb->pcb_flags & PCB_VSX) { save_fpu_nodrop(td); if (dst != NULL) { /* * Doubleword 0 of VSR0-VSR31 overlap with FPR0-FPR31 and * VSR32-VSR63 overlap with VR0-VR31, so we only copy * the non-overlapping data, which is doubleword 1 of VSR0-VSR31. */ for (vsr_idx = 0; vsr_idx < nitems(vshr); vsr_idx++) { vsr_dw1 = (uint64_t *)&pcb->pcb_fpu.fpr[vsr_idx].vsr[2]; vshr[vsr_idx] = *vsr_dw1; } len += elf32_populate_note(NT_PPC_VSX, vshr, (char *)dst + len, sizeof(vshr), NULL); } else len += elf32_populate_note(NT_PPC_VSX, NULL, NULL, sizeof(vshr), NULL); } *off = len; } #ifndef __powerpc64__ bool elf_is_ifunc_reloc(Elf_Size r_info) { return (ELF_R_TYPE(r_info) == R_PPC_IRELATIVE); } /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, int local, elf_lookup_fn lookup) { Elf_Addr *where; Elf_Half *hwhere; Elf_Addr addr; Elf_Addr addend, val; Elf_Word rtype, symidx; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: panic("PPC only supports RELA relocations"); break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) ((uintptr_t)relocbase + rela->r_offset); hwhere = (Elf_Half *) ((uintptr_t)relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("elf_reloc: unknown relocation mode %d\n", type); } switch (rtype) { case R_PPC_NONE: break; case R_PPC_ADDR32: /* word32 S + A */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); *where = elf_relocaddr(lf, addr + addend); break; case R_PPC_ADDR16_LO: /* #lo(S) */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); /* * addend values are sometimes relative to sections * (i.e. .rodata) in rela, where in reality they * are relative to relocbase. Detect this condition. */ if (addr > relocbase && addr <= (relocbase + addend)) addr = relocbase; addr = elf_relocaddr(lf, addr + addend); *hwhere = addr & 0xffff; break; case R_PPC_ADDR16_HA: /* #ha(S) */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); /* * addend values are sometimes relative to sections * (i.e. .rodata) in rela, where in reality they * are relative to relocbase. Detect this condition. */ if (addr > relocbase && addr <= (relocbase + addend)) addr = relocbase; addr = elf_relocaddr(lf, addr + addend); *hwhere = ((addr >> 16) + ((addr & 0x8000) ? 1 : 0)) & 0xffff; break; case R_PPC_RELATIVE: /* word32 B + A */ *where = elf_relocaddr(lf, relocbase + addend); break; case R_PPC_JMP_SLOT: /* PLT jump slot entry */ /* * We currently only support Secure-PLT jump slots. * Given that we reject BSS-PLT modules during load, we * don't need to check again. * The method we are using here is equivilent to * LD_BIND_NOW. */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); *where = elf_relocaddr(lf, addr + addend); break; case R_PPC_IRELATIVE: addr = relocbase + addend; val = ((Elf32_Addr (*)(void))addr)(); if (*where != val) *where = val; break; default: printf("kldload: unexpected relocation type %d, " "symbol index %d\n", (int)rtype, symidx); return (-1); } return (0); } void elf_reloc_self(Elf_Dyn *dynp, Elf_Addr relocbase) { Elf_Rela *rela = NULL, *relalim; Elf_Addr relasz = 0; Elf_Addr *where; /* * Extract the rela/relasz values from the dynamic section */ for (; dynp->d_tag != DT_NULL; dynp++) { switch (dynp->d_tag) { case DT_RELA: rela = (Elf_Rela *)(relocbase+dynp->d_un.d_ptr); break; case DT_RELASZ: relasz = dynp->d_un.d_val; break; } } /* * Relocate these values */ relalim = (Elf_Rela *)((caddr_t)rela + relasz); for (; rela < relalim; rela++) { if (ELF_R_TYPE(rela->r_info) != R_PPC_RELATIVE) continue; where = (Elf_Addr *)(relocbase + rela->r_offset); *where = (Elf_Addr)(relocbase + rela->r_addend); } } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup)); } int elf_cpu_load_file(linker_file_t lf) { /* Only sync the cache for non-kernel modules */ if (lf->id != 1) __syncicache(lf->address, lf->size); return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } static void ppc32_runtime_resolve(void) { /* * Since we don't support lazy binding, panic immediately if anyone * manages to call the runtime resolver. */ panic("kldload: Runtime resolver was called unexpectedly!"); } int elf_cpu_parse_dynamic(caddr_t loadbase, Elf_Dyn *dynamic) { Elf_Dyn *dp; bool has_plt = false; bool secure_plt = false; Elf_Addr *got; for (dp = dynamic; dp->d_tag != DT_NULL; dp++) { switch (dp->d_tag) { case DT_PPC_GOT: secure_plt = true; got = (Elf_Addr *)(loadbase + dp->d_un.d_ptr); /* Install runtime resolver canary. */ got[1] = (Elf_Addr)ppc32_runtime_resolve; got[2] = (Elf_Addr)0; break; case DT_PLTGOT: has_plt = true; break; } } if (has_plt && !secure_plt) { printf("kldload: BSS-PLT modules are not supported.\n"); return (-1); } return (0); } #endif #ifdef __powerpc64__ static void ppc32_fixlimit(struct rlimit *rl, int which) { switch (which) { case RLIMIT_DATA: if (ppc32_maxdsiz != 0) { if (rl->rlim_cur > ppc32_maxdsiz) rl->rlim_cur = ppc32_maxdsiz; if (rl->rlim_max > ppc32_maxdsiz) rl->rlim_max = ppc32_maxdsiz; } break; case RLIMIT_STACK: if (ppc32_maxssiz != 0) { if (rl->rlim_cur > ppc32_maxssiz) rl->rlim_cur = ppc32_maxssiz; if (rl->rlim_max > ppc32_maxssiz) rl->rlim_max = ppc32_maxssiz; } break; } } #endif diff --git a/sys/powerpc/powerpc/elf64_machdep.c b/sys/powerpc/powerpc/elf64_machdep.c index 46de0de0348a..6d3ba24560b0 100644 --- a/sys/powerpc/powerpc/elf64_machdep.c +++ b/sys/powerpc/powerpc/elf64_machdep.c @@ -1,464 +1,462 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void exec_setregs_funcdesc(struct thread *td, struct image_params *imgp, uintptr_t stack); struct sysentvec elf64_freebsd_sysvec_v1 = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode64, .sv_szsigcode = &szsigcode64, .sv_name = "FreeBSD ELF64", .sv_coredump = __elfN(coredump), .sv_elf_core_osabi = ELFOSABI_FREEBSD, .sv_elf_core_abi_vendor = FREEBSD_ABI_VENDOR, .sv_elf_core_prepare_notes = __elfN(prepare_notes), - .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_psstringssz = sizeof(struct ps_strings), .sv_stackprot = VM_PROT_ALL, .sv_copyout_auxargs = __elfN(powerpc_copyout_auxargs), .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs_funcdesc, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP | SV_ASLR | SV_TIMEKEEP | SV_RNG_SEED_VER, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_hwcap = &cpu_features, .sv_hwcap2 = &cpu_features2, .sv_onexec_old = exec_onexec_old, .sv_onexit = exit_onexit, .sv_regset_begin = SET_BEGIN(__elfN(regset)), .sv_regset_end = SET_LIMIT(__elfN(regset)), }; struct sysentvec elf64_freebsd_sysvec_v2 = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode64, /* Fixed up in ppc64_init_sysvecs(). */ .sv_szsigcode = &szsigcode64, .sv_name = "FreeBSD ELF64 V2", .sv_coredump = __elfN(coredump), .sv_elf_core_osabi = ELFOSABI_FREEBSD, .sv_elf_core_abi_vendor = FREEBSD_ABI_VENDOR, .sv_elf_core_prepare_notes = __elfN(prepare_notes), - .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_psstringssz = sizeof(struct ps_strings), .sv_stackprot = VM_PROT_ALL, .sv_copyout_auxargs = __elfN(powerpc_copyout_auxargs), .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP | SV_TIMEKEEP | SV_RNG_SEED_VER, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_hwcap = &cpu_features, .sv_hwcap2 = &cpu_features2, .sv_onexec_old = exec_onexec_old, .sv_onexit = exit_onexit, .sv_regset_begin = SET_BEGIN(__elfN(regset)), .sv_regset_end = SET_LIMIT(__elfN(regset)), }; static bool ppc64_elfv1_header_match(struct image_params *params, int32_t *, uint32_t *); static bool ppc64_elfv2_header_match(struct image_params *params, int32_t *, uint32_t *); static Elf64_Brandinfo freebsd_brand_info_elfv1 = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC64, .compat_3_brand = "FreeBSD", .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec_v1, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, .header_supported = &ppc64_elfv1_header_match }; SYSINIT(elf64v1, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_info_elfv1); static Elf64_Brandinfo freebsd_brand_info_elfv2 = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC64, .compat_3_brand = "FreeBSD", .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec_v2, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, .header_supported = &ppc64_elfv2_header_match }; SYSINIT(elf64v2, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_info_elfv2); static Elf64_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC64, .compat_3_brand = "FreeBSD", .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec_v1, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, .header_supported = &ppc64_elfv1_header_match }; SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_oinfo); void elf_reloc_self(Elf_Dyn *dynp, Elf_Addr relocbase); static void ppc64_init_sysvecs(void *arg) { exec_sysvec_init(&elf64_freebsd_sysvec_v2); exec_sysvec_init_secondary(&elf64_freebsd_sysvec_v2, &elf64_freebsd_sysvec_v1); /* * Adjust elfv2 sigcode after elfv1 sysvec is initialized. * exec_sysvec_init_secondary() assumes secondary sysvecs use * identical signal code, and skips allocating a second copy. * Since the ELFv2 trampoline is a strict subset of the ELFv1 code, * we can work around this by adjusting the offset. This also * avoids two copies of the trampoline code being allocated! */ elf64_freebsd_sysvec_v2.sv_sigcode_offset += (uintptr_t)sigcode64_elfv2 - (uintptr_t)&sigcode64; elf64_freebsd_sysvec_v2.sv_szsigcode = &szsigcode64_elfv2; } SYSINIT(elf64_sysvec, SI_SUB_EXEC, SI_ORDER_ANY, ppc64_init_sysvecs, NULL); static bool ppc64_elfv1_header_match(struct image_params *params, int32_t *osrel __unused, uint32_t *fctl0 __unused) { const Elf64_Ehdr *hdr = (const Elf64_Ehdr *)params->image_header; int abi = (hdr->e_flags & 3); return (abi == 0 || abi == 1); } static bool ppc64_elfv2_header_match(struct image_params *params, int32_t *osrel __unused, uint32_t *fctl0 __unused) { const Elf64_Ehdr *hdr = (const Elf64_Ehdr *)params->image_header; int abi = (hdr->e_flags & 3); return (abi == 2); } static void exec_setregs_funcdesc(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct trapframe *tf; register_t entry_desc[3]; tf = trapframe(td); exec_setregs(td, imgp, stack); /* * For 64-bit ELFv1, we need to disentangle the function * descriptor * * 0. entry point * 1. TOC value (r2) * 2. Environment pointer (r11) */ (void)copyin((void *)imgp->entry_addr, entry_desc, sizeof(entry_desc)); tf->srr0 = entry_desc[0] + imgp->reloc_base; tf->fixreg[2] = entry_desc[1] + imgp->reloc_base; tf->fixreg[11] = entry_desc[2] + imgp->reloc_base; } void elf64_dump_thread(struct thread *td, void *dst, size_t *off) { size_t len; struct pcb *pcb; uint64_t vshr[32]; uint64_t *vsr_dw1; int vsr_idx; len = 0; pcb = td->td_pcb; if (pcb->pcb_flags & PCB_VEC) { save_vec_nodrop(td); if (dst != NULL) { len += elf64_populate_note(NT_PPC_VMX, &pcb->pcb_vec, (char *)dst + len, sizeof(pcb->pcb_vec), NULL); } else len += elf64_populate_note(NT_PPC_VMX, NULL, NULL, sizeof(pcb->pcb_vec), NULL); } if (pcb->pcb_flags & PCB_VSX) { save_fpu_nodrop(td); if (dst != NULL) { /* * Doubleword 0 of VSR0-VSR31 overlap with FPR0-FPR31 and * VSR32-VSR63 overlap with VR0-VR31, so we only copy * the non-overlapping data, which is doubleword 1 of VSR0-VSR31. */ for (vsr_idx = 0; vsr_idx < nitems(vshr); vsr_idx++) { vsr_dw1 = (uint64_t *)&pcb->pcb_fpu.fpr[vsr_idx].vsr[2]; vshr[vsr_idx] = *vsr_dw1; } len += elf64_populate_note(NT_PPC_VSX, vshr, (char *)dst + len, sizeof(vshr), NULL); } else len += elf64_populate_note(NT_PPC_VSX, NULL, NULL, sizeof(vshr), NULL); } *off = len; } bool elf_is_ifunc_reloc(Elf_Size r_info) { return (ELF_R_TYPE(r_info) == R_PPC_IRELATIVE); } /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, int local, elf_lookup_fn lookup) { Elf_Addr *where; Elf_Addr addr; Elf_Addr addend, val; Elf_Word rtype, symidx; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: panic("PPC only supports RELA relocations"); break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("elf_reloc: unknown relocation mode %d\n", type); } switch (rtype) { case R_PPC_NONE: break; case R_PPC64_ADDR64: /* doubleword64 S + A */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); addr += addend; *where = addr; break; case R_PPC_RELATIVE: /* doubleword64 B + A */ *where = elf_relocaddr(lf, relocbase + addend); break; case R_PPC_JMP_SLOT: /* function descriptor copy */ lookup(lf, symidx, 1, &addr); #if !defined(_CALL_ELF) || _CALL_ELF == 1 memcpy(where, (Elf_Addr *)addr, 3*sizeof(Elf_Addr)); #else *where = addr; #endif __asm __volatile("dcbst 0,%0; sync" :: "r"(where) : "memory"); break; case R_PPC_IRELATIVE: addr = relocbase + addend; val = ((Elf64_Addr (*)(void))addr)(); if (*where != val) *where = val; break; default: printf("kldload: unexpected relocation type %d, " "symbol index %d\n", (int)rtype, symidx); return (-1); } return (0); } void elf_reloc_self(Elf_Dyn *dynp, Elf_Addr relocbase) { Elf_Rela *rela = NULL, *relalim; Elf_Addr relasz = 0; Elf_Addr *where; /* * Extract the rela/relasz values from the dynamic section */ for (; dynp->d_tag != DT_NULL; dynp++) { switch (dynp->d_tag) { case DT_RELA: rela = (Elf_Rela *)(relocbase+dynp->d_un.d_ptr); break; case DT_RELASZ: relasz = dynp->d_un.d_val; break; } } /* * Relocate these values */ relalim = (Elf_Rela *)((caddr_t)rela + relasz); for (; rela < relalim; rela++) { if (ELF_R_TYPE(rela->r_info) != R_PPC_RELATIVE) continue; where = (Elf_Addr *)(relocbase + rela->r_offset); *where = (Elf_Addr)(relocbase + rela->r_addend); } } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup)); } int elf_cpu_load_file(linker_file_t lf) { /* Only sync the cache for non-kernel modules */ if (lf->id != 1) __syncicache(lf->address, lf->size); return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } int elf_cpu_parse_dynamic(caddr_t loadbase __unused, Elf_Dyn *dynamic __unused) { return (0); } diff --git a/sys/riscv/riscv/elf_machdep.c b/sys/riscv/riscv/elf_machdep.c index 3f6df253f16e..e0d37257facd 100644 --- a/sys/riscv/riscv/elf_machdep.c +++ b/sys/riscv/riscv/elf_machdep.c @@ -1,566 +1,565 @@ /*- * Copyright 1996-1998 John D. Polstra. * Copyright (c) 2015 Ruslan Bukin * Copyright (c) 2016 Yukishige Shibata * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static const char *riscv_machine_arch(struct proc *p); u_long elf_hwcap; static struct sysentvec elf64_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD ELF64", .sv_coredump = __elfN(coredump), .sv_elf_core_osabi = ELFOSABI_FREEBSD, .sv_elf_core_abi_vendor = FREEBSD_ABI_VENDOR, .sv_elf_core_prepare_notes = __elfN(prepare_notes), - .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = 0, /* Filled in during boot. */ .sv_usrstack = 0, /* Filled in during boot. */ .sv_psstrings = 0, /* Filled in during boot. */ .sv_psstringssz = sizeof(struct ps_strings), .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, .sv_copyout_auxargs = __elfN(freebsd_copyout_auxargs), .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP | SV_TIMEKEEP | SV_ASLR | SV_RNG_SEED_VER, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = 0, /* Filled in during boot. */ .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_hwcap = &elf_hwcap, .sv_machine_arch = riscv_machine_arch, .sv_onexec_old = exec_onexec_old, .sv_onexit = exit_onexit, .sv_regset_begin = SET_BEGIN(__elfN(regset)), .sv_regset_end = SET_LIMIT(__elfN(regset)), }; INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec); static const char * riscv_machine_arch(struct proc *p) { if ((p->p_elf_flags & EF_RISCV_FLOAT_ABI_MASK) == EF_RISCV_FLOAT_ABI_SOFT) return (MACHINE_ARCH "sf"); return (MACHINE_ARCH); } static Elf64_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_RISCV, .compat_3_brand = "FreeBSD", .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_info); static void elf64_register_sysvec(void *arg) { struct sysentvec *sv; sv = arg; switch (pmap_mode) { case PMAP_MODE_SV48: sv->sv_maxuser = VM_MAX_USER_ADDRESS_SV48; sv->sv_usrstack = USRSTACK_SV48; sv->sv_psstrings = PS_STRINGS_SV48; sv->sv_shared_page_base = SHAREDPAGE_SV48; break; case PMAP_MODE_SV39: sv->sv_maxuser = VM_MAX_USER_ADDRESS_SV39; sv->sv_usrstack = USRSTACK_SV39; sv->sv_psstrings = PS_STRINGS_SV39; sv->sv_shared_page_base = SHAREDPAGE_SV39; break; } } SYSINIT(elf64_register_sysvec, SI_SUB_VM, SI_ORDER_ANY, elf64_register_sysvec, &elf64_freebsd_sysvec); static bool debug_kld; SYSCTL_BOOL(_debug, OID_AUTO, kld_reloc, CTLFLAG_RW, &debug_kld, 0, "Activate debug prints in elf_reloc_internal()"); struct type2str_ent { int type; const char *str; }; void elf64_dump_thread(struct thread *td, void *dst, size_t *off) { } /* * Following 4 functions are used to manipulate bits on 32bit integer value. * FIXME: I implemetend for ease-to-understand rather than for well-optimized. */ static uint32_t gen_bitmask(int msb, int lsb) { uint32_t mask; if (msb == sizeof(mask) * 8 - 1) mask = ~0; else mask = (1U << (msb + 1)) - 1; if (lsb > 0) mask &= ~((1U << lsb) - 1); return (mask); } static uint32_t extract_bits(uint32_t x, int msb, int lsb) { uint32_t mask; mask = gen_bitmask(msb, lsb); x &= mask; x >>= lsb; return (x); } static uint32_t insert_bits(uint32_t d, uint32_t s, int msb, int lsb) { uint32_t mask; mask = gen_bitmask(msb, lsb); d &= ~mask; s <<= lsb; s &= mask; return (d | s); } static uint32_t insert_imm(uint32_t insn, uint32_t imm, int imm_msb, int imm_lsb, int insn_lsb) { int insn_msb; uint32_t v; v = extract_bits(imm, imm_msb, imm_lsb); insn_msb = (imm_msb - imm_lsb) + insn_lsb; return (insert_bits(insn, v, insn_msb, insn_lsb)); } /* * The RISC-V ISA is designed so that all of immediate values are * sign-extended. * An immediate value is sometimes generated at runtime by adding * 12bit sign integer and 20bit signed integer. This requests 20bit * immediate value to be ajusted if the MSB of the 12bit immediate * value is asserted (sign-extended value is treated as negative value). * * For example, 0x123800 can be calculated by adding upper 20 bit of * 0x124000 and sign-extended 12bit immediate whose bit pattern is * 0x800 as follows: * 0x123800 * = 0x123000 + 0x800 * = (0x123000 + 0x1000) + (-0x1000 + 0x800) * = (0x123000 + 0x1000) + (0xff...ff800) * = 0x124000 + sign-extention(0x800) */ static uint32_t calc_hi20_imm(uint32_t value) { /* * There is the arithmetical hack that can remove conditional * statement. But I implement it in straightforward way. */ if ((value & 0x800) != 0) value += 0x1000; return (value & ~0xfff); } static const struct type2str_ent t2s[] = { { R_RISCV_NONE, "R_RISCV_NONE" }, { R_RISCV_64, "R_RISCV_64" }, { R_RISCV_JUMP_SLOT, "R_RISCV_JUMP_SLOT" }, { R_RISCV_RELATIVE, "R_RISCV_RELATIVE" }, { R_RISCV_JAL, "R_RISCV_JAL" }, { R_RISCV_CALL, "R_RISCV_CALL" }, { R_RISCV_PCREL_HI20, "R_RISCV_PCREL_HI20" }, { R_RISCV_PCREL_LO12_I, "R_RISCV_PCREL_LO12_I" }, { R_RISCV_PCREL_LO12_S, "R_RISCV_PCREL_LO12_S" }, { R_RISCV_HI20, "R_RISCV_HI20" }, { R_RISCV_LO12_I, "R_RISCV_LO12_I" }, { R_RISCV_LO12_S, "R_RISCV_LO12_S" }, }; static const char * reloctype_to_str(int type) { int i; for (i = 0; i < sizeof(t2s) / sizeof(t2s[0]); ++i) { if (type == t2s[i].type) return t2s[i].str; } return "*unknown*"; } bool elf_is_ifunc_reloc(Elf_Size r_info __unused) { return (false); } /* * Currently kernel loadable module for RISCV is compiled with -fPIC option. * (see also additional CFLAGS definition for RISCV in sys/conf/kmod.mk) * Only R_RISCV_64, R_RISCV_JUMP_SLOT and RISCV_RELATIVE are emitted in * the module. Other relocations will be processed when kernel loadable * modules are built in non-PIC. * * FIXME: only RISCV64 is supported. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, int local, elf_lookup_fn lookup) { Elf_Size rtype, symidx; const Elf_Rela *rela; Elf_Addr val, addr; Elf64_Addr *where; Elf_Addr addend; uint32_t before32_1; uint32_t before32; uint64_t before64; uint32_t *insn32p; uint32_t imm20; int error; switch (type) { case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *)(relocbase + rela->r_offset); insn32p = (uint32_t *)where; addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: printf("%s:%d unknown reloc type %d\n", __FUNCTION__, __LINE__, type); return (-1); } switch (rtype) { case R_RISCV_NONE: break; case R_RISCV_64: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); before64 = *where; *where = addr + addend; if (debug_kld) printf("%p %c %-24s %016lx -> %016lx\n", where, (local ? 'l' : 'g'), reloctype_to_str(rtype), before64, *where); break; case R_RISCV_JUMP_SLOT: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); before64 = *where; *where = addr; if (debug_kld) printf("%p %c %-24s %016lx -> %016lx\n", where, (local ? 'l' : 'g'), reloctype_to_str(rtype), before64, *where); break; case R_RISCV_RELATIVE: before64 = *where; *where = elf_relocaddr(lf, relocbase + addend); if (debug_kld) printf("%p %c %-24s %016lx -> %016lx\n", where, (local ? 'l' : 'g'), reloctype_to_str(rtype), before64, *where); break; case R_RISCV_JAL: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); val = addr - (Elf_Addr)where; if (val <= -(1UL << 20) || (1UL << 20) <= val) { printf("kldload: huge offset against R_RISCV_JAL\n"); return (-1); } before32 = *insn32p; *insn32p = insert_imm(*insn32p, val, 20, 20, 31); *insn32p = insert_imm(*insn32p, val, 10, 1, 21); *insn32p = insert_imm(*insn32p, val, 11, 11, 20); *insn32p = insert_imm(*insn32p, val, 19, 12, 12); if (debug_kld) printf("%p %c %-24s %08x -> %08x\n", where, (local ? 'l' : 'g'), reloctype_to_str(rtype), before32, *insn32p); break; case R_RISCV_CALL: /* * R_RISCV_CALL relocates 8-byte region that consists * of the sequence of AUIPC and JALR. */ /* Calculate and check the pc relative offset. */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); val = addr - (Elf_Addr)where; if (val <= -(1UL << 32) || (1UL << 32) <= val) { printf("kldload: huge offset against R_RISCV_CALL\n"); return (-1); } /* Relocate AUIPC. */ before32 = insn32p[0]; imm20 = calc_hi20_imm(val); insn32p[0] = insert_imm(insn32p[0], imm20, 31, 12, 12); /* Relocate JALR. */ before32_1 = insn32p[1]; insn32p[1] = insert_imm(insn32p[1], val, 11, 0, 20); if (debug_kld) printf("%p %c %-24s %08x %08x -> %08x %08x\n", where, (local ? 'l' : 'g'), reloctype_to_str(rtype), before32, insn32p[0], before32_1, insn32p[1]); break; case R_RISCV_PCREL_HI20: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); val = addr - (Elf_Addr)where; insn32p = (uint32_t *)where; before32 = *insn32p; imm20 = calc_hi20_imm(val); *insn32p = insert_imm(*insn32p, imm20, 31, 12, 12); if (debug_kld) printf("%p %c %-24s %08x -> %08x\n", where, (local ? 'l' : 'g'), reloctype_to_str(rtype), before32, *insn32p); break; case R_RISCV_PCREL_LO12_I: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); val = addr - (Elf_Addr)where; insn32p = (uint32_t *)where; before32 = *insn32p; *insn32p = insert_imm(*insn32p, addr, 11, 0, 20); if (debug_kld) printf("%p %c %-24s %08x -> %08x\n", where, (local ? 'l' : 'g'), reloctype_to_str(rtype), before32, *insn32p); break; case R_RISCV_PCREL_LO12_S: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); val = addr - (Elf_Addr)where; insn32p = (uint32_t *)where; before32 = *insn32p; *insn32p = insert_imm(*insn32p, addr, 11, 5, 25); *insn32p = insert_imm(*insn32p, addr, 4, 0, 7); if (debug_kld) printf("%p %c %-24s %08x -> %08x\n", where, (local ? 'l' : 'g'), reloctype_to_str(rtype), before32, *insn32p); break; case R_RISCV_HI20: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); val = addr; insn32p = (uint32_t *)where; before32 = *insn32p; imm20 = calc_hi20_imm(val); *insn32p = insert_imm(*insn32p, imm20, 31, 12, 12); if (debug_kld) printf("%p %c %-24s %08x -> %08x\n", where, (local ? 'l' : 'g'), reloctype_to_str(rtype), before32, *insn32p); break; case R_RISCV_LO12_I: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); val = addr; insn32p = (uint32_t *)where; before32 = *insn32p; *insn32p = insert_imm(*insn32p, addr, 11, 0, 20); if (debug_kld) printf("%p %c %-24s %08x -> %08x\n", where, (local ? 'l' : 'g'), reloctype_to_str(rtype), before32, *insn32p); break; case R_RISCV_LO12_S: error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); val = addr; insn32p = (uint32_t *)where; before32 = *insn32p; *insn32p = insert_imm(*insn32p, addr, 11, 5, 25); *insn32p = insert_imm(*insn32p, addr, 4, 0, 7); if (debug_kld) printf("%p %c %-24s %08x -> %08x\n", where, (local ? 'l' : 'g'), reloctype_to_str(rtype), before32, *insn32p); break; default: printf("kldload: unexpected relocation type %ld, " "symbol index %ld\n", rtype, symidx); return (-1); } return (0); } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup)); } int elf_cpu_load_file(linker_file_t lf __unused) { return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } int elf_cpu_parse_dynamic(caddr_t loadbase __unused, Elf_Dyn *dynamic __unused) { return (0); } diff --git a/sys/sys/sysent.h b/sys/sys/sysent.h index a77feb5bcbf7..c59fa380aa4c 100644 --- a/sys/sys/sysent.h +++ b/sys/sys/sysent.h @@ -1,345 +1,344 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1988, 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_SYSENT_H_ #define _SYS_SYSENT_H_ #include struct rlimit; struct sysent; struct thread; struct ksiginfo; struct syscall_args; enum systrace_probe_t { SYSTRACE_ENTRY, SYSTRACE_RETURN, }; typedef int sy_call_t(struct thread *, void *); typedef void (*systrace_probe_func_t)(struct syscall_args *, enum systrace_probe_t, int); typedef void (*systrace_args_func_t)(int, void *, uint64_t *, int *); #ifdef _KERNEL extern systrace_probe_func_t systrace_probe_func; extern bool systrace_enabled; #ifdef KDTRACE_HOOKS #define SYSTRACE_ENABLED() (systrace_enabled) #else #define SYSTRACE_ENABLED() (0) #endif #endif /* _KERNEL */ struct sysent { /* system call table */ sy_call_t *sy_call; /* implementing function */ systrace_args_func_t sy_systrace_args_func; /* optional argument conversion function. */ u_int8_t sy_narg; /* number of arguments */ u_int8_t sy_flags; /* General flags for system calls. */ au_event_t sy_auevent; /* audit event associated with syscall */ u_int32_t sy_entry; /* DTrace entry ID for systrace. */ u_int32_t sy_return; /* DTrace return ID for systrace. */ u_int32_t sy_thrcnt; }; /* * A system call is permitted in capability mode. */ #define SYF_CAPENABLED 0x00000001 #define SY_THR_FLAGMASK 0x7 #define SY_THR_STATIC 0x1 #define SY_THR_DRAINING 0x2 #define SY_THR_ABSENT 0x4 #define SY_THR_INCR 0x8 #ifdef KLD_MODULE #define SY_THR_STATIC_KLD 0 #else #define SY_THR_STATIC_KLD SY_THR_STATIC #endif struct image_params; struct proc; struct __sigset; struct trapframe; struct vnode; struct note_info_list; struct sysentvec { int sv_size; /* number of entries */ struct sysent *sv_table; /* pointer to sysent */ int (*sv_fixup)(uintptr_t *, struct image_params *); /* stack fixup function */ void (*sv_sendsig)(void (*)(int), struct ksiginfo *, struct __sigset *); /* send signal */ const char *sv_sigcode; /* start of sigtramp code */ int *sv_szsigcode; /* size of sigtramp code */ int sv_sigcodeoff; char *sv_name; /* name of binary type */ int (*sv_coredump)(struct thread *, struct vnode *, off_t, int); /* function to dump core, or NULL */ int sv_elf_core_osabi; const char *sv_elf_core_abi_vendor; void (*sv_elf_core_prepare_notes)(struct thread *, struct note_info_list *, size_t *); - int (*sv_imgact_try)(struct image_params *); int (*sv_copyout_auxargs)(struct image_params *, uintptr_t); int sv_minsigstksz; /* minimum signal stack size */ vm_offset_t sv_minuser; /* VM_MIN_ADDRESS */ vm_offset_t sv_maxuser; /* VM_MAXUSER_ADDRESS */ vm_offset_t sv_usrstack; /* USRSTACK */ vm_offset_t sv_psstrings; /* PS_STRINGS */ size_t sv_psstringssz; /* PS_STRINGS size */ int sv_stackprot; /* vm protection for stack */ int (*sv_copyout_strings)(struct image_params *, uintptr_t *); void (*sv_setregs)(struct thread *, struct image_params *, uintptr_t); void (*sv_fixlimit)(struct rlimit *, int); u_long *sv_maxssiz; u_int sv_flags; void (*sv_set_syscall_retval)(struct thread *, int); int (*sv_fetch_syscall_args)(struct thread *); const char **sv_syscallnames; vm_offset_t sv_timekeep_offset; vm_offset_t sv_shared_page_base; vm_offset_t sv_shared_page_len; vm_offset_t sv_sigcode_offset; void *sv_shared_page_obj; vm_offset_t sv_vdso_offset; void (*sv_schedtail)(struct thread *); void (*sv_thread_detach)(struct thread *); int (*sv_trap)(struct thread *); u_long *sv_hwcap; /* Value passed in AT_HWCAP. */ u_long *sv_hwcap2; /* Value passed in AT_HWCAP2. */ const char *(*sv_machine_arch)(struct proc *); vm_offset_t sv_fxrng_gen_offset; void (*sv_onexec_old)(struct thread *td); int (*sv_onexec)(struct proc *, struct image_params *); void (*sv_onexit)(struct proc *); void (*sv_ontdexit)(struct thread *td); int (*sv_setid_allowed)(struct thread *td, struct image_params *imgp); void (*sv_set_fork_retval)(struct thread *); /* Only used on x86 */ struct regset **sv_regset_begin; struct regset **sv_regset_end; }; #define SV_ILP32 0x000100 /* 32-bit executable. */ #define SV_LP64 0x000200 /* 64-bit executable. */ #define SV_IA32 0x004000 /* Intel 32-bit executable. */ #define SV_AOUT 0x008000 /* a.out executable. */ #define SV_SHP 0x010000 /* Shared page. */ #define SV_AVAIL1 0x020000 /* Unused */ #define SV_TIMEKEEP 0x040000 /* Shared page timehands. */ #define SV_ASLR 0x080000 /* ASLR allowed. */ #define SV_RNG_SEED_VER 0x100000 /* random(4) reseed generation. */ #define SV_SIG_DISCIGN 0x200000 /* Do not discard ignored signals */ #define SV_SIG_WAITNDQ 0x400000 /* Wait does not dequeue SIGCHLD */ #define SV_DSO_SIG 0x800000 /* Signal trampoline packed in dso */ #define SV_ABI_MASK 0xff #define SV_PROC_FLAG(p, x) ((p)->p_sysent->sv_flags & (x)) #define SV_PROC_ABI(p) ((p)->p_sysent->sv_flags & SV_ABI_MASK) #define SV_CURPROC_FLAG(x) SV_PROC_FLAG(curproc, x) #define SV_CURPROC_ABI() SV_PROC_ABI(curproc) /* same as ELFOSABI_XXX, to prevent header pollution */ #define SV_ABI_LINUX 3 #define SV_ABI_FREEBSD 9 #define SV_ABI_UNDEF 255 /* sv_coredump flags */ #define SVC_PT_COREDUMP 0x00000001 /* dump requested by ptrace(2) */ #define SVC_NOCOMPRESS 0x00000002 /* disable compression. */ #define SVC_ALL 0x00000004 /* dump everything */ #ifdef _KERNEL extern struct sysentvec aout_sysvec; extern struct sysent sysent[]; extern const char *syscallnames[]; struct nosys_args { register_t dummy; }; int nosys(struct thread *, struct nosys_args *); #define NO_SYSCALL (-1) struct module; struct syscall_module_data { int (*chainevh)(struct module *, int, void *); /* next handler */ void *chainarg; /* arg for next event handler */ int *offset; /* offset into sysent */ struct sysent *new_sysent; /* new sysent */ struct sysent old_sysent; /* old sysent */ int flags; /* flags for syscall_register */ }; /* separate initialization vector so it can be used in a substructure */ #define SYSENT_INIT_VALS(_syscallname) { \ .sy_narg = (sizeof(struct _syscallname ## _args ) \ / sizeof(register_t)), \ .sy_call = (sy_call_t *)&sys_##_syscallname, \ .sy_auevent = SYS_AUE_##_syscallname, \ .sy_systrace_args_func = NULL, \ .sy_entry = 0, \ .sy_return = 0, \ .sy_flags = 0, \ .sy_thrcnt = 0 \ } #define MAKE_SYSENT(syscallname) \ static struct sysent syscallname##_sysent = SYSENT_INIT_VALS(syscallname); #define MAKE_SYSENT_COMPAT(syscallname) \ static struct sysent syscallname##_sysent = { \ (sizeof(struct syscallname ## _args ) \ / sizeof(register_t)), \ (sy_call_t *)& syscallname, \ SYS_AUE_##syscallname \ } #define SYSCALL_MODULE(name, offset, new_sysent, evh, arg) \ static struct syscall_module_data name##_syscall_mod = { \ evh, arg, offset, new_sysent, { 0, NULL, AUE_NULL } \ }; \ \ static moduledata_t name##_mod = { \ "sys/" #name, \ syscall_module_handler, \ &name##_syscall_mod \ }; \ DECLARE_MODULE(name, name##_mod, SI_SUB_SYSCALLS, SI_ORDER_MIDDLE) #define SYSCALL_MODULE_HELPER(syscallname) \ static int syscallname##_syscall = SYS_##syscallname; \ MAKE_SYSENT(syscallname); \ SYSCALL_MODULE(syscallname, \ & syscallname##_syscall, & syscallname##_sysent, \ NULL, NULL) #define SYSCALL_MODULE_PRESENT(syscallname) \ (sysent[SYS_##syscallname].sy_call != (sy_call_t *)lkmnosys && \ sysent[SYS_##syscallname].sy_call != (sy_call_t *)lkmressys) /* * Syscall registration helpers with resource allocation handling. */ struct syscall_helper_data { struct sysent new_sysent; struct sysent old_sysent; int syscall_no; int registered; }; #define SYSCALL_INIT_HELPER_F(syscallname, flags) { \ .new_sysent = { \ .sy_narg = (sizeof(struct syscallname ## _args ) \ / sizeof(register_t)), \ .sy_call = (sy_call_t *)& sys_ ## syscallname, \ .sy_auevent = SYS_AUE_##syscallname, \ .sy_flags = (flags) \ }, \ .syscall_no = SYS_##syscallname \ } #define SYSCALL_INIT_HELPER_COMPAT_F(syscallname, flags) { \ .new_sysent = { \ .sy_narg = (sizeof(struct syscallname ## _args ) \ / sizeof(register_t)), \ .sy_call = (sy_call_t *)& syscallname, \ .sy_auevent = SYS_AUE_##syscallname, \ .sy_flags = (flags) \ }, \ .syscall_no = SYS_##syscallname \ } #define SYSCALL_INIT_HELPER(syscallname) \ SYSCALL_INIT_HELPER_F(syscallname, 0) #define SYSCALL_INIT_HELPER_COMPAT(syscallname) \ SYSCALL_INIT_HELPER_COMPAT_F(syscallname, 0) #define SYSCALL_INIT_LAST { \ .syscall_no = NO_SYSCALL \ } int syscall_module_handler(struct module *mod, int what, void *arg); int syscall_helper_register(struct syscall_helper_data *sd, int flags); int syscall_helper_unregister(struct syscall_helper_data *sd); /* Implementation, exposed for COMPAT code */ int kern_syscall_register(struct sysent *sysents, int *offset, struct sysent *new_sysent, struct sysent *old_sysent, int flags); int kern_syscall_deregister(struct sysent *sysents, int offset, const struct sysent *old_sysent); int kern_syscall_module_handler(struct sysent *sysents, struct module *mod, int what, void *arg); int kern_syscall_helper_register(struct sysent *sysents, struct syscall_helper_data *sd, int flags); int kern_syscall_helper_unregister(struct sysent *sysents, struct syscall_helper_data *sd); struct proc; const char *syscallname(struct proc *p, u_int code); /* Special purpose system call functions. */ struct nosys_args; int lkmnosys(struct thread *, struct nosys_args *); int lkmressys(struct thread *, struct nosys_args *); int syscall_thread_enter(struct thread *td, struct sysent *se); void syscall_thread_exit(struct thread *td, struct sysent *se); int shared_page_alloc(int size, int align); int shared_page_fill(int size, int align, const void *data); void shared_page_write(int base, int size, const void *data); void exec_sysvec_init(void *param); void exec_sysvec_init_secondary(struct sysentvec *sv, struct sysentvec *sv2); void exec_inittk(void); void exit_onexit(struct proc *p); void exec_free_abi_mappings(struct proc *p); void exec_onexec_old(struct thread *td); #define INIT_SYSENTVEC(name, sv) \ SYSINIT(name, SI_SUB_EXEC, SI_ORDER_ANY, \ (sysinit_cfunc_t)exec_sysvec_init, sv); #endif /* _KERNEL */ #endif /* !_SYS_SYSENT_H_ */