diff --git a/sys/amd64/amd64/elf_machdep.c b/sys/amd64/amd64/elf_machdep.c index 6535d4bd2446..8cccdc254fe6 100644 --- a/sys/amd64/amd64/elf_machdep.c +++ b/sys/amd64/amd64/elf_machdep.c @@ -1,438 +1,446 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include "vdso_offsets.h" + +extern const char _binary_elf_vdso_so_1_start[]; +extern const char _binary_elf_vdso_so_1_end[]; +extern char _binary_elf_vdso_so_1_size; + struct sysentvec elf64_freebsd_sysvec_la48 = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, - .sv_sigcode = sigcode, - .sv_szsigcode = &szsigcode, + .sv_sigcode = _binary_elf_vdso_so_1_start, + .sv_szsigcode = (int *)&_binary_elf_vdso_so_1_size, + .sv_sigcodeoff = VDSO_SIGCODE_OFFSET, .sv_name = "FreeBSD ELF64", .sv_coredump = __elfN(coredump), .sv_elf_core_osabi = ELFOSABI_FREEBSD, .sv_elf_core_abi_vendor = FREEBSD_ABI_VENDOR, .sv_elf_core_prepare_notes = __elfN(prepare_notes), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS_LA48, .sv_usrstack = USRSTACK_LA48, .sv_psstrings = PS_STRINGS_LA48, .sv_stackprot = VM_PROT_ALL, .sv_copyout_auxargs = __elfN(freebsd_copyout_auxargs), .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_ASLR | SV_LP64 | SV_SHP | - SV_TIMEKEEP | SV_RNG_SEED_VER, + SV_TIMEKEEP | SV_RNG_SEED_VER | SV_DSO_SIG, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE_LA48, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_stackgap = elf64_stackgap, .sv_onexec_old = exec_onexec_old, .sv_onexit = exit_onexit, .sv_set_fork_retval = x86_set_fork_retval, }; struct sysentvec elf64_freebsd_sysvec_la57 = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, - .sv_sigcode = sigcode, - .sv_szsigcode = &szsigcode, + .sv_sigcode = _binary_elf_vdso_so_1_start, + .sv_szsigcode = (int *)&_binary_elf_vdso_so_1_size, + .sv_sigcodeoff = VDSO_SIGCODE_OFFSET, .sv_name = "FreeBSD ELF64", .sv_coredump = __elfN(coredump), .sv_elf_core_osabi = ELFOSABI_FREEBSD, .sv_elf_core_abi_vendor = FREEBSD_ABI_VENDOR, .sv_elf_core_prepare_notes = __elfN(prepare_notes), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS_LA57, .sv_usrstack = USRSTACK_LA57, .sv_psstrings = PS_STRINGS_LA57, .sv_stackprot = VM_PROT_ALL, .sv_copyout_auxargs = __elfN(freebsd_copyout_auxargs), .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_ASLR | SV_LP64 | SV_SHP | - SV_TIMEKEEP | SV_RNG_SEED_VER, + SV_TIMEKEEP | SV_RNG_SEED_VER | SV_DSO_SIG, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE_LA57, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_stackgap = elf64_stackgap, .sv_onexec_old = exec_onexec_old, .sv_onexit = exit_onexit, .sv_set_fork_retval= x86_set_fork_retval, }; static void amd64_init_sysvecs(void *arg) { amd64_lower_shared_page(&elf64_freebsd_sysvec_la48); if (la57) { exec_sysvec_init(&elf64_freebsd_sysvec_la57); exec_sysvec_init_secondary(&elf64_freebsd_sysvec_la57, &elf64_freebsd_sysvec_la48); } else { exec_sysvec_init(&elf64_freebsd_sysvec_la48); } } SYSINIT(elf64_sysvec, SI_SUB_EXEC, SI_ORDER_ANY, amd64_init_sysvecs, NULL); void amd64_lower_shared_page(struct sysentvec *sv) { if (hw_lower_amd64_sharedpage != 0) { sv->sv_maxuser -= PAGE_SIZE; sv->sv_shared_page_base -= PAGE_SIZE; sv->sv_usrstack -= PAGE_SIZE; sv->sv_psstrings -= PAGE_SIZE; } } static boolean_t freebsd_brand_info_la57_img_compat(struct image_params *imgp, int32_t *osrel __unused, uint32_t *fctl0) { if ((imgp->proc->p_md.md_flags & P_MD_LA57) != 0) return (TRUE); if (fctl0 == NULL || (*fctl0 & NT_FREEBSD_FCTL_LA48) != 0) return (FALSE); if ((imgp->proc->p_md.md_flags & P_MD_LA48) != 0) return (FALSE); return (TRUE); } static Elf64_Brandinfo freebsd_brand_info_la48 = { .brand = ELFOSABI_FREEBSD, .machine = EM_X86_64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec_la48, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, }; static Elf64_Brandinfo freebsd_brand_info_la57 = { .brand = ELFOSABI_FREEBSD, .machine = EM_X86_64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec_la57, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, .header_supported = freebsd_brand_info_la57_img_compat, }; static void sysinit_register_elf64_brand_entries(void *arg __unused) { /* * _57 must go first so it can either claim the image or hand * it to _48. */ if (la57) elf64_insert_brand_entry(&freebsd_brand_info_la57); elf64_insert_brand_entry(&freebsd_brand_info_la48); } SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST, sysinit_register_elf64_brand_entries, NULL); static Elf64_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_X86_64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &elf64_freebsd_sysvec_la48, .interp_newpath = NULL, .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_oinfo); static Elf64_Brandinfo kfreebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_X86_64, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/lib/ld-kfreebsd-x86-64.so.1", .sysvec = &elf64_freebsd_sysvec_la48, .interp_newpath = NULL, .brand_note = &elf64_kfreebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY }; SYSINIT(kelf64, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t)elf64_insert_brand_entry, &kfreebsd_brand_info); void elf64_dump_thread(struct thread *td, void *dst, size_t *off) { void *buf; size_t len; len = 0; if (use_xsave) { if (dst != NULL) { fpugetregs(td); len += elf64_populate_note(NT_X86_XSTATE, get_pcb_user_save_td(td), dst, cpu_max_ext_state_size, &buf); *(uint64_t *)((char *)buf + X86_XSTATE_XCR0_OFFSET) = xsave_mask; } else len += elf64_populate_note(NT_X86_XSTATE, NULL, NULL, cpu_max_ext_state_size, NULL); } *off = len; } bool elf_is_ifunc_reloc(Elf_Size r_info) { return (ELF_R_TYPE(r_info) == R_X86_64_IRELATIVE); } /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, bool late_ifunc, elf_lookup_fn lookup) { Elf64_Addr *where, val; Elf32_Addr *where32, val32; Elf_Addr addr; Elf_Addr addend; Elf_Size rtype, symidx; const Elf_Rel *rel; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: rel = (const Elf_Rel *)data; where = (Elf_Addr *) (relocbase + rel->r_offset); rtype = ELF_R_TYPE(rel->r_info); symidx = ELF_R_SYM(rel->r_info); /* Addend is 32 bit on 32 bit relocs */ switch (rtype) { case R_X86_64_PC32: case R_X86_64_32S: case R_X86_64_PLT32: addend = *(Elf32_Addr *)where; break; default: addend = *where; break; } break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("unknown reloc type %d\n", type); } if (late_ifunc) { KASSERT(type == ELF_RELOC_RELA, ("Only RELA ifunc relocations are supported")); if (rtype != R_X86_64_IRELATIVE) return (0); } switch (rtype) { case R_X86_64_NONE: /* none */ break; case R_X86_64_64: /* S + A */ error = lookup(lf, symidx, 1, &addr); val = addr + addend; if (error != 0) return (-1); if (*where != val) *where = val; break; case R_X86_64_PC32: /* S + A - P */ case R_X86_64_PLT32: /* L + A - P, L is PLT location for the symbol, which we treat as S */ error = lookup(lf, symidx, 1, &addr); where32 = (Elf32_Addr *)where; val32 = (Elf32_Addr)(addr + addend - (Elf_Addr)where); if (error != 0) return (-1); if (*where32 != val32) *where32 = val32; break; case R_X86_64_32S: /* S + A sign extend */ error = lookup(lf, symidx, 1, &addr); val32 = (Elf32_Addr)(addr + addend); where32 = (Elf32_Addr *)where; if (error != 0) return (-1); if (*where32 != val32) *where32 = val32; break; case R_X86_64_COPY: /* none */ /* * There shouldn't be copy relocations in kernel * objects. */ printf("kldload: unexpected R_COPY relocation, " "symbol index %ld\n", symidx); return (-1); case R_X86_64_GLOB_DAT: /* S */ case R_X86_64_JMP_SLOT: /* XXX need addend + offset */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return (-1); if (*where != addr) *where = addr; break; case R_X86_64_RELATIVE: /* B + A */ addr = elf_relocaddr(lf, relocbase + addend); val = addr; if (*where != val) *where = val; break; case R_X86_64_IRELATIVE: addr = relocbase + addend; val = ((Elf64_Addr (*)(void))addr)(); if (*where != val) *where = val; break; default: printf("kldload: unexpected relocation type %ld, " "symbol index %ld\n", rtype, symidx); return (-1); } return (0); } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, false, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, false, lookup)); } int elf_reloc_late(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, true, lookup)); } int elf_cpu_load_file(linker_file_t lf __unused) { return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } int elf_cpu_parse_dynamic(caddr_t loadbase __unused, Elf_Dyn *dynamic __unused) { return (0); } diff --git a/sys/amd64/amd64/sigtramp.S b/sys/amd64/amd64/sigtramp.S index 9983edf229a3..05bf30293a9a 100644 --- a/sys/amd64/amd64/sigtramp.S +++ b/sys/amd64/amd64/sigtramp.S @@ -1,56 +1,48 @@ /*- * Copyright (c) 2003 Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include - #include #include "assym.inc" .text -/********************************************************************** - * - * Signal trampoline, copied to top of user stack - * +/* + * Signal trampoline, mapped as vdso into shared page. */ -ENTRY(sigcode) +ENTRY(__vdso_sigcode) call *SIGF_HANDLER(%rsp) /* call signal handler */ lea SIGF_UC(%rsp),%rdi /* get ucontext_t */ pushq $0 /* junk to fake return addr. */ movq $SYS_sigreturn,%rax syscall /* enter kernel with args */ 0: hlt /* trap priviliged instruction */ jmp 0b +END(__vdso_sigcode) - ALIGN_TEXT -esigcode: - - .data - .globl szsigcode -szsigcode: - .long esigcode-sigcode + .section .note.GNU-stack,"",%progbits diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index 125a8cff6705..8c08beab0756 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -1,399 +1,404 @@ # This file tells config what files go into building a kernel, # files marked standard are always included. # # $FreeBSD$ # # common files stuff between i386 and amd64 include "conf/files.x86" # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and # dependency lines other than the first are silently ignored. # # +elf-vdso.so.o standard \ + dependency "$S/amd64/amd64/sigtramp.S assym.inc $S/tools/amd64_vdso.sh" \ + compile-with "env AWK='${AWK}' NM='${NM}' LD='${LD}' CC='${CC}' OBJCOPY='${OBJCOPY}' S='${S}' sh $S/tools/amd64_vdso.sh" \ + no-implicit-rule before-depend \ + clean "elf-vdso.so.o elf-vdso.so.1 vdso_offsets.h sigtramp.pico" +# ia32_genassym.o standard \ dependency "$S/compat/ia32/ia32_genassym.c offset.inc" \ compile-with "${CC} ${CFLAGS:N-flto:N-fno-common} -fcommon -c ${.IMPSRC}" \ no-obj no-implicit-rule \ clean "ia32_genassym.o" # ia32_assym.h standard \ dependency "$S/kern/genassym.sh ia32_genassym.o" \ compile-with "env NM='${NM}' NMFLAGS='${NMFLAGS}' sh $S/kern/genassym.sh ia32_genassym.o > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "ia32_assym.h" # amd64/acpica/acpi_machdep.c optional acpi amd64/acpica/acpi_wakeup.c optional acpi acpi_wakecode.o optional acpi \ dependency "$S/amd64/acpica/acpi_wakecode.S assym.inc" \ compile-with "${NORMAL_S}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.o" acpi_wakecode.bin optional acpi \ dependency "acpi_wakecode.o" \ compile-with "${OBJCOPY} -S -O binary acpi_wakecode.o ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.bin" acpi_wakecode.h optional acpi \ dependency "acpi_wakecode.bin" \ compile-with "file2c -sx 'static char wakecode[] = {' '};' < acpi_wakecode.bin > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.h" acpi_wakedata.h optional acpi \ dependency "acpi_wakecode.o" \ compile-with '${NM} -n --defined-only acpi_wakecode.o | while read offset dummy what; do echo "#define $${what} 0x$${offset}"; done > ${.TARGET}' \ no-obj no-implicit-rule before-depend \ clean "acpi_wakedata.h" # #amd64/amd64/apic_vector.S standard amd64/amd64/bios.c standard amd64/amd64/bpf_jit_machdep.c optional bpf_jitter amd64/amd64/copyout.c standard amd64/amd64/cpu_switch.S standard amd64/amd64/db_disasm.c optional ddb amd64/amd64/db_interface.c optional ddb amd64/amd64/db_trace.c optional ddb amd64/amd64/efirt_machdep.c optional efirt amd64/amd64/efirt_support.S optional efirt amd64/amd64/elf_machdep.c standard amd64/amd64/exception.S standard amd64/amd64/exec_machdep.c standard amd64/amd64/fpu.c standard amd64/amd64/gdb_machdep.c optional gdb amd64/amd64/initcpu.c standard amd64/amd64/io.c optional io amd64/amd64/locore.S standard no-obj amd64/amd64/xen-locore.S optional xenhvm \ compile-with "${NORMAL_S} -g0" \ no-ctfconvert amd64/amd64/machdep.c standard amd64/amd64/mem.c optional mem amd64/amd64/minidump_machdep.c standard amd64/amd64/mp_machdep.c optional smp amd64/amd64/mpboot.S optional smp amd64/amd64/pmap.c standard amd64/amd64/ptrace_machdep.c standard -amd64/amd64/sigtramp.S standard amd64/amd64/support.S standard amd64/amd64/sys_machdep.c standard amd64/amd64/trap.c standard amd64/amd64/uio_machdep.c standard amd64/amd64/uma_machdep.c standard amd64/amd64/vm_machdep.c standard amd64/pci/pci_cfgreg.c optional pci cddl/dev/dtrace/amd64/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/amd64/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" crypto/aesni/aeskeys_amd64.S optional aesni crypto/des/des_enc.c optional netsmb crypto/openssl/amd64/aesni-x86_64.S optional ossl crypto/openssl/amd64/chacha-x86_64.S optional ossl crypto/openssl/amd64/poly1305-x86_64.S optional ossl crypto/openssl/amd64/sha1-x86_64.S optional ossl crypto/openssl/amd64/sha256-x86_64.S optional ossl crypto/openssl/amd64/sha512-x86_64.S optional ossl dev/acpi_support/acpi_wmi_if.m standard dev/agp/agp_amd64.c optional agp dev/agp/agp_i810.c optional agp dev/agp/agp_via.c optional agp dev/amdgpio/amdgpio.c optional amdgpio dev/axgbe/if_axgbe_pci.c optional axp dev/axgbe/xgbe-desc.c optional axp dev/axgbe/xgbe-dev.c optional axp dev/axgbe/xgbe-drv.c optional axp dev/axgbe/xgbe-mdio.c optional axp dev/axgbe/xgbe-sysctl.c optional axp dev/axgbe/xgbe-txrx.c optional axp dev/axgbe/xgbe_osdep.c optional axp dev/axgbe/xgbe-i2c.c optional axp dev/axgbe/xgbe-phy-v2.c optional axp dev/hyperv/vmbus/amd64/hyperv_machdep.c optional hyperv dev/hyperv/vmbus/amd64/vmbus_vector.S optional hyperv dev/iavf/if_iavf_iflib.c optional iavf pci \ compile-with "${NORMAL_C} -I$S/dev/iavf" dev/iavf/iavf_lib.c optional iavf pci \ compile-with "${NORMAL_C} -I$S/dev/iavf" dev/iavf/iavf_osdep.c optional iavf pci \ compile-with "${NORMAL_C} -I$S/dev/iavf" dev/iavf/iavf_txrx_iflib.c optional iavf pci \ compile-with "${NORMAL_C} -I$S/dev/iavf" dev/iavf/iavf_common.c optional iavf pci \ compile-with "${NORMAL_C} -I$S/dev/iavf" dev/iavf/iavf_adminq.c optional iavf pci \ compile-with "${NORMAL_C} -I$S/dev/iavf" dev/iavf/iavf_vc_common.c optional iavf pci \ compile-with "${NORMAL_C} -I$S/dev/iavf" dev/iavf/iavf_vc_iflib.c optional iavf pci \ compile-with "${NORMAL_C} -I$S/dev/iavf" dev/ice/if_ice_iflib.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_lib.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_osdep.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_resmgr.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_strings.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_iflib_recovery_txrx.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_iflib_txrx.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_common.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_controlq.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_dcb.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_flex_pipe.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_flow.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_nvm.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_sched.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_sriov.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_switch.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_vlan_mode.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" ice_ddp.c optional ice_ddp \ compile-with "${AWK} -f $S/tools/fw_stub.awk ice_ddp.fw:ice_ddp:0x01031800 -mice_ddp -c${.TARGET}" \ no-ctfconvert no-implicit-rule before-depend local \ clean "ice_ddp.c" ice_ddp.fwo optional ice_ddp \ dependency "ice_ddp.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "ice_ddp.fwo" ice_ddp.fw optional ice_ddp \ dependency "$S/contrib/dev/ice/ice-1.3.24.0.pkg" \ compile-with "${CP} $S/contrib/dev/ice/ice-1.3.24.0.pkg ice_ddp.fw" \ no-obj no-implicit-rule \ clean "ice_ddp.fw" dev/ioat/ioat.c optional ioat pci dev/ioat/ioat_test.c optional ioat pci dev/ixl/if_ixl.c optional ixl pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/ixl_pf_main.c optional ixl pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/ixl_pf_iflib.c optional ixl pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/ixl_pf_qmgr.c optional ixl pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/ixl_pf_iov.c optional ixl pci pci_iov \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/ixl_pf_i2c.c optional ixl pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/ixl_txrx.c optional ixl pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/i40e_osdep.c optional ixl pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/i40e_lan_hmc.c optional ixl pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/i40e_hmc.c optional ixl pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/i40e_common.c optional ixl pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/i40e_nvm.c optional ixl pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/i40e_adminq.c optional ixl pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/i40e_dcb.c optional ixl pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/nctgpio/nctgpio.c optional nctgpio dev/nfe/if_nfe.c optional nfe pci dev/ntb/if_ntb/if_ntb.c optional if_ntb dev/ntb/ntb_transport.c optional ntb_transport | if_ntb dev/ntb/ntb.c optional ntb | ntb_transport | if_ntb | ntb_hw_amd | ntb_hw_intel | ntb_hw_plx | ntb_hw dev/ntb/ntb_if.m optional ntb | ntb_transport | if_ntb | ntb_hw_amd | ntb_hw_intel | ntb_hw_plx | ntb_hw dev/ntb/ntb_hw/ntb_hw_amd.c optional ntb_hw_amd | ntb_hw dev/ntb/ntb_hw/ntb_hw_intel.c optional ntb_hw_intel | ntb_hw dev/ntb/ntb_hw/ntb_hw_plx.c optional ntb_hw_plx | ntb_hw dev/ntb/test/ntb_tool.c optional ntb_tool dev/nvram/nvram.c optional nvram isa dev/random/ivy.c optional rdrand_rng !random_loadable dev/random/nehemiah.c optional padlock_rng !random_loadable dev/qlxge/qls_dbg.c optional qlxge pci dev/qlxge/qls_dump.c optional qlxge pci dev/qlxge/qls_hw.c optional qlxge pci dev/qlxge/qls_ioctl.c optional qlxge pci dev/qlxge/qls_isr.c optional qlxge pci dev/qlxge/qls_os.c optional qlxge pci dev/qlxgb/qla_dbg.c optional qlxgb pci dev/qlxgb/qla_hw.c optional qlxgb pci dev/qlxgb/qla_ioctl.c optional qlxgb pci dev/qlxgb/qla_isr.c optional qlxgb pci dev/qlxgb/qla_misc.c optional qlxgb pci dev/qlxgb/qla_os.c optional qlxgb pci dev/qlxgbe/ql_dbg.c optional qlxgbe pci dev/qlxgbe/ql_hw.c optional qlxgbe pci dev/qlxgbe/ql_ioctl.c optional qlxgbe pci dev/qlxgbe/ql_isr.c optional qlxgbe pci dev/qlxgbe/ql_misc.c optional qlxgbe pci dev/qlxgbe/ql_os.c optional qlxgbe pci dev/qlxgbe/ql_reset.c optional qlxgbe pci dev/qlxgbe/ql_fw.c optional qlxgbe pci dev/qlxgbe/ql_boot.c optional qlxgbe pci dev/qlxgbe/ql_minidump.c optional qlxgbe pci dev/qlnx/qlnxe/ecore_cxt.c optional qlnxe pci \ compile-with "${LINUXKPI_C}" dev/qlnx/qlnxe/ecore_dbg_fw_funcs.c optional qlnxe pci \ compile-with "${LINUXKPI_C}" dev/qlnx/qlnxe/ecore_dcbx.c optional qlnxe pci \ compile-with "${LINUXKPI_C}" dev/qlnx/qlnxe/ecore_dev.c optional qlnxe pci \ compile-with "${LINUXKPI_C}" dev/qlnx/qlnxe/ecore_hw.c optional qlnxe pci \ compile-with "${LINUXKPI_C}" dev/qlnx/qlnxe/ecore_init_fw_funcs.c optional qlnxe pci \ compile-with "${LINUXKPI_C}" dev/qlnx/qlnxe/ecore_init_ops.c optional qlnxe pci \ compile-with "${LINUXKPI_C}" dev/qlnx/qlnxe/ecore_int.c optional qlnxe pci \ compile-with "${LINUXKPI_C}" dev/qlnx/qlnxe/ecore_l2.c optional qlnxe pci \ compile-with "${LINUXKPI_C}" dev/qlnx/qlnxe/ecore_mcp.c optional qlnxe pci \ compile-with "${LINUXKPI_C}" dev/qlnx/qlnxe/ecore_sp_commands.c optional qlnxe pci \ compile-with "${LINUXKPI_C}" dev/qlnx/qlnxe/ecore_spq.c optional qlnxe pci \ compile-with "${LINUXKPI_C}" dev/qlnx/qlnxe/qlnx_ioctl.c optional qlnxe pci \ compile-with "${LINUXKPI_C}" dev/qlnx/qlnxe/qlnx_os.c optional qlnxe pci \ compile-with "${LINUXKPI_C}" dev/sfxge/common/ef10_ev.c optional sfxge pci dev/sfxge/common/ef10_filter.c optional sfxge pci dev/sfxge/common/ef10_image.c optional sfxge pci dev/sfxge/common/ef10_intr.c optional sfxge pci dev/sfxge/common/ef10_mac.c optional sfxge pci dev/sfxge/common/ef10_mcdi.c optional sfxge pci dev/sfxge/common/ef10_nic.c optional sfxge pci dev/sfxge/common/ef10_nvram.c optional sfxge pci dev/sfxge/common/ef10_phy.c optional sfxge pci dev/sfxge/common/ef10_rx.c optional sfxge pci dev/sfxge/common/ef10_tx.c optional sfxge pci dev/sfxge/common/ef10_vpd.c optional sfxge pci dev/sfxge/common/efx_bootcfg.c optional sfxge pci dev/sfxge/common/efx_crc32.c optional sfxge pci dev/sfxge/common/efx_ev.c optional sfxge pci dev/sfxge/common/efx_filter.c optional sfxge pci dev/sfxge/common/efx_hash.c optional sfxge pci dev/sfxge/common/efx_intr.c optional sfxge pci dev/sfxge/common/efx_lic.c optional sfxge pci dev/sfxge/common/efx_mac.c optional sfxge pci dev/sfxge/common/efx_mcdi.c optional sfxge pci dev/sfxge/common/efx_mon.c optional sfxge pci dev/sfxge/common/efx_nic.c optional sfxge pci dev/sfxge/common/efx_nvram.c optional sfxge pci dev/sfxge/common/efx_phy.c optional sfxge pci dev/sfxge/common/efx_port.c optional sfxge pci dev/sfxge/common/efx_rx.c optional sfxge pci dev/sfxge/common/efx_sram.c optional sfxge pci dev/sfxge/common/efx_tunnel.c optional sfxge pci dev/sfxge/common/efx_tx.c optional sfxge pci dev/sfxge/common/efx_vpd.c optional sfxge pci dev/sfxge/common/hunt_nic.c optional sfxge pci dev/sfxge/common/mcdi_mon.c optional sfxge pci dev/sfxge/common/medford_nic.c optional sfxge pci dev/sfxge/common/medford2_nic.c optional sfxge pci dev/sfxge/common/siena_mac.c optional sfxge pci dev/sfxge/common/siena_mcdi.c optional sfxge pci dev/sfxge/common/siena_nic.c optional sfxge pci dev/sfxge/common/siena_nvram.c optional sfxge pci dev/sfxge/common/siena_phy.c optional sfxge pci dev/sfxge/common/siena_sram.c optional sfxge pci dev/sfxge/common/siena_vpd.c optional sfxge pci dev/sfxge/sfxge.c optional sfxge pci dev/sfxge/sfxge_dma.c optional sfxge pci dev/sfxge/sfxge_ev.c optional sfxge pci dev/sfxge/sfxge_intr.c optional sfxge pci dev/sfxge/sfxge_mcdi.c optional sfxge pci dev/sfxge/sfxge_nvram.c optional sfxge pci dev/sfxge/sfxge_port.c optional sfxge pci dev/sfxge/sfxge_rx.c optional sfxge pci dev/sfxge/sfxge_tx.c optional sfxge pci dev/smartpqi/smartpqi_cam.c optional smartpqi dev/smartpqi/smartpqi_cmd.c optional smartpqi dev/smartpqi/smartpqi_discovery.c optional smartpqi dev/smartpqi/smartpqi_event.c optional smartpqi dev/smartpqi/smartpqi_helper.c optional smartpqi dev/smartpqi/smartpqi_init.c optional smartpqi dev/smartpqi/smartpqi_intr.c optional smartpqi dev/smartpqi/smartpqi_ioctl.c optional smartpqi dev/smartpqi/smartpqi_main.c optional smartpqi dev/smartpqi/smartpqi_mem.c optional smartpqi dev/smartpqi/smartpqi_misc.c optional smartpqi dev/smartpqi/smartpqi_queue.c optional smartpqi dev/smartpqi/smartpqi_request.c optional smartpqi dev/smartpqi/smartpqi_response.c optional smartpqi dev/smartpqi/smartpqi_sis.c optional smartpqi dev/smartpqi/smartpqi_tag.c optional smartpqi dev/speaker/spkr.c optional speaker dev/sume/if_sume.c optional sume dev/superio/superio.c optional superio isa dev/syscons/apm/apm_saver.c optional apm_saver apm dev/syscons/scvesactl.c optional sc vga vesa dev/syscons/scvgarndr.c optional sc vga dev/tpm/tpm.c optional tpm dev/tpm/tpm20.c optional tpm dev/tpm/tpm_crb.c optional tpm acpi dev/tpm/tpm_tis.c optional tpm acpi dev/tpm/tpm_acpi.c optional tpm acpi dev/tpm/tpm_isa.c optional tpm isa dev/uart/uart_cpu_x86.c optional uart dev/viawd/viawd.c optional viawd dev/vmd/vmd.c optional vmd | vmd_bus dev/wbwd/wbwd.c optional wbwd dev/p2sb/p2sb.c optional p2sb pci dev/p2sb/lewisburg_gpiocm.c optional lbggpiocm p2sb dev/p2sb/lewisburg_gpio.c optional lbggpio lbggpiocm isa/syscons_isa.c optional sc isa/vga_isa.c optional vga kern/imgact_aout.c optional compat_aout kern/link_elf_obj.c standard # # IA32 binary support # #amd64/ia32/ia32_exception.S optional compat_freebsd32 amd64/ia32/ia32_reg.c optional compat_freebsd32 amd64/ia32/ia32_signal.c optional compat_freebsd32 amd64/ia32/ia32_sigtramp.S optional compat_freebsd32 amd64/ia32/ia32_syscall.c optional compat_freebsd32 amd64/ia32/ia32_misc.c optional compat_freebsd32 compat/ia32/ia32_sysvec.c optional compat_freebsd32 # # x86 real mode BIOS emulator, required by dpms/pci/vesa # compat/x86bios/x86bios.c optional x86bios | dpms | pci | vesa contrib/x86emu/x86emu.c optional x86bios | dpms | pci | vesa # Common files where we currently configure the system differently, but perhaps shouldn't # config(8) doesn't have a way to force standard options, so we've been inconsistent # about marking non-optional things 'standard'. x86/acpica/madt.c optional acpi x86/isa/atpic.c optional atpic isa x86/isa/elcr.c optional atpic isa | mptable x86/isa/isa.c standard x86/isa/isa_dma.c standard x86/pci/pci_early_quirks.c optional pci x86/x86/io_apic.c standard x86/x86/local_apic.c standard x86/x86/mptable.c optional mptable x86/x86/mptable_pci.c optional mptable pci x86/x86/msi.c optional pci x86/xen/pv.c optional xenhvm contrib/openzfs/module/zcommon/zfs_fletcher_avx512.c optional zfs compile-with "${ZFS_C}" contrib/openzfs/module/zcommon/zfs_fletcher_intel.c optional zfs compile-with "${ZFS_C}" contrib/openzfs/module/zcommon/zfs_fletcher_sse.c optional zfs compile-with "${ZFS_C}" contrib/openzfs/module/zfs/vdev_raidz_math_avx2.c optional zfs compile-with "${ZFS_C}" contrib/openzfs/module/zfs/vdev_raidz_math_avx512bw.c optional zfs compile-with "${ZFS_C}" contrib/openzfs/module/zfs/vdev_raidz_math_avx512f.c optional zfs compile-with "${ZFS_C}" contrib/openzfs/module/zfs/vdev_raidz_math_sse2.c optional zfs compile-with "${ZFS_C}" contrib/openzfs/module/zfs/vdev_raidz_math_ssse3.c optional zfs compile-with "${ZFS_C}" diff --git a/sys/conf/vdso_amd64.ldscript b/sys/conf/vdso_amd64.ldscript new file mode 100644 index 000000000000..d412abd4cd02 --- /dev/null +++ b/sys/conf/vdso_amd64.ldscript @@ -0,0 +1,89 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2021 The FreeBSD Foundation + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Linker script for amd64 vdso. + */ + +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(5); + eh_frame_hdr PT_GNU_EH_FRAME FLAGS(5); +} + +SECTIONS +{ + . = . + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } :text + .dynsym : { *(.dynsym) } :text + .dynstr : { *(.dynstr) } :text + .gnu.version : { *(.gnu.version) } :text + .gnu.version_d : { *(.gnu.version_d) } :text + .gnu.version_r : { *(.gnu.version_r) } :text + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .dynamic : { *(.dynamic) } :text :dynamic + .rodata : { *(.rodata*) } :text + .data : { + *(.got.plt) *(.got) + } :text + /DISCARD/ /* .data */: { + *(.data*) + *(.sdata*) + *(.gnu.linkonce.d.*) + *(.bss*) + *(.dynbss*) + *(.gnu.linkonce.b.*) + *(.ctors) + *(.dtors) + *(.jcr) + *(.init_array) + *(.init) + *(.fini) + *(.debug*) + *(.comment) + } + + . = ALIGN(0x10); + .text : { *(.text .text*) } :text =0x90909090 +} + +VERSION +{ + FBSD_1.7 { + global: + __vdso_sigcode; + local: + *; + }; +} diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index 2f029e261427..306b1234efa9 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -1,3456 +1,3459 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_proc.c 8.7 (Berkeley) 2/14/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_ktrace.h" #include "opt_kstack_pages.h" #include "opt_stack.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #ifdef DDB #include #endif #include #include #include #include #include #include #include #include #include #ifdef COMPAT_FREEBSD32 #include #include #endif SDT_PROVIDER_DEFINE(proc); MALLOC_DEFINE(M_SESSION, "session", "session header"); static MALLOC_DEFINE(M_PROC, "proc", "Proc structures"); MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures"); static void doenterpgrp(struct proc *, struct pgrp *); static void orphanpg(struct pgrp *pg); static void fill_kinfo_aggregate(struct proc *p, struct kinfo_proc *kp); static void fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp); static void fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp, int preferthread); static void pgdelete(struct pgrp *); static int pgrp_init(void *mem, int size, int flags); static int proc_ctor(void *mem, int size, void *arg, int flags); static void proc_dtor(void *mem, int size, void *arg); static int proc_init(void *mem, int size, int flags); static void proc_fini(void *mem, int size); static void pargs_free(struct pargs *pa); /* * Other process lists */ struct pidhashhead *pidhashtbl = NULL; struct sx *pidhashtbl_lock; u_long pidhash; u_long pidhashlock; struct pgrphashhead *pgrphashtbl; u_long pgrphash; struct proclist allproc = LIST_HEAD_INITIALIZER(allproc); struct sx __exclusive_cache_line allproc_lock; struct sx __exclusive_cache_line proctree_lock; struct mtx __exclusive_cache_line ppeers_lock; struct mtx __exclusive_cache_line procid_lock; uma_zone_t proc_zone; uma_zone_t pgrp_zone; /* * The offset of various fields in struct proc and struct thread. * These are used by kernel debuggers to enumerate kernel threads and * processes. */ const int proc_off_p_pid = offsetof(struct proc, p_pid); const int proc_off_p_comm = offsetof(struct proc, p_comm); const int proc_off_p_list = offsetof(struct proc, p_list); const int proc_off_p_hash = offsetof(struct proc, p_hash); const int proc_off_p_threads = offsetof(struct proc, p_threads); const int thread_off_td_tid = offsetof(struct thread, td_tid); const int thread_off_td_name = offsetof(struct thread, td_name); const int thread_off_td_oncpu = offsetof(struct thread, td_oncpu); const int thread_off_td_pcb = offsetof(struct thread, td_pcb); const int thread_off_td_plist = offsetof(struct thread, td_plist); EVENTHANDLER_LIST_DEFINE(process_ctor); EVENTHANDLER_LIST_DEFINE(process_dtor); EVENTHANDLER_LIST_DEFINE(process_init); EVENTHANDLER_LIST_DEFINE(process_fini); EVENTHANDLER_LIST_DEFINE(process_exit); EVENTHANDLER_LIST_DEFINE(process_fork); EVENTHANDLER_LIST_DEFINE(process_exec); int kstack_pages = KSTACK_PAGES; SYSCTL_INT(_kern, OID_AUTO, kstack_pages, CTLFLAG_RD, &kstack_pages, 0, "Kernel stack size in pages"); static int vmmap_skip_res_cnt = 0; SYSCTL_INT(_kern, OID_AUTO, proc_vmmap_skip_resident_count, CTLFLAG_RW, &vmmap_skip_res_cnt, 0, "Skip calculation of the pages resident count in kern.proc.vmmap"); CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE); #ifdef COMPAT_FREEBSD32 CTASSERT(sizeof(struct kinfo_proc32) == KINFO_PROC32_SIZE); #endif /* * Initialize global process hashing structures. */ void procinit(void) { u_long i; sx_init(&allproc_lock, "allproc"); sx_init(&proctree_lock, "proctree"); mtx_init(&ppeers_lock, "p_peers", NULL, MTX_DEF); mtx_init(&procid_lock, "procid", NULL, MTX_DEF); pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash); pidhashlock = (pidhash + 1) / 64; if (pidhashlock > 0) pidhashlock--; pidhashtbl_lock = malloc(sizeof(*pidhashtbl_lock) * (pidhashlock + 1), M_PROC, M_WAITOK | M_ZERO); for (i = 0; i < pidhashlock + 1; i++) sx_init_flags(&pidhashtbl_lock[i], "pidhash", SX_DUPOK); pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash); proc_zone = uma_zcreate("PROC", sched_sizeof_proc(), proc_ctor, proc_dtor, proc_init, proc_fini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); pgrp_zone = uma_zcreate("PGRP", sizeof(struct pgrp), NULL, NULL, pgrp_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); uihashinit(); } /* * Prepare a proc for use. */ static int proc_ctor(void *mem, int size, void *arg, int flags) { struct proc *p; struct thread *td; p = (struct proc *)mem; #ifdef KDTRACE_HOOKS kdtrace_proc_ctor(p); #endif EVENTHANDLER_DIRECT_INVOKE(process_ctor, p); td = FIRST_THREAD_IN_PROC(p); if (td != NULL) { /* Make sure all thread constructors are executed */ EVENTHANDLER_DIRECT_INVOKE(thread_ctor, td); } return (0); } /* * Reclaim a proc after use. */ static void proc_dtor(void *mem, int size, void *arg) { struct proc *p; struct thread *td; /* INVARIANTS checks go here */ p = (struct proc *)mem; td = FIRST_THREAD_IN_PROC(p); if (td != NULL) { #ifdef INVARIANTS KASSERT((p->p_numthreads == 1), ("bad number of threads in exiting process")); KASSERT(STAILQ_EMPTY(&p->p_ktr), ("proc_dtor: non-empty p_ktr")); #endif /* Free all OSD associated to this thread. */ osd_thread_exit(td); td_softdep_cleanup(td); MPASS(td->td_su == NULL); /* Make sure all thread destructors are executed */ EVENTHANDLER_DIRECT_INVOKE(thread_dtor, td); } EVENTHANDLER_DIRECT_INVOKE(process_dtor, p); #ifdef KDTRACE_HOOKS kdtrace_proc_dtor(p); #endif if (p->p_ksi != NULL) KASSERT(! KSI_ONQ(p->p_ksi), ("SIGCHLD queue")); } /* * Initialize type-stable parts of a proc (when newly created). */ static int proc_init(void *mem, int size, int flags) { struct proc *p; p = (struct proc *)mem; mtx_init(&p->p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK | MTX_NEW); mtx_init(&p->p_slock, "process slock", NULL, MTX_SPIN | MTX_NEW); mtx_init(&p->p_statmtx, "pstatl", NULL, MTX_SPIN | MTX_NEW); mtx_init(&p->p_itimmtx, "pitiml", NULL, MTX_SPIN | MTX_NEW); mtx_init(&p->p_profmtx, "pprofl", NULL, MTX_SPIN | MTX_NEW); cv_init(&p->p_pwait, "ppwait"); TAILQ_INIT(&p->p_threads); /* all threads in proc */ EVENTHANDLER_DIRECT_INVOKE(process_init, p); p->p_stats = pstats_alloc(); p->p_pgrp = NULL; return (0); } /* * UMA should ensure that this function is never called. * Freeing a proc structure would violate type stability. */ static void proc_fini(void *mem, int size) { #ifdef notnow struct proc *p; p = (struct proc *)mem; EVENTHANDLER_DIRECT_INVOKE(process_fini, p); pstats_free(p->p_stats); thread_free(FIRST_THREAD_IN_PROC(p)); mtx_destroy(&p->p_mtx); if (p->p_ksi != NULL) ksiginfo_free(p->p_ksi); #else panic("proc reclaimed"); #endif } static int pgrp_init(void *mem, int size, int flags) { struct pgrp *pg; pg = mem; mtx_init(&pg->pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK); return (0); } /* * PID space management. * * These bitmaps are used by fork_findpid. */ bitstr_t bit_decl(proc_id_pidmap, PID_MAX); bitstr_t bit_decl(proc_id_grpidmap, PID_MAX); bitstr_t bit_decl(proc_id_sessidmap, PID_MAX); bitstr_t bit_decl(proc_id_reapmap, PID_MAX); static bitstr_t *proc_id_array[] = { proc_id_pidmap, proc_id_grpidmap, proc_id_sessidmap, proc_id_reapmap, }; void proc_id_set(int type, pid_t id) { KASSERT(type >= 0 && type < nitems(proc_id_array), ("invalid type %d\n", type)); mtx_lock(&procid_lock); KASSERT(bit_test(proc_id_array[type], id) == 0, ("bit %d already set in %d\n", id, type)); bit_set(proc_id_array[type], id); mtx_unlock(&procid_lock); } void proc_id_set_cond(int type, pid_t id) { KASSERT(type >= 0 && type < nitems(proc_id_array), ("invalid type %d\n", type)); if (bit_test(proc_id_array[type], id)) return; mtx_lock(&procid_lock); bit_set(proc_id_array[type], id); mtx_unlock(&procid_lock); } void proc_id_clear(int type, pid_t id) { KASSERT(type >= 0 && type < nitems(proc_id_array), ("invalid type %d\n", type)); mtx_lock(&procid_lock); KASSERT(bit_test(proc_id_array[type], id) != 0, ("bit %d not set in %d\n", id, type)); bit_clear(proc_id_array[type], id); mtx_unlock(&procid_lock); } /* * Is p an inferior of the current process? */ int inferior(struct proc *p) { sx_assert(&proctree_lock, SX_LOCKED); PROC_LOCK_ASSERT(p, MA_OWNED); for (; p != curproc; p = proc_realparent(p)) { if (p->p_pid == 0) return (0); } return (1); } /* * Shared lock all the pid hash lists. */ void pidhash_slockall(void) { u_long i; for (i = 0; i < pidhashlock + 1; i++) sx_slock(&pidhashtbl_lock[i]); } /* * Shared unlock all the pid hash lists. */ void pidhash_sunlockall(void) { u_long i; for (i = 0; i < pidhashlock + 1; i++) sx_sunlock(&pidhashtbl_lock[i]); } /* * Similar to pfind_any(), this function finds zombies. */ struct proc * pfind_any_locked(pid_t pid) { struct proc *p; sx_assert(PIDHASHLOCK(pid), SX_LOCKED); LIST_FOREACH(p, PIDHASH(pid), p_hash) { if (p->p_pid == pid) { PROC_LOCK(p); if (p->p_state == PRS_NEW) { PROC_UNLOCK(p); p = NULL; } break; } } return (p); } /* * Locate a process by number. * * By not returning processes in the PRS_NEW state, we allow callers to avoid * testing for that condition to avoid dereferencing p_ucred, et al. */ static __always_inline struct proc * _pfind(pid_t pid, bool zombie) { struct proc *p; p = curproc; if (p->p_pid == pid) { PROC_LOCK(p); return (p); } sx_slock(PIDHASHLOCK(pid)); LIST_FOREACH(p, PIDHASH(pid), p_hash) { if (p->p_pid == pid) { PROC_LOCK(p); if (p->p_state == PRS_NEW || (!zombie && p->p_state == PRS_ZOMBIE)) { PROC_UNLOCK(p); p = NULL; } break; } } sx_sunlock(PIDHASHLOCK(pid)); return (p); } struct proc * pfind(pid_t pid) { return (_pfind(pid, false)); } /* * Same as pfind but allow zombies. */ struct proc * pfind_any(pid_t pid) { return (_pfind(pid, true)); } /* * Locate a process group by number. * The caller must hold proctree_lock. */ struct pgrp * pgfind(pid_t pgid) { struct pgrp *pgrp; sx_assert(&proctree_lock, SX_LOCKED); LIST_FOREACH(pgrp, PGRPHASH(pgid), pg_hash) { if (pgrp->pg_id == pgid) { PGRP_LOCK(pgrp); return (pgrp); } } return (NULL); } /* * Locate process and do additional manipulations, depending on flags. */ int pget(pid_t pid, int flags, struct proc **pp) { struct proc *p; struct thread *td1; int error; p = curproc; if (p->p_pid == pid) { PROC_LOCK(p); } else { p = NULL; if (pid <= PID_MAX) { if ((flags & PGET_NOTWEXIT) == 0) p = pfind_any(pid); else p = pfind(pid); } else if ((flags & PGET_NOTID) == 0) { td1 = tdfind(pid, -1); if (td1 != NULL) p = td1->td_proc; } if (p == NULL) return (ESRCH); if ((flags & PGET_CANSEE) != 0) { error = p_cansee(curthread, p); if (error != 0) goto errout; } } if ((flags & PGET_CANDEBUG) != 0) { error = p_candebug(curthread, p); if (error != 0) goto errout; } if ((flags & PGET_ISCURRENT) != 0 && curproc != p) { error = EPERM; goto errout; } if ((flags & PGET_NOTWEXIT) != 0 && (p->p_flag & P_WEXIT) != 0) { error = ESRCH; goto errout; } if ((flags & PGET_NOTINEXEC) != 0 && (p->p_flag & P_INEXEC) != 0) { /* * XXXRW: Not clear ESRCH is the right error during proc * execve(). */ error = ESRCH; goto errout; } if ((flags & PGET_HOLD) != 0) { _PHOLD(p); PROC_UNLOCK(p); } *pp = p; return (0); errout: PROC_UNLOCK(p); return (error); } /* * Create a new process group. * pgid must be equal to the pid of p. * Begin a new session if required. */ int enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp, struct session *sess) { sx_assert(&proctree_lock, SX_XLOCKED); KASSERT(pgrp != NULL, ("enterpgrp: pgrp == NULL")); KASSERT(p->p_pid == pgid, ("enterpgrp: new pgrp and pid != pgid")); KASSERT(pgfind(pgid) == NULL, ("enterpgrp: pgrp with pgid exists")); KASSERT(!SESS_LEADER(p), ("enterpgrp: session leader attempted setpgrp")); if (sess != NULL) { /* * new session */ mtx_init(&sess->s_mtx, "session", NULL, MTX_DEF); PROC_LOCK(p); p->p_flag &= ~P_CONTROLT; PROC_UNLOCK(p); PGRP_LOCK(pgrp); sess->s_leader = p; sess->s_sid = p->p_pid; proc_id_set(PROC_ID_SESSION, p->p_pid); refcount_init(&sess->s_count, 1); sess->s_ttyvp = NULL; sess->s_ttydp = NULL; sess->s_ttyp = NULL; bcopy(p->p_session->s_login, sess->s_login, sizeof(sess->s_login)); pgrp->pg_session = sess; KASSERT(p == curproc, ("enterpgrp: mksession and p != curproc")); } else { pgrp->pg_session = p->p_session; sess_hold(pgrp->pg_session); PGRP_LOCK(pgrp); } pgrp->pg_id = pgid; proc_id_set(PROC_ID_GROUP, p->p_pid); LIST_INIT(&pgrp->pg_members); pgrp->pg_flags = 0; /* * As we have an exclusive lock of proctree_lock, * this should not deadlock. */ LIST_INSERT_HEAD(PGRPHASH(pgid), pgrp, pg_hash); SLIST_INIT(&pgrp->pg_sigiolst); PGRP_UNLOCK(pgrp); doenterpgrp(p, pgrp); return (0); } /* * Move p to an existing process group */ int enterthispgrp(struct proc *p, struct pgrp *pgrp) { sx_assert(&proctree_lock, SX_XLOCKED); PROC_LOCK_ASSERT(p, MA_NOTOWNED); PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED); PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED); SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED); KASSERT(pgrp->pg_session == p->p_session, ("%s: pgrp's session %p, p->p_session %p proc %p\n", __func__, pgrp->pg_session, p->p_session, p)); KASSERT(pgrp != p->p_pgrp, ("%s: p %p belongs to pgrp %p", __func__, p, pgrp)); doenterpgrp(p, pgrp); return (0); } /* * If true, any child of q which belongs to group pgrp, qualifies the * process group pgrp as not orphaned. */ static bool isjobproc(struct proc *q, struct pgrp *pgrp) { sx_assert(&proctree_lock, SX_LOCKED); return (q->p_pgrp != pgrp && q->p_pgrp->pg_session == pgrp->pg_session); } static struct proc * jobc_reaper(struct proc *p) { struct proc *pp; sx_assert(&proctree_lock, SA_LOCKED); for (pp = p;;) { pp = pp->p_reaper; if (pp->p_reaper == pp || (pp->p_treeflag & P_TREE_GRPEXITED) == 0) return (pp); } } static struct proc * jobc_parent(struct proc *p, struct proc *p_exiting) { struct proc *pp; sx_assert(&proctree_lock, SA_LOCKED); pp = proc_realparent(p); if (pp->p_pptr == NULL || pp == p_exiting || (pp->p_treeflag & P_TREE_GRPEXITED) == 0) return (pp); return (jobc_reaper(pp)); } static int pgrp_calc_jobc(struct pgrp *pgrp) { struct proc *q; int cnt; #ifdef INVARIANTS if (!mtx_owned(&pgrp->pg_mtx)) sx_assert(&proctree_lock, SA_LOCKED); #endif cnt = 0; LIST_FOREACH(q, &pgrp->pg_members, p_pglist) { if ((q->p_treeflag & P_TREE_GRPEXITED) != 0 || q->p_pptr == NULL) continue; if (isjobproc(jobc_parent(q, NULL), pgrp)) cnt++; } return (cnt); } /* * Move p to a process group */ static void doenterpgrp(struct proc *p, struct pgrp *pgrp) { struct pgrp *savepgrp; struct proc *pp; sx_assert(&proctree_lock, SX_XLOCKED); PROC_LOCK_ASSERT(p, MA_NOTOWNED); PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED); PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED); SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED); savepgrp = p->p_pgrp; pp = jobc_parent(p, NULL); PGRP_LOCK(pgrp); PGRP_LOCK(savepgrp); if (isjobproc(pp, savepgrp) && pgrp_calc_jobc(savepgrp) == 1) orphanpg(savepgrp); PROC_LOCK(p); LIST_REMOVE(p, p_pglist); p->p_pgrp = pgrp; PROC_UNLOCK(p); LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist); if (isjobproc(pp, pgrp)) pgrp->pg_flags &= ~PGRP_ORPHANED; PGRP_UNLOCK(savepgrp); PGRP_UNLOCK(pgrp); if (LIST_EMPTY(&savepgrp->pg_members)) pgdelete(savepgrp); } /* * remove process from process group */ int leavepgrp(struct proc *p) { struct pgrp *savepgrp; sx_assert(&proctree_lock, SX_XLOCKED); savepgrp = p->p_pgrp; PGRP_LOCK(savepgrp); PROC_LOCK(p); LIST_REMOVE(p, p_pglist); p->p_pgrp = NULL; PROC_UNLOCK(p); PGRP_UNLOCK(savepgrp); if (LIST_EMPTY(&savepgrp->pg_members)) pgdelete(savepgrp); return (0); } /* * delete a process group */ static void pgdelete(struct pgrp *pgrp) { struct session *savesess; struct tty *tp; sx_assert(&proctree_lock, SX_XLOCKED); PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED); SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED); /* * Reset any sigio structures pointing to us as a result of * F_SETOWN with our pgid. The proctree lock ensures that * new sigio structures will not be added after this point. */ funsetownlst(&pgrp->pg_sigiolst); PGRP_LOCK(pgrp); tp = pgrp->pg_session->s_ttyp; LIST_REMOVE(pgrp, pg_hash); savesess = pgrp->pg_session; PGRP_UNLOCK(pgrp); /* Remove the reference to the pgrp before deallocating it. */ if (tp != NULL) { tty_lock(tp); tty_rel_pgrp(tp, pgrp); } proc_id_clear(PROC_ID_GROUP, pgrp->pg_id); uma_zfree(pgrp_zone, pgrp); sess_release(savesess); } static void fixjobc_kill(struct proc *p) { struct proc *q; struct pgrp *pgrp; sx_assert(&proctree_lock, SX_LOCKED); PROC_LOCK_ASSERT(p, MA_NOTOWNED); pgrp = p->p_pgrp; PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED); SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED); /* * p no longer affects process group orphanage for children. * It is marked by the flag because p is only physically * removed from its process group on wait(2). */ MPASS((p->p_treeflag & P_TREE_GRPEXITED) == 0); p->p_treeflag |= P_TREE_GRPEXITED; /* * Check if exiting p orphans its own group. */ pgrp = p->p_pgrp; if (isjobproc(jobc_parent(p, NULL), pgrp)) { PGRP_LOCK(pgrp); if (pgrp_calc_jobc(pgrp) == 0) orphanpg(pgrp); PGRP_UNLOCK(pgrp); } /* * Check this process' children to see whether they qualify * their process groups after reparenting to reaper. */ LIST_FOREACH(q, &p->p_children, p_sibling) { pgrp = q->p_pgrp; PGRP_LOCK(pgrp); if (pgrp_calc_jobc(pgrp) == 0) { /* * We want to handle exactly the children that * has p as realparent. Then, when calculating * jobc_parent for children, we should ignore * P_TREE_GRPEXITED flag already set on p. */ if (jobc_parent(q, p) == p && isjobproc(p, pgrp)) orphanpg(pgrp); } else pgrp->pg_flags &= ~PGRP_ORPHANED; PGRP_UNLOCK(pgrp); } LIST_FOREACH(q, &p->p_orphans, p_orphan) { pgrp = q->p_pgrp; PGRP_LOCK(pgrp); if (pgrp_calc_jobc(pgrp) == 0) { if (isjobproc(p, pgrp)) orphanpg(pgrp); } else pgrp->pg_flags &= ~PGRP_ORPHANED; PGRP_UNLOCK(pgrp); } } void killjobc(void) { struct session *sp; struct tty *tp; struct proc *p; struct vnode *ttyvp; p = curproc; MPASS(p->p_flag & P_WEXIT); sx_assert(&proctree_lock, SX_LOCKED); if (SESS_LEADER(p)) { sp = p->p_session; /* * s_ttyp is not zero'd; we use this to indicate that * the session once had a controlling terminal. (for * logging and informational purposes) */ SESS_LOCK(sp); ttyvp = sp->s_ttyvp; tp = sp->s_ttyp; sp->s_ttyvp = NULL; sp->s_ttydp = NULL; sp->s_leader = NULL; SESS_UNLOCK(sp); /* * Signal foreground pgrp and revoke access to * controlling terminal if it has not been revoked * already. * * Because the TTY may have been revoked in the mean * time and could already have a new session associated * with it, make sure we don't send a SIGHUP to a * foreground process group that does not belong to this * session. */ if (tp != NULL) { tty_lock(tp); if (tp->t_session == sp) tty_signal_pgrp(tp, SIGHUP); tty_unlock(tp); } if (ttyvp != NULL) { sx_xunlock(&proctree_lock); if (vn_lock(ttyvp, LK_EXCLUSIVE) == 0) { VOP_REVOKE(ttyvp, REVOKEALL); VOP_UNLOCK(ttyvp); } devfs_ctty_unref(ttyvp); sx_xlock(&proctree_lock); } } fixjobc_kill(p); } /* * A process group has become orphaned, mark it as such for signal * delivery code. If there are any stopped processes in the group, * hang-up all process in that group. */ static void orphanpg(struct pgrp *pg) { struct proc *p; PGRP_LOCK_ASSERT(pg, MA_OWNED); pg->pg_flags |= PGRP_ORPHANED; LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); if (P_SHOULDSTOP(p) == P_STOPPED_SIG) { PROC_UNLOCK(p); LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); kern_psignal(p, SIGHUP); kern_psignal(p, SIGCONT); PROC_UNLOCK(p); } return; } PROC_UNLOCK(p); } } void sess_hold(struct session *s) { refcount_acquire(&s->s_count); } void sess_release(struct session *s) { if (refcount_release(&s->s_count)) { if (s->s_ttyp != NULL) { tty_lock(s->s_ttyp); tty_rel_sess(s->s_ttyp, s); } proc_id_clear(PROC_ID_SESSION, s->s_sid); mtx_destroy(&s->s_mtx); free(s, M_SESSION); } } #ifdef DDB static void db_print_pgrp_one(struct pgrp *pgrp, struct proc *p) { db_printf( " pid %d at %p pr %d pgrp %p e %d jc %d\n", p->p_pid, p, p->p_pptr == NULL ? -1 : p->p_pptr->p_pid, p->p_pgrp, (p->p_treeflag & P_TREE_GRPEXITED) != 0, p->p_pptr == NULL ? 0 : isjobproc(p->p_pptr, pgrp)); } DB_SHOW_COMMAND(pgrpdump, pgrpdump) { struct pgrp *pgrp; struct proc *p; int i; for (i = 0; i <= pgrphash; i++) { if (!LIST_EMPTY(&pgrphashtbl[i])) { db_printf("indx %d\n", i); LIST_FOREACH(pgrp, &pgrphashtbl[i], pg_hash) { db_printf( " pgrp %p, pgid %d, sess %p, sesscnt %d, mem %p\n", pgrp, (int)pgrp->pg_id, pgrp->pg_session, pgrp->pg_session->s_count, LIST_FIRST(&pgrp->pg_members)); LIST_FOREACH(p, &pgrp->pg_members, p_pglist) db_print_pgrp_one(pgrp, p); } } } } #endif /* DDB */ /* * Calculate the kinfo_proc members which contain process-wide * informations. * Must be called with the target process locked. */ static void fill_kinfo_aggregate(struct proc *p, struct kinfo_proc *kp) { struct thread *td; PROC_LOCK_ASSERT(p, MA_OWNED); kp->ki_estcpu = 0; kp->ki_pctcpu = 0; FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); kp->ki_pctcpu += sched_pctcpu(td); kp->ki_estcpu += sched_estcpu(td); thread_unlock(td); } } /* * Fill in any information that is common to all threads in the process. * Must be called with the target process locked. */ static void fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp) { struct thread *td0; struct ucred *cred; struct sigacts *ps; struct timeval boottime; PROC_LOCK_ASSERT(p, MA_OWNED); kp->ki_structsize = sizeof(*kp); kp->ki_paddr = p; kp->ki_addr =/* p->p_addr; */0; /* XXX */ kp->ki_args = p->p_args; kp->ki_textvp = p->p_textvp; #ifdef KTRACE kp->ki_tracep = ktr_get_tracevp(p, false); kp->ki_traceflag = p->p_traceflag; #endif kp->ki_fd = p->p_fd; kp->ki_pd = p->p_pd; kp->ki_vmspace = p->p_vmspace; kp->ki_flag = p->p_flag; kp->ki_flag2 = p->p_flag2; cred = p->p_ucred; if (cred) { kp->ki_uid = cred->cr_uid; kp->ki_ruid = cred->cr_ruid; kp->ki_svuid = cred->cr_svuid; kp->ki_cr_flags = 0; if (cred->cr_flags & CRED_FLAG_CAPMODE) kp->ki_cr_flags |= KI_CRF_CAPABILITY_MODE; /* XXX bde doesn't like KI_NGROUPS */ if (cred->cr_ngroups > KI_NGROUPS) { kp->ki_ngroups = KI_NGROUPS; kp->ki_cr_flags |= KI_CRF_GRP_OVERFLOW; } else kp->ki_ngroups = cred->cr_ngroups; bcopy(cred->cr_groups, kp->ki_groups, kp->ki_ngroups * sizeof(gid_t)); kp->ki_rgid = cred->cr_rgid; kp->ki_svgid = cred->cr_svgid; /* If jailed(cred), emulate the old P_JAILED flag. */ if (jailed(cred)) { kp->ki_flag |= P_JAILED; /* If inside the jail, use 0 as a jail ID. */ if (cred->cr_prison != curthread->td_ucred->cr_prison) kp->ki_jid = cred->cr_prison->pr_id; } strlcpy(kp->ki_loginclass, cred->cr_loginclass->lc_name, sizeof(kp->ki_loginclass)); } ps = p->p_sigacts; if (ps) { mtx_lock(&ps->ps_mtx); kp->ki_sigignore = ps->ps_sigignore; kp->ki_sigcatch = ps->ps_sigcatch; mtx_unlock(&ps->ps_mtx); } if (p->p_state != PRS_NEW && p->p_state != PRS_ZOMBIE && p->p_vmspace != NULL) { struct vmspace *vm = p->p_vmspace; kp->ki_size = vm->vm_map.size; kp->ki_rssize = vmspace_resident_count(vm); /*XXX*/ FOREACH_THREAD_IN_PROC(p, td0) { if (!TD_IS_SWAPPED(td0)) kp->ki_rssize += td0->td_kstack_pages; } kp->ki_swrss = vm->vm_swrss; kp->ki_tsize = vm->vm_tsize; kp->ki_dsize = vm->vm_dsize; kp->ki_ssize = vm->vm_ssize; } else if (p->p_state == PRS_ZOMBIE) kp->ki_stat = SZOMB; if (kp->ki_flag & P_INMEM) kp->ki_sflag = PS_INMEM; else kp->ki_sflag = 0; /* Calculate legacy swtime as seconds since 'swtick'. */ kp->ki_swtime = (ticks - p->p_swtick) / hz; kp->ki_pid = p->p_pid; kp->ki_nice = p->p_nice; kp->ki_fibnum = p->p_fibnum; kp->ki_start = p->p_stats->p_start; getboottime(&boottime); timevaladd(&kp->ki_start, &boottime); PROC_STATLOCK(p); rufetch(p, &kp->ki_rusage); kp->ki_runtime = cputick2usec(p->p_rux.rux_runtime); calcru(p, &kp->ki_rusage.ru_utime, &kp->ki_rusage.ru_stime); PROC_STATUNLOCK(p); calccru(p, &kp->ki_childutime, &kp->ki_childstime); /* Some callers want child times in a single value. */ kp->ki_childtime = kp->ki_childstime; timevaladd(&kp->ki_childtime, &kp->ki_childutime); FOREACH_THREAD_IN_PROC(p, td0) kp->ki_cow += td0->td_cow; if (p->p_comm[0] != '\0') strlcpy(kp->ki_comm, p->p_comm, sizeof(kp->ki_comm)); if (p->p_sysent && p->p_sysent->sv_name != NULL && p->p_sysent->sv_name[0] != '\0') strlcpy(kp->ki_emul, p->p_sysent->sv_name, sizeof(kp->ki_emul)); kp->ki_siglist = p->p_siglist; kp->ki_xstat = KW_EXITCODE(p->p_xexit, p->p_xsig); kp->ki_acflag = p->p_acflag; kp->ki_lock = p->p_lock; if (p->p_pptr) { kp->ki_ppid = p->p_oppid; if (p->p_flag & P_TRACED) kp->ki_tracer = p->p_pptr->p_pid; } } /* * Fill job-related process information. */ static void fill_kinfo_proc_pgrp(struct proc *p, struct kinfo_proc *kp) { struct tty *tp; struct session *sp; struct pgrp *pgrp; sx_assert(&proctree_lock, SA_LOCKED); PROC_LOCK_ASSERT(p, MA_OWNED); pgrp = p->p_pgrp; if (pgrp == NULL) return; kp->ki_pgid = pgrp->pg_id; kp->ki_jobc = pgrp_calc_jobc(pgrp); sp = pgrp->pg_session; tp = NULL; if (sp != NULL) { kp->ki_sid = sp->s_sid; SESS_LOCK(sp); strlcpy(kp->ki_login, sp->s_login, sizeof(kp->ki_login)); if (sp->s_ttyvp) kp->ki_kiflag |= KI_CTTY; if (SESS_LEADER(p)) kp->ki_kiflag |= KI_SLEADER; tp = sp->s_ttyp; SESS_UNLOCK(sp); } if ((p->p_flag & P_CONTROLT) && tp != NULL) { kp->ki_tdev = tty_udev(tp); kp->ki_tdev_freebsd11 = kp->ki_tdev; /* truncate */ kp->ki_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID; if (tp->t_session) kp->ki_tsid = tp->t_session->s_sid; } else { kp->ki_tdev = NODEV; kp->ki_tdev_freebsd11 = kp->ki_tdev; /* truncate */ } } /* * Fill in information that is thread specific. Must be called with * target process locked. If 'preferthread' is set, overwrite certain * process-related fields that are maintained for both threads and * processes. */ static void fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp, int preferthread) { struct proc *p; p = td->td_proc; kp->ki_tdaddr = td; PROC_LOCK_ASSERT(p, MA_OWNED); if (preferthread) PROC_STATLOCK(p); thread_lock(td); if (td->td_wmesg != NULL) strlcpy(kp->ki_wmesg, td->td_wmesg, sizeof(kp->ki_wmesg)); else bzero(kp->ki_wmesg, sizeof(kp->ki_wmesg)); if (strlcpy(kp->ki_tdname, td->td_name, sizeof(kp->ki_tdname)) >= sizeof(kp->ki_tdname)) { strlcpy(kp->ki_moretdname, td->td_name + sizeof(kp->ki_tdname) - 1, sizeof(kp->ki_moretdname)); } else { bzero(kp->ki_moretdname, sizeof(kp->ki_moretdname)); } if (TD_ON_LOCK(td)) { kp->ki_kiflag |= KI_LOCKBLOCK; strlcpy(kp->ki_lockname, td->td_lockname, sizeof(kp->ki_lockname)); } else { kp->ki_kiflag &= ~KI_LOCKBLOCK; bzero(kp->ki_lockname, sizeof(kp->ki_lockname)); } if (p->p_state == PRS_NORMAL) { /* approximate. */ if (TD_ON_RUNQ(td) || TD_CAN_RUN(td) || TD_IS_RUNNING(td)) { kp->ki_stat = SRUN; } else if (P_SHOULDSTOP(p)) { kp->ki_stat = SSTOP; } else if (TD_IS_SLEEPING(td)) { kp->ki_stat = SSLEEP; } else if (TD_ON_LOCK(td)) { kp->ki_stat = SLOCK; } else { kp->ki_stat = SWAIT; } } else if (p->p_state == PRS_ZOMBIE) { kp->ki_stat = SZOMB; } else { kp->ki_stat = SIDL; } /* Things in the thread */ kp->ki_wchan = td->td_wchan; kp->ki_pri.pri_level = td->td_priority; kp->ki_pri.pri_native = td->td_base_pri; /* * Note: legacy fields; clamp at the old NOCPU value and/or * the maximum u_char CPU value. */ if (td->td_lastcpu == NOCPU) kp->ki_lastcpu_old = NOCPU_OLD; else if (td->td_lastcpu > MAXCPU_OLD) kp->ki_lastcpu_old = MAXCPU_OLD; else kp->ki_lastcpu_old = td->td_lastcpu; if (td->td_oncpu == NOCPU) kp->ki_oncpu_old = NOCPU_OLD; else if (td->td_oncpu > MAXCPU_OLD) kp->ki_oncpu_old = MAXCPU_OLD; else kp->ki_oncpu_old = td->td_oncpu; kp->ki_lastcpu = td->td_lastcpu; kp->ki_oncpu = td->td_oncpu; kp->ki_tdflags = td->td_flags; kp->ki_tid = td->td_tid; kp->ki_numthreads = p->p_numthreads; kp->ki_pcb = td->td_pcb; kp->ki_kstack = (void *)td->td_kstack; kp->ki_slptime = (ticks - td->td_slptick) / hz; kp->ki_pri.pri_class = td->td_pri_class; kp->ki_pri.pri_user = td->td_user_pri; if (preferthread) { rufetchtd(td, &kp->ki_rusage); kp->ki_runtime = cputick2usec(td->td_rux.rux_runtime); kp->ki_pctcpu = sched_pctcpu(td); kp->ki_estcpu = sched_estcpu(td); kp->ki_cow = td->td_cow; } /* We can't get this anymore but ps etc never used it anyway. */ kp->ki_rqindex = 0; if (preferthread) kp->ki_siglist = td->td_siglist; kp->ki_sigmask = td->td_sigmask; thread_unlock(td); if (preferthread) PROC_STATUNLOCK(p); } /* * Fill in a kinfo_proc structure for the specified process. * Must be called with the target process locked. */ void fill_kinfo_proc(struct proc *p, struct kinfo_proc *kp) { MPASS(FIRST_THREAD_IN_PROC(p) != NULL); bzero(kp, sizeof(*kp)); fill_kinfo_proc_pgrp(p,kp); fill_kinfo_proc_only(p, kp); fill_kinfo_thread(FIRST_THREAD_IN_PROC(p), kp, 0); fill_kinfo_aggregate(p, kp); } struct pstats * pstats_alloc(void) { return (malloc(sizeof(struct pstats), M_SUBPROC, M_ZERO|M_WAITOK)); } /* * Copy parts of p_stats; zero the rest of p_stats (statistics). */ void pstats_fork(struct pstats *src, struct pstats *dst) { bzero(&dst->pstat_startzero, __rangeof(struct pstats, pstat_startzero, pstat_endzero)); bcopy(&src->pstat_startcopy, &dst->pstat_startcopy, __rangeof(struct pstats, pstat_startcopy, pstat_endcopy)); } void pstats_free(struct pstats *ps) { free(ps, M_SUBPROC); } #ifdef COMPAT_FREEBSD32 /* * This function is typically used to copy out the kernel address, so * it can be replaced by assignment of zero. */ static inline uint32_t ptr32_trim(const void *ptr) { uintptr_t uptr; uptr = (uintptr_t)ptr; return ((uptr > UINT_MAX) ? 0 : uptr); } #define PTRTRIM_CP(src,dst,fld) \ do { (dst).fld = ptr32_trim((src).fld); } while (0) static void freebsd32_kinfo_proc_out(const struct kinfo_proc *ki, struct kinfo_proc32 *ki32) { int i; bzero(ki32, sizeof(struct kinfo_proc32)); ki32->ki_structsize = sizeof(struct kinfo_proc32); CP(*ki, *ki32, ki_layout); PTRTRIM_CP(*ki, *ki32, ki_args); PTRTRIM_CP(*ki, *ki32, ki_paddr); PTRTRIM_CP(*ki, *ki32, ki_addr); PTRTRIM_CP(*ki, *ki32, ki_tracep); PTRTRIM_CP(*ki, *ki32, ki_textvp); PTRTRIM_CP(*ki, *ki32, ki_fd); PTRTRIM_CP(*ki, *ki32, ki_vmspace); PTRTRIM_CP(*ki, *ki32, ki_wchan); CP(*ki, *ki32, ki_pid); CP(*ki, *ki32, ki_ppid); CP(*ki, *ki32, ki_pgid); CP(*ki, *ki32, ki_tpgid); CP(*ki, *ki32, ki_sid); CP(*ki, *ki32, ki_tsid); CP(*ki, *ki32, ki_jobc); CP(*ki, *ki32, ki_tdev); CP(*ki, *ki32, ki_tdev_freebsd11); CP(*ki, *ki32, ki_siglist); CP(*ki, *ki32, ki_sigmask); CP(*ki, *ki32, ki_sigignore); CP(*ki, *ki32, ki_sigcatch); CP(*ki, *ki32, ki_uid); CP(*ki, *ki32, ki_ruid); CP(*ki, *ki32, ki_svuid); CP(*ki, *ki32, ki_rgid); CP(*ki, *ki32, ki_svgid); CP(*ki, *ki32, ki_ngroups); for (i = 0; i < KI_NGROUPS; i++) CP(*ki, *ki32, ki_groups[i]); CP(*ki, *ki32, ki_size); CP(*ki, *ki32, ki_rssize); CP(*ki, *ki32, ki_swrss); CP(*ki, *ki32, ki_tsize); CP(*ki, *ki32, ki_dsize); CP(*ki, *ki32, ki_ssize); CP(*ki, *ki32, ki_xstat); CP(*ki, *ki32, ki_acflag); CP(*ki, *ki32, ki_pctcpu); CP(*ki, *ki32, ki_estcpu); CP(*ki, *ki32, ki_slptime); CP(*ki, *ki32, ki_swtime); CP(*ki, *ki32, ki_cow); CP(*ki, *ki32, ki_runtime); TV_CP(*ki, *ki32, ki_start); TV_CP(*ki, *ki32, ki_childtime); CP(*ki, *ki32, ki_flag); CP(*ki, *ki32, ki_kiflag); CP(*ki, *ki32, ki_traceflag); CP(*ki, *ki32, ki_stat); CP(*ki, *ki32, ki_nice); CP(*ki, *ki32, ki_lock); CP(*ki, *ki32, ki_rqindex); CP(*ki, *ki32, ki_oncpu); CP(*ki, *ki32, ki_lastcpu); /* XXX TODO: wrap cpu value as appropriate */ CP(*ki, *ki32, ki_oncpu_old); CP(*ki, *ki32, ki_lastcpu_old); bcopy(ki->ki_tdname, ki32->ki_tdname, TDNAMLEN + 1); bcopy(ki->ki_wmesg, ki32->ki_wmesg, WMESGLEN + 1); bcopy(ki->ki_login, ki32->ki_login, LOGNAMELEN + 1); bcopy(ki->ki_lockname, ki32->ki_lockname, LOCKNAMELEN + 1); bcopy(ki->ki_comm, ki32->ki_comm, COMMLEN + 1); bcopy(ki->ki_emul, ki32->ki_emul, KI_EMULNAMELEN + 1); bcopy(ki->ki_loginclass, ki32->ki_loginclass, LOGINCLASSLEN + 1); bcopy(ki->ki_moretdname, ki32->ki_moretdname, MAXCOMLEN - TDNAMLEN + 1); CP(*ki, *ki32, ki_tracer); CP(*ki, *ki32, ki_flag2); CP(*ki, *ki32, ki_fibnum); CP(*ki, *ki32, ki_cr_flags); CP(*ki, *ki32, ki_jid); CP(*ki, *ki32, ki_numthreads); CP(*ki, *ki32, ki_tid); CP(*ki, *ki32, ki_pri); freebsd32_rusage_out(&ki->ki_rusage, &ki32->ki_rusage); freebsd32_rusage_out(&ki->ki_rusage_ch, &ki32->ki_rusage_ch); PTRTRIM_CP(*ki, *ki32, ki_pcb); PTRTRIM_CP(*ki, *ki32, ki_kstack); PTRTRIM_CP(*ki, *ki32, ki_udata); PTRTRIM_CP(*ki, *ki32, ki_tdaddr); CP(*ki, *ki32, ki_sflag); CP(*ki, *ki32, ki_tdflags); } #endif static ssize_t kern_proc_out_size(struct proc *p, int flags) { ssize_t size = 0; PROC_LOCK_ASSERT(p, MA_OWNED); if ((flags & KERN_PROC_NOTHREADS) != 0) { #ifdef COMPAT_FREEBSD32 if ((flags & KERN_PROC_MASK32) != 0) { size += sizeof(struct kinfo_proc32); } else #endif size += sizeof(struct kinfo_proc); } else { #ifdef COMPAT_FREEBSD32 if ((flags & KERN_PROC_MASK32) != 0) size += sizeof(struct kinfo_proc32) * p->p_numthreads; else #endif size += sizeof(struct kinfo_proc) * p->p_numthreads; } PROC_UNLOCK(p); return (size); } int kern_proc_out(struct proc *p, struct sbuf *sb, int flags) { struct thread *td; struct kinfo_proc ki; #ifdef COMPAT_FREEBSD32 struct kinfo_proc32 ki32; #endif int error; PROC_LOCK_ASSERT(p, MA_OWNED); MPASS(FIRST_THREAD_IN_PROC(p) != NULL); error = 0; fill_kinfo_proc(p, &ki); if ((flags & KERN_PROC_NOTHREADS) != 0) { #ifdef COMPAT_FREEBSD32 if ((flags & KERN_PROC_MASK32) != 0) { freebsd32_kinfo_proc_out(&ki, &ki32); if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0) error = ENOMEM; } else #endif if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0) error = ENOMEM; } else { FOREACH_THREAD_IN_PROC(p, td) { fill_kinfo_thread(td, &ki, 1); #ifdef COMPAT_FREEBSD32 if ((flags & KERN_PROC_MASK32) != 0) { freebsd32_kinfo_proc_out(&ki, &ki32); if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0) error = ENOMEM; } else #endif if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0) error = ENOMEM; if (error != 0) break; } } PROC_UNLOCK(p); return (error); } static int sysctl_out_proc(struct proc *p, struct sysctl_req *req, int flags) { struct sbuf sb; struct kinfo_proc ki; int error, error2; if (req->oldptr == NULL) return (SYSCTL_OUT(req, 0, kern_proc_out_size(p, flags))); sbuf_new_for_sysctl(&sb, (char *)&ki, sizeof(ki), req); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); error = kern_proc_out(p, &sb, flags); error2 = sbuf_finish(&sb); sbuf_delete(&sb); if (error != 0) return (error); else if (error2 != 0) return (error2); return (0); } int proc_iterate(int (*cb)(struct proc *, void *), void *cbarg) { struct proc *p; int error, i, j; for (i = 0; i < pidhashlock + 1; i++) { sx_slock(&proctree_lock); sx_slock(&pidhashtbl_lock[i]); for (j = i; j <= pidhash; j += pidhashlock + 1) { LIST_FOREACH(p, &pidhashtbl[j], p_hash) { if (p->p_state == PRS_NEW) continue; error = cb(p, cbarg); PROC_LOCK_ASSERT(p, MA_NOTOWNED); if (error != 0) { sx_sunlock(&pidhashtbl_lock[i]); sx_sunlock(&proctree_lock); return (error); } } } sx_sunlock(&pidhashtbl_lock[i]); sx_sunlock(&proctree_lock); } return (0); } struct kern_proc_out_args { struct sysctl_req *req; int flags; int oid_number; int *name; }; static int sysctl_kern_proc_iterate(struct proc *p, void *origarg) { struct kern_proc_out_args *arg = origarg; int *name = arg->name; int oid_number = arg->oid_number; int flags = arg->flags; struct sysctl_req *req = arg->req; int error = 0; PROC_LOCK(p); KASSERT(p->p_ucred != NULL, ("process credential is NULL for non-NEW proc")); /* * Show a user only appropriate processes. */ if (p_cansee(curthread, p)) goto skip; /* * TODO - make more efficient (see notes below). * do by session. */ switch (oid_number) { case KERN_PROC_GID: if (p->p_ucred->cr_gid != (gid_t)name[0]) goto skip; break; case KERN_PROC_PGRP: /* could do this by traversing pgrp */ if (p->p_pgrp == NULL || p->p_pgrp->pg_id != (pid_t)name[0]) goto skip; break; case KERN_PROC_RGID: if (p->p_ucred->cr_rgid != (gid_t)name[0]) goto skip; break; case KERN_PROC_SESSION: if (p->p_session == NULL || p->p_session->s_sid != (pid_t)name[0]) goto skip; break; case KERN_PROC_TTY: if ((p->p_flag & P_CONTROLT) == 0 || p->p_session == NULL) goto skip; /* XXX proctree_lock */ SESS_LOCK(p->p_session); if (p->p_session->s_ttyp == NULL || tty_udev(p->p_session->s_ttyp) != (dev_t)name[0]) { SESS_UNLOCK(p->p_session); goto skip; } SESS_UNLOCK(p->p_session); break; case KERN_PROC_UID: if (p->p_ucred->cr_uid != (uid_t)name[0]) goto skip; break; case KERN_PROC_RUID: if (p->p_ucred->cr_ruid != (uid_t)name[0]) goto skip; break; case KERN_PROC_PROC: break; default: break; } error = sysctl_out_proc(p, req, flags); PROC_LOCK_ASSERT(p, MA_NOTOWNED); return (error); skip: PROC_UNLOCK(p); return (0); } static int sysctl_kern_proc(SYSCTL_HANDLER_ARGS) { struct kern_proc_out_args iterarg; int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; int flags, oid_number; int error = 0; oid_number = oidp->oid_number; if (oid_number != KERN_PROC_ALL && (oid_number & KERN_PROC_INC_THREAD) == 0) flags = KERN_PROC_NOTHREADS; else { flags = 0; oid_number &= ~KERN_PROC_INC_THREAD; } #ifdef COMPAT_FREEBSD32 if (req->flags & SCTL_MASK32) flags |= KERN_PROC_MASK32; #endif if (oid_number == KERN_PROC_PID) { if (namelen != 1) return (EINVAL); error = sysctl_wire_old_buffer(req, 0); if (error) return (error); sx_slock(&proctree_lock); error = pget((pid_t)name[0], PGET_CANSEE, &p); if (error == 0) error = sysctl_out_proc(p, req, flags); sx_sunlock(&proctree_lock); return (error); } switch (oid_number) { case KERN_PROC_ALL: if (namelen != 0) return (EINVAL); break; case KERN_PROC_PROC: if (namelen != 0 && namelen != 1) return (EINVAL); break; default: if (namelen != 1) return (EINVAL); break; } if (req->oldptr == NULL) { /* overestimate by 5 procs */ error = SYSCTL_OUT(req, 0, sizeof (struct kinfo_proc) * 5); if (error) return (error); } else { error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); } iterarg.flags = flags; iterarg.oid_number = oid_number; iterarg.req = req; iterarg.name = name; error = proc_iterate(sysctl_kern_proc_iterate, &iterarg); return (error); } struct pargs * pargs_alloc(int len) { struct pargs *pa; pa = malloc(sizeof(struct pargs) + len, M_PARGS, M_WAITOK); refcount_init(&pa->ar_ref, 1); pa->ar_length = len; return (pa); } static void pargs_free(struct pargs *pa) { free(pa, M_PARGS); } void pargs_hold(struct pargs *pa) { if (pa == NULL) return; refcount_acquire(&pa->ar_ref); } void pargs_drop(struct pargs *pa) { if (pa == NULL) return; if (refcount_release(&pa->ar_ref)) pargs_free(pa); } static int proc_read_string(struct thread *td, struct proc *p, const char *sptr, char *buf, size_t len) { ssize_t n; /* * This may return a short read if the string is shorter than the chunk * and is aligned at the end of the page, and the following page is not * mapped. */ n = proc_readmem(td, p, (vm_offset_t)sptr, buf, len); if (n <= 0) return (ENOMEM); return (0); } #define PROC_AUXV_MAX 256 /* Safety limit on auxv size. */ enum proc_vector_type { PROC_ARG, PROC_ENV, PROC_AUX, }; #ifdef COMPAT_FREEBSD32 static int get_proc_vector32(struct thread *td, struct proc *p, char ***proc_vectorp, size_t *vsizep, enum proc_vector_type type) { struct freebsd32_ps_strings pss; Elf32_Auxinfo aux; vm_offset_t vptr, ptr; uint32_t *proc_vector32; char **proc_vector; size_t vsize, size; int i, error; error = 0; if (proc_readmem(td, p, (vm_offset_t)p->p_sysent->sv_psstrings, &pss, sizeof(pss)) != sizeof(pss)) return (ENOMEM); switch (type) { case PROC_ARG: vptr = (vm_offset_t)PTRIN(pss.ps_argvstr); vsize = pss.ps_nargvstr; if (vsize > ARG_MAX) return (ENOEXEC); size = vsize * sizeof(int32_t); break; case PROC_ENV: vptr = (vm_offset_t)PTRIN(pss.ps_envstr); vsize = pss.ps_nenvstr; if (vsize > ARG_MAX) return (ENOEXEC); size = vsize * sizeof(int32_t); break; case PROC_AUX: vptr = (vm_offset_t)PTRIN(pss.ps_envstr) + (pss.ps_nenvstr + 1) * sizeof(int32_t); if (vptr % 4 != 0) return (ENOEXEC); for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) { if (proc_readmem(td, p, ptr, &aux, sizeof(aux)) != sizeof(aux)) return (ENOMEM); if (aux.a_type == AT_NULL) break; ptr += sizeof(aux); } if (aux.a_type != AT_NULL) return (ENOEXEC); vsize = i + 1; size = vsize * sizeof(aux); break; default: KASSERT(0, ("Wrong proc vector type: %d", type)); return (EINVAL); } proc_vector32 = malloc(size, M_TEMP, M_WAITOK); if (proc_readmem(td, p, vptr, proc_vector32, size) != size) { error = ENOMEM; goto done; } if (type == PROC_AUX) { *proc_vectorp = (char **)proc_vector32; *vsizep = vsize; return (0); } proc_vector = malloc(vsize * sizeof(char *), M_TEMP, M_WAITOK); for (i = 0; i < (int)vsize; i++) proc_vector[i] = PTRIN(proc_vector32[i]); *proc_vectorp = proc_vector; *vsizep = vsize; done: free(proc_vector32, M_TEMP); return (error); } #endif static int get_proc_vector(struct thread *td, struct proc *p, char ***proc_vectorp, size_t *vsizep, enum proc_vector_type type) { struct ps_strings pss; Elf_Auxinfo aux; vm_offset_t vptr, ptr; char **proc_vector; size_t vsize, size; int i; #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(p, SV_ILP32) != 0) return (get_proc_vector32(td, p, proc_vectorp, vsizep, type)); #endif if (proc_readmem(td, p, (vm_offset_t)p->p_sysent->sv_psstrings, &pss, sizeof(pss)) != sizeof(pss)) return (ENOMEM); switch (type) { case PROC_ARG: vptr = (vm_offset_t)pss.ps_argvstr; vsize = pss.ps_nargvstr; if (vsize > ARG_MAX) return (ENOEXEC); size = vsize * sizeof(char *); break; case PROC_ENV: vptr = (vm_offset_t)pss.ps_envstr; vsize = pss.ps_nenvstr; if (vsize > ARG_MAX) return (ENOEXEC); size = vsize * sizeof(char *); break; case PROC_AUX: /* * The aux array is just above env array on the stack. Check * that the address is naturally aligned. */ vptr = (vm_offset_t)pss.ps_envstr + (pss.ps_nenvstr + 1) * sizeof(char *); #if __ELF_WORD_SIZE == 64 if (vptr % sizeof(uint64_t) != 0) #else if (vptr % sizeof(uint32_t) != 0) #endif return (ENOEXEC); /* * We count the array size reading the aux vectors from the * stack until AT_NULL vector is returned. So (to keep the code * simple) we read the process stack twice: the first time here * to find the size and the second time when copying the vectors * to the allocated proc_vector. */ for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) { if (proc_readmem(td, p, ptr, &aux, sizeof(aux)) != sizeof(aux)) return (ENOMEM); if (aux.a_type == AT_NULL) break; ptr += sizeof(aux); } /* * If the PROC_AUXV_MAX entries are iterated over, and we have * not reached AT_NULL, it is most likely we are reading wrong * data: either the process doesn't have auxv array or data has * been modified. Return the error in this case. */ if (aux.a_type != AT_NULL) return (ENOEXEC); vsize = i + 1; size = vsize * sizeof(aux); break; default: KASSERT(0, ("Wrong proc vector type: %d", type)); return (EINVAL); /* In case we are built without INVARIANTS. */ } proc_vector = malloc(size, M_TEMP, M_WAITOK); if (proc_readmem(td, p, vptr, proc_vector, size) != size) { free(proc_vector, M_TEMP); return (ENOMEM); } *proc_vectorp = proc_vector; *vsizep = vsize; return (0); } #define GET_PS_STRINGS_CHUNK_SZ 256 /* Chunk size (bytes) for ps_strings operations. */ static int get_ps_strings(struct thread *td, struct proc *p, struct sbuf *sb, enum proc_vector_type type) { size_t done, len, nchr, vsize; int error, i; char **proc_vector, *sptr; char pss_string[GET_PS_STRINGS_CHUNK_SZ]; PROC_ASSERT_HELD(p); /* * We are not going to read more than 2 * (PATH_MAX + ARG_MAX) bytes. */ nchr = 2 * (PATH_MAX + ARG_MAX); error = get_proc_vector(td, p, &proc_vector, &vsize, type); if (error != 0) return (error); for (done = 0, i = 0; i < (int)vsize && done < nchr; i++) { /* * The program may have scribbled into its argv array, e.g. to * remove some arguments. If that has happened, break out * before trying to read from NULL. */ if (proc_vector[i] == NULL) break; for (sptr = proc_vector[i]; ; sptr += GET_PS_STRINGS_CHUNK_SZ) { error = proc_read_string(td, p, sptr, pss_string, sizeof(pss_string)); if (error != 0) goto done; len = strnlen(pss_string, GET_PS_STRINGS_CHUNK_SZ); if (done + len >= nchr) len = nchr - done - 1; sbuf_bcat(sb, pss_string, len); if (len != GET_PS_STRINGS_CHUNK_SZ) break; done += GET_PS_STRINGS_CHUNK_SZ; } sbuf_bcat(sb, "", 1); done += len + 1; } done: free(proc_vector, M_TEMP); return (error); } int proc_getargv(struct thread *td, struct proc *p, struct sbuf *sb) { return (get_ps_strings(curthread, p, sb, PROC_ARG)); } int proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb) { return (get_ps_strings(curthread, p, sb, PROC_ENV)); } int proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb) { size_t vsize, size; char **auxv; int error; error = get_proc_vector(td, p, &auxv, &vsize, PROC_AUX); if (error == 0) { #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(p, SV_ILP32) != 0) size = vsize * sizeof(Elf32_Auxinfo); else #endif size = vsize * sizeof(Elf_Auxinfo); if (sbuf_bcat(sb, auxv, size) != 0) error = ENOMEM; free(auxv, M_TEMP); } return (error); } /* * This sysctl allows a process to retrieve the argument list or process * title for another process without groping around in the address space * of the other process. It also allow a process to set its own "process * title to a string of its own choice. */ static int sysctl_kern_proc_args(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct pargs *newpa, *pa; struct proc *p; struct sbuf sb; int flags, error = 0, error2; pid_t pid; if (namelen != 1) return (EINVAL); p = curproc; pid = (pid_t)name[0]; if (pid == -1) { pid = p->p_pid; } /* * If the query is for this process and it is single-threaded, there * is nobody to modify pargs, thus we can just read. */ if (pid == p->p_pid && p->p_numthreads == 1 && req->newptr == NULL && (pa = p->p_args) != NULL) return (SYSCTL_OUT(req, pa->ar_args, pa->ar_length)); flags = PGET_CANSEE; if (req->newptr != NULL) flags |= PGET_ISCURRENT; error = pget(pid, flags, &p); if (error) return (error); pa = p->p_args; if (pa != NULL) { pargs_hold(pa); PROC_UNLOCK(p); error = SYSCTL_OUT(req, pa->ar_args, pa->ar_length); pargs_drop(pa); } else if ((p->p_flag & (P_WEXIT | P_SYSTEM)) == 0) { _PHOLD(p); PROC_UNLOCK(p); sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); error = proc_getargv(curthread, p, &sb); error2 = sbuf_finish(&sb); PRELE(p); sbuf_delete(&sb); if (error == 0 && error2 != 0) error = error2; } else { PROC_UNLOCK(p); } if (error != 0 || req->newptr == NULL) return (error); if (req->newlen > ps_arg_cache_limit - sizeof(struct pargs)) return (ENOMEM); if (req->newlen == 0) { /* * Clear the argument pointer, so that we'll fetch arguments * with proc_getargv() until further notice. */ newpa = NULL; } else { newpa = pargs_alloc(req->newlen); error = SYSCTL_IN(req, newpa->ar_args, req->newlen); if (error != 0) { pargs_free(newpa); return (error); } } PROC_LOCK(p); pa = p->p_args; p->p_args = newpa; PROC_UNLOCK(p); pargs_drop(pa); return (0); } /* * This sysctl allows a process to retrieve environment of another process. */ static int sysctl_kern_proc_env(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; struct sbuf sb; int error, error2; if (namelen != 1) return (EINVAL); error = pget((pid_t)name[0], PGET_WANTREAD, &p); if (error != 0) return (error); if ((p->p_flag & P_SYSTEM) != 0) { PRELE(p); return (0); } sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); error = proc_getenvv(curthread, p, &sb); error2 = sbuf_finish(&sb); PRELE(p); sbuf_delete(&sb); return (error != 0 ? error : error2); } /* * This sysctl allows a process to retrieve ELF auxiliary vector of * another process. */ static int sysctl_kern_proc_auxv(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; struct sbuf sb; int error, error2; if (namelen != 1) return (EINVAL); error = pget((pid_t)name[0], PGET_WANTREAD, &p); if (error != 0) return (error); if ((p->p_flag & P_SYSTEM) != 0) { PRELE(p); return (0); } sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); error = proc_getauxv(curthread, p, &sb); error2 = sbuf_finish(&sb); PRELE(p); sbuf_delete(&sb); return (error != 0 ? error : error2); } /* * Look up the canonical executable path running in the specified process. * It tries to return the same hardlink name as was used for execve(2). * This allows the programs that modify their behavior based on their progname, * to operate correctly. * * Result is returned in retbuf, it must not be freed, similar to vn_fullpath() * calling conventions. * binname is a pointer to temporary string buffer of length MAXPATHLEN, * allocated and freed by caller. * freebuf should be freed by caller, from the M_TEMP malloc type. */ int proc_get_binpath(struct proc *p, char *binname, char **retbuf, char **freebuf) { struct nameidata nd; struct vnode *vp, *dvp; size_t freepath_size; int error; bool do_fullpath; PROC_LOCK_ASSERT(p, MA_OWNED); vp = p->p_textvp; if (vp == NULL) { PROC_UNLOCK(p); *retbuf = ""; *freebuf = NULL; return (0); } vref(vp); dvp = p->p_textdvp; if (dvp != NULL) vref(dvp); if (p->p_binname != NULL) strlcpy(binname, p->p_binname, MAXPATHLEN); PROC_UNLOCK(p); do_fullpath = true; *freebuf = NULL; if (dvp != NULL && binname[0] != '\0') { freepath_size = MAXPATHLEN; if (vn_fullpath_hardlink(vp, dvp, binname, strlen(binname), retbuf, freebuf, &freepath_size) == 0) { /* * Recheck the looked up path. The binary * might have been renamed or replaced, in * which case we should not report old name. */ NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, *retbuf); error = namei(&nd); if (error == 0) { if (nd.ni_vp == vp) do_fullpath = false; vrele(nd.ni_vp); NDFREE(&nd, NDF_ONLY_PNBUF); } } } if (do_fullpath) { free(*freebuf, M_TEMP); *freebuf = NULL; error = vn_fullpath(vp, retbuf, freebuf); } vrele(vp); if (dvp != NULL) vrele(dvp); return (error); } /* * This sysctl allows a process to retrieve the path of the executable for * itself or another process. */ static int sysctl_kern_proc_pathname(SYSCTL_HANDLER_ARGS) { pid_t *pidp = (pid_t *)arg1; unsigned int arglen = arg2; struct proc *p; char *retbuf, *freebuf, *binname; int error; if (arglen != 1) return (EINVAL); binname = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); binname[0] = '\0'; if (*pidp == -1) { /* -1 means this process */ error = 0; p = req->td->td_proc; PROC_LOCK(p); } else { error = pget(*pidp, PGET_CANSEE, &p); } if (error == 0) error = proc_get_binpath(p, binname, &retbuf, &freebuf); free(binname, M_TEMP); if (error != 0) return (error); error = SYSCTL_OUT(req, retbuf, strlen(retbuf) + 1); free(freebuf, M_TEMP); return (error); } static int sysctl_kern_proc_sv_name(SYSCTL_HANDLER_ARGS) { struct proc *p; char *sv_name; int *name; int namelen; int error; namelen = arg2; if (namelen != 1) return (EINVAL); name = (int *)arg1; error = pget((pid_t)name[0], PGET_CANSEE, &p); if (error != 0) return (error); sv_name = p->p_sysent->sv_name; PROC_UNLOCK(p); return (sysctl_handle_string(oidp, sv_name, 0, req)); } #ifdef KINFO_OVMENTRY_SIZE CTASSERT(sizeof(struct kinfo_ovmentry) == KINFO_OVMENTRY_SIZE); #endif #ifdef COMPAT_FREEBSD7 static int sysctl_kern_proc_ovmmap(SYSCTL_HANDLER_ARGS) { vm_map_entry_t entry, tmp_entry; unsigned int last_timestamp, namelen; char *fullpath, *freepath; struct kinfo_ovmentry *kve; struct vattr va; struct ucred *cred; int error, *name; struct vnode *vp; struct proc *p; vm_map_t map; struct vmspace *vm; namelen = arg2; if (namelen != 1) return (EINVAL); name = (int *)arg1; error = pget((pid_t)name[0], PGET_WANTREAD, &p); if (error != 0) return (error); vm = vmspace_acquire_ref(p); if (vm == NULL) { PRELE(p); return (ESRCH); } kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK); map = &vm->vm_map; vm_map_lock_read(map); VM_MAP_ENTRY_FOREACH(entry, map) { vm_object_t obj, tobj, lobj; vm_offset_t addr; if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) continue; bzero(kve, sizeof(*kve)); kve->kve_structsize = sizeof(*kve); kve->kve_private_resident = 0; obj = entry->object.vm_object; if (obj != NULL) { VM_OBJECT_RLOCK(obj); if (obj->shadow_count == 1) kve->kve_private_resident = obj->resident_page_count; } kve->kve_resident = 0; addr = entry->start; while (addr < entry->end) { if (pmap_extract(map->pmap, addr)) kve->kve_resident++; addr += PAGE_SIZE; } for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) { if (tobj != obj) { VM_OBJECT_RLOCK(tobj); kve->kve_offset += tobj->backing_object_offset; } if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); lobj = tobj; } kve->kve_start = (void*)entry->start; kve->kve_end = (void*)entry->end; kve->kve_offset += (off_t)entry->offset; if (entry->protection & VM_PROT_READ) kve->kve_protection |= KVME_PROT_READ; if (entry->protection & VM_PROT_WRITE) kve->kve_protection |= KVME_PROT_WRITE; if (entry->protection & VM_PROT_EXECUTE) kve->kve_protection |= KVME_PROT_EXEC; if (entry->eflags & MAP_ENTRY_COW) kve->kve_flags |= KVME_FLAG_COW; if (entry->eflags & MAP_ENTRY_NEEDS_COPY) kve->kve_flags |= KVME_FLAG_NEEDS_COPY; if (entry->eflags & MAP_ENTRY_NOCOREDUMP) kve->kve_flags |= KVME_FLAG_NOCOREDUMP; last_timestamp = map->timestamp; vm_map_unlock_read(map); kve->kve_fileid = 0; kve->kve_fsid = 0; freepath = NULL; fullpath = ""; if (lobj) { kve->kve_type = vm_object_kvme_type(lobj, &vp); if (kve->kve_type == KVME_TYPE_MGTDEVICE) kve->kve_type = KVME_TYPE_UNKNOWN; if (vp != NULL) vref(vp); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); kve->kve_ref_count = obj->ref_count; kve->kve_shadow_count = obj->shadow_count; VM_OBJECT_RUNLOCK(obj); if (vp != NULL) { vn_fullpath(vp, &fullpath, &freepath); cred = curthread->td_ucred; vn_lock(vp, LK_SHARED | LK_RETRY); if (VOP_GETATTR(vp, &va, cred) == 0) { kve->kve_fileid = va.va_fileid; /* truncate */ kve->kve_fsid = va.va_fsid; } vput(vp); } } else { kve->kve_type = KVME_TYPE_NONE; kve->kve_ref_count = 0; kve->kve_shadow_count = 0; } strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path)); if (freepath != NULL) free(freepath, M_TEMP); error = SYSCTL_OUT(req, kve, sizeof(*kve)); vm_map_lock_read(map); if (error) break; if (last_timestamp != map->timestamp) { vm_map_lookup_entry(map, addr - 1, &tmp_entry); entry = tmp_entry; } } vm_map_unlock_read(map); vmspace_free(vm); PRELE(p); free(kve, M_TEMP); return (error); } #endif /* COMPAT_FREEBSD7 */ #ifdef KINFO_VMENTRY_SIZE CTASSERT(sizeof(struct kinfo_vmentry) == KINFO_VMENTRY_SIZE); #endif void kern_proc_vmmap_resident(vm_map_t map, vm_map_entry_t entry, int *resident_count, bool *super) { vm_object_t obj, tobj; vm_page_t m, m_adv; vm_offset_t addr; vm_paddr_t pa; vm_pindex_t pi, pi_adv, pindex; *super = false; *resident_count = 0; if (vmmap_skip_res_cnt) return; pa = 0; obj = entry->object.vm_object; addr = entry->start; m_adv = NULL; pi = OFF_TO_IDX(entry->offset); for (; addr < entry->end; addr += IDX_TO_OFF(pi_adv), pi += pi_adv) { if (m_adv != NULL) { m = m_adv; } else { pi_adv = atop(entry->end - addr); pindex = pi; for (tobj = obj;; tobj = tobj->backing_object) { m = vm_page_find_least(tobj, pindex); if (m != NULL) { if (m->pindex == pindex) break; if (pi_adv > m->pindex - pindex) { pi_adv = m->pindex - pindex; m_adv = m; } } if (tobj->backing_object == NULL) goto next; pindex += OFF_TO_IDX(tobj-> backing_object_offset); } } m_adv = NULL; if (m->psind != 0 && addr + pagesizes[1] <= entry->end && (addr & (pagesizes[1] - 1)) == 0 && (pmap_mincore(map->pmap, addr, &pa) & MINCORE_SUPER) != 0) { *super = true; pi_adv = atop(pagesizes[1]); } else { /* * We do not test the found page on validity. * Either the page is busy and being paged in, * or it was invalidated. The first case * should be counted as resident, the second * is not so clear; we do account both. */ pi_adv = 1; } *resident_count += pi_adv; next:; } } /* * Must be called with the process locked and will return unlocked. */ int kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, int flags) { vm_map_entry_t entry, tmp_entry; struct vattr va; vm_map_t map; vm_object_t lobj, nobj, obj, tobj; char *fullpath, *freepath; struct kinfo_vmentry *kve; struct ucred *cred; struct vnode *vp; struct vmspace *vm; vm_offset_t addr; unsigned int last_timestamp; int error; bool guard, super; PROC_LOCK_ASSERT(p, MA_OWNED); _PHOLD(p); PROC_UNLOCK(p); vm = vmspace_acquire_ref(p); if (vm == NULL) { PRELE(p); return (ESRCH); } kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK | M_ZERO); error = 0; map = &vm->vm_map; vm_map_lock_read(map); VM_MAP_ENTRY_FOREACH(entry, map) { if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) continue; addr = entry->end; bzero(kve, sizeof(*kve)); obj = entry->object.vm_object; if (obj != NULL) { if ((obj->flags & OBJ_ANON) != 0) kve->kve_obj = (uintptr_t)obj; for (tobj = obj; tobj != NULL; tobj = tobj->backing_object) { VM_OBJECT_RLOCK(tobj); kve->kve_offset += tobj->backing_object_offset; lobj = tobj; } if (obj->backing_object == NULL) kve->kve_private_resident = obj->resident_page_count; kern_proc_vmmap_resident(map, entry, &kve->kve_resident, &super); if (super) kve->kve_flags |= KVME_FLAG_SUPER; for (tobj = obj; tobj != NULL; tobj = nobj) { nobj = tobj->backing_object; if (tobj != obj && tobj != lobj) VM_OBJECT_RUNLOCK(tobj); } } else { lobj = NULL; } kve->kve_start = entry->start; kve->kve_end = entry->end; kve->kve_offset += entry->offset; if (entry->protection & VM_PROT_READ) kve->kve_protection |= KVME_PROT_READ; if (entry->protection & VM_PROT_WRITE) kve->kve_protection |= KVME_PROT_WRITE; if (entry->protection & VM_PROT_EXECUTE) kve->kve_protection |= KVME_PROT_EXEC; if (entry->eflags & MAP_ENTRY_COW) kve->kve_flags |= KVME_FLAG_COW; if (entry->eflags & MAP_ENTRY_NEEDS_COPY) kve->kve_flags |= KVME_FLAG_NEEDS_COPY; if (entry->eflags & MAP_ENTRY_NOCOREDUMP) kve->kve_flags |= KVME_FLAG_NOCOREDUMP; if (entry->eflags & MAP_ENTRY_GROWS_UP) kve->kve_flags |= KVME_FLAG_GROWS_UP; if (entry->eflags & MAP_ENTRY_GROWS_DOWN) kve->kve_flags |= KVME_FLAG_GROWS_DOWN; if (entry->eflags & MAP_ENTRY_USER_WIRED) kve->kve_flags |= KVME_FLAG_USER_WIRED; guard = (entry->eflags & MAP_ENTRY_GUARD) != 0; last_timestamp = map->timestamp; vm_map_unlock_read(map); freepath = NULL; fullpath = ""; if (lobj != NULL) { kve->kve_type = vm_object_kvme_type(lobj, &vp); if (vp != NULL) vref(vp); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); kve->kve_ref_count = obj->ref_count; kve->kve_shadow_count = obj->shadow_count; VM_OBJECT_RUNLOCK(obj); if (vp != NULL) { vn_fullpath(vp, &fullpath, &freepath); kve->kve_vn_type = vntype_to_kinfo(vp->v_type); cred = curthread->td_ucred; vn_lock(vp, LK_SHARED | LK_RETRY); if (VOP_GETATTR(vp, &va, cred) == 0) { kve->kve_vn_fileid = va.va_fileid; kve->kve_vn_fsid = va.va_fsid; kve->kve_vn_fsid_freebsd11 = kve->kve_vn_fsid; /* truncate */ kve->kve_vn_mode = MAKEIMODE(va.va_type, va.va_mode); kve->kve_vn_size = va.va_size; kve->kve_vn_rdev = va.va_rdev; kve->kve_vn_rdev_freebsd11 = kve->kve_vn_rdev; /* truncate */ kve->kve_status = KF_ATTR_VALID; } vput(vp); } } else { kve->kve_type = guard ? KVME_TYPE_GUARD : KVME_TYPE_NONE; kve->kve_ref_count = 0; kve->kve_shadow_count = 0; } strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path)); if (freepath != NULL) free(freepath, M_TEMP); /* Pack record size down */ if ((flags & KERN_VMMAP_PACK_KINFO) != 0) kve->kve_structsize = offsetof(struct kinfo_vmentry, kve_path) + strlen(kve->kve_path) + 1; else kve->kve_structsize = sizeof(*kve); kve->kve_structsize = roundup(kve->kve_structsize, sizeof(uint64_t)); /* Halt filling and truncate rather than exceeding maxlen */ if (maxlen != -1 && maxlen < kve->kve_structsize) { error = 0; vm_map_lock_read(map); break; } else if (maxlen != -1) maxlen -= kve->kve_structsize; if (sbuf_bcat(sb, kve, kve->kve_structsize) != 0) error = ENOMEM; vm_map_lock_read(map); if (error != 0) break; if (last_timestamp != map->timestamp) { vm_map_lookup_entry(map, addr - 1, &tmp_entry); entry = tmp_entry; } } vm_map_unlock_read(map); vmspace_free(vm); PRELE(p); free(kve, M_TEMP); return (error); } static int sysctl_kern_proc_vmmap(SYSCTL_HANDLER_ARGS) { struct proc *p; struct sbuf sb; u_int namelen; int error, error2, *name; namelen = arg2; if (namelen != 1) return (EINVAL); name = (int *)arg1; sbuf_new_for_sysctl(&sb, NULL, sizeof(struct kinfo_vmentry), req); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p); if (error != 0) { sbuf_delete(&sb); return (error); } error = kern_proc_vmmap_out(p, &sb, -1, KERN_VMMAP_PACK_KINFO); error2 = sbuf_finish(&sb); sbuf_delete(&sb); return (error != 0 ? error : error2); } #if defined(STACK) || defined(DDB) static int sysctl_kern_proc_kstack(SYSCTL_HANDLER_ARGS) { struct kinfo_kstack *kkstp; int error, i, *name, numthreads; lwpid_t *lwpidarray; struct thread *td; struct stack *st; struct sbuf sb; struct proc *p; u_int namelen; namelen = arg2; if (namelen != 1) return (EINVAL); name = (int *)arg1; error = pget((pid_t)name[0], PGET_NOTINEXEC | PGET_WANTREAD, &p); if (error != 0) return (error); kkstp = malloc(sizeof(*kkstp), M_TEMP, M_WAITOK); st = stack_create(M_WAITOK); lwpidarray = NULL; PROC_LOCK(p); do { if (lwpidarray != NULL) { free(lwpidarray, M_TEMP); lwpidarray = NULL; } numthreads = p->p_numthreads; PROC_UNLOCK(p); lwpidarray = malloc(sizeof(*lwpidarray) * numthreads, M_TEMP, M_WAITOK | M_ZERO); PROC_LOCK(p); } while (numthreads < p->p_numthreads); /* * XXXRW: During the below loop, execve(2) and countless other sorts * of changes could have taken place. Should we check to see if the * vmspace has been replaced, or the like, in order to prevent * giving a snapshot that spans, say, execve(2), with some threads * before and some after? Among other things, the credentials could * have changed, in which case the right to extract debug info might * no longer be assured. */ i = 0; FOREACH_THREAD_IN_PROC(p, td) { KASSERT(i < numthreads, ("sysctl_kern_proc_kstack: numthreads")); lwpidarray[i] = td->td_tid; i++; } PROC_UNLOCK(p); numthreads = i; for (i = 0; i < numthreads; i++) { td = tdfind(lwpidarray[i], p->p_pid); if (td == NULL) { continue; } bzero(kkstp, sizeof(*kkstp)); (void)sbuf_new(&sb, kkstp->kkst_trace, sizeof(kkstp->kkst_trace), SBUF_FIXEDLEN); thread_lock(td); kkstp->kkst_tid = td->td_tid; if (TD_IS_SWAPPED(td)) kkstp->kkst_state = KKST_STATE_SWAPPED; else if (stack_save_td(st, td) == 0) kkstp->kkst_state = KKST_STATE_STACKOK; else kkstp->kkst_state = KKST_STATE_RUNNING; thread_unlock(td); PROC_UNLOCK(p); stack_sbuf_print(&sb, st); sbuf_finish(&sb); sbuf_delete(&sb); error = SYSCTL_OUT(req, kkstp, sizeof(*kkstp)); if (error) break; } PRELE(p); if (lwpidarray != NULL) free(lwpidarray, M_TEMP); stack_destroy(st); free(kkstp, M_TEMP); return (error); } #endif /* * This sysctl allows a process to retrieve the full list of groups from * itself or another process. */ static int sysctl_kern_proc_groups(SYSCTL_HANDLER_ARGS) { pid_t *pidp = (pid_t *)arg1; unsigned int arglen = arg2; struct proc *p; struct ucred *cred; int error; if (arglen != 1) return (EINVAL); if (*pidp == -1) { /* -1 means this process */ p = req->td->td_proc; PROC_LOCK(p); } else { error = pget(*pidp, PGET_CANSEE, &p); if (error != 0) return (error); } cred = crhold(p->p_ucred); PROC_UNLOCK(p); error = SYSCTL_OUT(req, cred->cr_groups, cred->cr_ngroups * sizeof(gid_t)); crfree(cred); return (error); } /* * This sysctl allows a process to retrieve or/and set the resource limit for * another process. */ static int sysctl_kern_proc_rlimit(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct rlimit rlim; struct proc *p; u_int which; int flags, error; if (namelen != 2) return (EINVAL); which = (u_int)name[1]; if (which >= RLIM_NLIMITS) return (EINVAL); if (req->newptr != NULL && req->newlen != sizeof(rlim)) return (EINVAL); flags = PGET_HOLD | PGET_NOTWEXIT; if (req->newptr != NULL) flags |= PGET_CANDEBUG; else flags |= PGET_CANSEE; error = pget((pid_t)name[0], flags, &p); if (error != 0) return (error); /* * Retrieve limit. */ if (req->oldptr != NULL) { PROC_LOCK(p); lim_rlimit_proc(p, which, &rlim); PROC_UNLOCK(p); } error = SYSCTL_OUT(req, &rlim, sizeof(rlim)); if (error != 0) goto errout; /* * Set limit. */ if (req->newptr != NULL) { error = SYSCTL_IN(req, &rlim, sizeof(rlim)); if (error == 0) error = kern_proc_setrlimit(curthread, p, which, &rlim); } errout: PRELE(p); return (error); } /* * This sysctl allows a process to retrieve ps_strings structure location of * another process. */ static int sysctl_kern_proc_ps_strings(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; vm_offset_t ps_strings; int error; #ifdef COMPAT_FREEBSD32 uint32_t ps_strings32; #endif if (namelen != 1) return (EINVAL); error = pget((pid_t)name[0], PGET_CANDEBUG, &p); if (error != 0) return (error); #ifdef COMPAT_FREEBSD32 if ((req->flags & SCTL_MASK32) != 0) { /* * We return 0 if the 32 bit emulation request is for a 64 bit * process. */ ps_strings32 = SV_PROC_FLAG(p, SV_ILP32) != 0 ? PTROUT(p->p_sysent->sv_psstrings) : 0; PROC_UNLOCK(p); error = SYSCTL_OUT(req, &ps_strings32, sizeof(ps_strings32)); return (error); } #endif ps_strings = p->p_sysent->sv_psstrings; PROC_UNLOCK(p); error = SYSCTL_OUT(req, &ps_strings, sizeof(ps_strings)); return (error); } /* * This sysctl allows a process to retrieve umask of another process. */ static int sysctl_kern_proc_umask(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; int error; u_short cmask; pid_t pid; if (namelen != 1) return (EINVAL); pid = (pid_t)name[0]; p = curproc; if (pid == p->p_pid || pid == 0) { cmask = p->p_pd->pd_cmask; goto out; } error = pget(pid, PGET_WANTREAD, &p); if (error != 0) return (error); cmask = p->p_pd->pd_cmask; PRELE(p); out: error = SYSCTL_OUT(req, &cmask, sizeof(cmask)); return (error); } /* * This sysctl allows a process to set and retrieve binary osreldate of * another process. */ static int sysctl_kern_proc_osrel(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; int flags, error, osrel; if (namelen != 1) return (EINVAL); if (req->newptr != NULL && req->newlen != sizeof(osrel)) return (EINVAL); flags = PGET_HOLD | PGET_NOTWEXIT; if (req->newptr != NULL) flags |= PGET_CANDEBUG; else flags |= PGET_CANSEE; error = pget((pid_t)name[0], flags, &p); if (error != 0) return (error); error = SYSCTL_OUT(req, &p->p_osrel, sizeof(p->p_osrel)); if (error != 0) goto errout; if (req->newptr != NULL) { error = SYSCTL_IN(req, &osrel, sizeof(osrel)); if (error != 0) goto errout; if (osrel < 0) { error = EINVAL; goto errout; } p->p_osrel = osrel; } errout: PRELE(p); return (error); } static int sysctl_kern_proc_sigtramp(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; struct kinfo_sigtramp kst; const struct sysentvec *sv; int error; #ifdef COMPAT_FREEBSD32 struct kinfo_sigtramp32 kst32; #endif if (namelen != 1) return (EINVAL); error = pget((pid_t)name[0], PGET_CANDEBUG, &p); if (error != 0) return (error); sv = p->p_sysent; #ifdef COMPAT_FREEBSD32 if ((req->flags & SCTL_MASK32) != 0) { bzero(&kst32, sizeof(kst32)); if (SV_PROC_FLAG(p, SV_ILP32)) { if (sv->sv_sigcode_base != 0) { kst32.ksigtramp_start = sv->sv_sigcode_base; kst32.ksigtramp_end = sv->sv_sigcode_base + - *sv->sv_szsigcode; + ((sv->sv_flags & SV_DSO_SIG) == 0 ? + *sv->sv_szsigcode : + (uintptr_t)sv->sv_szsigcode); } else { kst32.ksigtramp_start = sv->sv_psstrings - *sv->sv_szsigcode; kst32.ksigtramp_end = sv->sv_psstrings; } } PROC_UNLOCK(p); error = SYSCTL_OUT(req, &kst32, sizeof(kst32)); return (error); } #endif bzero(&kst, sizeof(kst)); if (sv->sv_sigcode_base != 0) { kst.ksigtramp_start = (char *)sv->sv_sigcode_base; kst.ksigtramp_end = (char *)sv->sv_sigcode_base + - *sv->sv_szsigcode; + ((sv->sv_flags & SV_DSO_SIG) == 0 ? *sv->sv_szsigcode : + (uintptr_t)sv->sv_szsigcode); } else { kst.ksigtramp_start = (char *)sv->sv_psstrings - *sv->sv_szsigcode; kst.ksigtramp_end = (char *)sv->sv_psstrings; } PROC_UNLOCK(p); error = SYSCTL_OUT(req, &kst, sizeof(kst)); return (error); } static int sysctl_kern_proc_sigfastblk(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; pid_t pid; struct proc *p; struct thread *td1; uintptr_t addr; #ifdef COMPAT_FREEBSD32 uint32_t addr32; #endif int error; if (namelen != 1 || req->newptr != NULL) return (EINVAL); pid = (pid_t)name[0]; error = pget(pid, PGET_HOLD | PGET_NOTWEXIT | PGET_CANDEBUG, &p); if (error != 0) return (error); PROC_LOCK(p); #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) { if (!SV_PROC_FLAG(p, SV_ILP32)) { error = EINVAL; goto errlocked; } } #endif if (pid <= PID_MAX) { td1 = FIRST_THREAD_IN_PROC(p); } else { FOREACH_THREAD_IN_PROC(p, td1) { if (td1->td_tid == pid) break; } } if (td1 == NULL) { error = ESRCH; goto errlocked; } /* * The access to the private thread flags. It is fine as far * as no out-of-thin-air values are read from td_pflags, and * usermode read of the td_sigblock_ptr is racy inherently, * since target process might have already changed it * meantime. */ if ((td1->td_pflags & TDP_SIGFASTBLOCK) != 0) addr = (uintptr_t)td1->td_sigblock_ptr; else error = ENOTTY; errlocked: _PRELE(p); PROC_UNLOCK(p); if (error != 0) return (error); #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) { addr32 = addr; error = SYSCTL_OUT(req, &addr32, sizeof(addr32)); } else #endif error = SYSCTL_OUT(req, &addr, sizeof(addr)); return (error); } SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Process table"); SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT| CTLFLAG_MPSAFE, 0, 0, sysctl_kern_proc, "S,proc", "Return entire process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_GID, gid, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_PGRP, pgrp, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_RGID, rgid, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_SESSION, sid, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_TTY, tty, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_UID, uid, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_RUID, ruid, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_PID, pid, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_PROC, proc, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Return process table, no threads"); static SYSCTL_NODE(_kern_proc, KERN_PROC_ARGS, args, CTLFLAG_RW | CTLFLAG_CAPWR | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_args, "Process argument list"); static SYSCTL_NODE(_kern_proc, KERN_PROC_ENV, env, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_env, "Process environment"); static SYSCTL_NODE(_kern_proc, KERN_PROC_AUXV, auxv, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_auxv, "Process ELF auxiliary vector"); static SYSCTL_NODE(_kern_proc, KERN_PROC_PATHNAME, pathname, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_pathname, "Process executable path"); static SYSCTL_NODE(_kern_proc, KERN_PROC_SV_NAME, sv_name, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_sv_name, "Process syscall vector name (ABI type)"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_GID | KERN_PROC_INC_THREAD), gid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_PGRP | KERN_PROC_INC_THREAD), pgrp_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_RGID | KERN_PROC_INC_THREAD), rgid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_SESSION | KERN_PROC_INC_THREAD), sid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_TTY | KERN_PROC_INC_THREAD), tty_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_UID | KERN_PROC_INC_THREAD), uid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_RUID | KERN_PROC_INC_THREAD), ruid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_PID | KERN_PROC_INC_THREAD), pid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_PROC | KERN_PROC_INC_THREAD), proc_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Return process table, including threads"); #ifdef COMPAT_FREEBSD7 static SYSCTL_NODE(_kern_proc, KERN_PROC_OVMMAP, ovmmap, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_ovmmap, "Old Process vm map entries"); #endif static SYSCTL_NODE(_kern_proc, KERN_PROC_VMMAP, vmmap, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_vmmap, "Process vm map entries"); #if defined(STACK) || defined(DDB) static SYSCTL_NODE(_kern_proc, KERN_PROC_KSTACK, kstack, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_kstack, "Process kernel stacks"); #endif static SYSCTL_NODE(_kern_proc, KERN_PROC_GROUPS, groups, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_groups, "Process groups"); static SYSCTL_NODE(_kern_proc, KERN_PROC_RLIMIT, rlimit, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_rlimit, "Process resource limits"); static SYSCTL_NODE(_kern_proc, KERN_PROC_PS_STRINGS, ps_strings, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_ps_strings, "Process ps_strings location"); static SYSCTL_NODE(_kern_proc, KERN_PROC_UMASK, umask, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_umask, "Process umask"); static SYSCTL_NODE(_kern_proc, KERN_PROC_OSREL, osrel, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_osrel, "Process binary osreldate"); static SYSCTL_NODE(_kern_proc, KERN_PROC_SIGTRAMP, sigtramp, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_sigtramp, "Process signal trampoline location"); static SYSCTL_NODE(_kern_proc, KERN_PROC_SIGFASTBLK, sigfastblk, CTLFLAG_RD | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_sigfastblk, "Thread sigfastblock address"); int allproc_gen; /* * stop_all_proc() purpose is to stop all process which have usermode, * except current process for obvious reasons. This makes it somewhat * unreliable when invoked from multithreaded process. The service * must not be user-callable anyway. */ void stop_all_proc(void) { struct proc *cp, *p; int r, gen; bool restart, seen_stopped, seen_exiting, stopped_some; cp = curproc; allproc_loop: sx_xlock(&allproc_lock); gen = allproc_gen; seen_exiting = seen_stopped = stopped_some = restart = false; LIST_REMOVE(cp, p_list); LIST_INSERT_HEAD(&allproc, cp, p_list); for (;;) { p = LIST_NEXT(cp, p_list); if (p == NULL) break; LIST_REMOVE(cp, p_list); LIST_INSERT_AFTER(p, cp, p_list); PROC_LOCK(p); if ((p->p_flag & (P_KPROC | P_SYSTEM | P_TOTAL_STOP)) != 0) { PROC_UNLOCK(p); continue; } if ((p->p_flag & P_WEXIT) != 0) { seen_exiting = true; PROC_UNLOCK(p); continue; } if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) { /* * Stopped processes are tolerated when there * are no other processes which might continue * them. P_STOPPED_SINGLE but not * P_TOTAL_STOP process still has at least one * thread running. */ seen_stopped = true; PROC_UNLOCK(p); continue; } sx_xunlock(&allproc_lock); _PHOLD(p); r = thread_single(p, SINGLE_ALLPROC); if (r != 0) restart = true; else stopped_some = true; _PRELE(p); PROC_UNLOCK(p); sx_xlock(&allproc_lock); } /* Catch forked children we did not see in iteration. */ if (gen != allproc_gen) restart = true; sx_xunlock(&allproc_lock); if (restart || stopped_some || seen_exiting || seen_stopped) { kern_yield(PRI_USER); goto allproc_loop; } } void resume_all_proc(void) { struct proc *cp, *p; cp = curproc; sx_xlock(&allproc_lock); again: LIST_REMOVE(cp, p_list); LIST_INSERT_HEAD(&allproc, cp, p_list); for (;;) { p = LIST_NEXT(cp, p_list); if (p == NULL) break; LIST_REMOVE(cp, p_list); LIST_INSERT_AFTER(p, cp, p_list); PROC_LOCK(p); if ((p->p_flag & P_TOTAL_STOP) != 0) { sx_xunlock(&allproc_lock); _PHOLD(p); thread_single_end(p, SINGLE_ALLPROC); _PRELE(p); PROC_UNLOCK(p); sx_xlock(&allproc_lock); } else { PROC_UNLOCK(p); } } /* Did the loop above missed any stopped process ? */ FOREACH_PROC_IN_SYSTEM(p) { /* No need for proc lock. */ if ((p->p_flag & P_TOTAL_STOP) != 0) goto again; } sx_xunlock(&allproc_lock); } /* #define TOTAL_STOP_DEBUG 1 */ #ifdef TOTAL_STOP_DEBUG volatile static int ap_resume; #include static int sysctl_debug_stop_all_proc(SYSCTL_HANDLER_ARGS) { int error, val; val = 0; ap_resume = 0; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (val != 0) { stop_all_proc(); syncer_suspend(); while (ap_resume == 0) ; syncer_resume(); resume_all_proc(); } return (0); } SYSCTL_PROC(_debug, OID_AUTO, stop_all_proc, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, __DEVOLATILE(int *, &ap_resume), 0, sysctl_debug_stop_all_proc, "I", ""); #endif diff --git a/sys/kern/kern_sharedpage.c b/sys/kern/kern_sharedpage.c index e47fbbe62bb7..98f7b619e0b7 100644 --- a/sys/kern/kern_sharedpage.c +++ b/sys/kern/kern_sharedpage.c @@ -1,391 +1,405 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2010, 2012 Konstantin Belousov * Copyright (c) 2015 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static struct sx shared_page_alloc_sx; static vm_object_t shared_page_obj; static int shared_page_free; char *shared_page_mapping; #ifdef RANDOM_FENESTRASX static struct vdso_fxrng_generation *fxrng_shpage_mapping; static bool fxrng_enabled = true; SYSCTL_BOOL(_debug, OID_AUTO, fxrng_vdso_enable, CTLFLAG_RWTUN, &fxrng_enabled, 0, "Enable FXRNG VDSO"); #endif void shared_page_write(int base, int size, const void *data) { bcopy(data, shared_page_mapping + base, size); } static int shared_page_alloc_locked(int size, int align) { int res; res = roundup(shared_page_free, align); if (res + size >= IDX_TO_OFF(shared_page_obj->size)) res = -1; else shared_page_free = res + size; return (res); } int shared_page_alloc(int size, int align) { int res; sx_xlock(&shared_page_alloc_sx); res = shared_page_alloc_locked(size, align); sx_xunlock(&shared_page_alloc_sx); return (res); } int shared_page_fill(int size, int align, const void *data) { int res; sx_xlock(&shared_page_alloc_sx); res = shared_page_alloc_locked(size, align); if (res != -1) shared_page_write(res, size, data); sx_xunlock(&shared_page_alloc_sx); return (res); } static void shared_page_init(void *dummy __unused) { vm_page_t m; vm_offset_t addr; sx_init(&shared_page_alloc_sx, "shpsx"); shared_page_obj = vm_pager_allocate(OBJT_PHYS, 0, PAGE_SIZE, VM_PROT_DEFAULT, 0, NULL); VM_OBJECT_WLOCK(shared_page_obj); m = vm_page_grab(shared_page_obj, 0, VM_ALLOC_ZERO); VM_OBJECT_WUNLOCK(shared_page_obj); vm_page_valid(m); vm_page_xunbusy(m); addr = kva_alloc(PAGE_SIZE); pmap_qenter(addr, &m, 1); shared_page_mapping = (char *)addr; } SYSINIT(shp, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)shared_page_init, NULL); /* * Push the timehands update to the shared page. * * The lockless update scheme is similar to the one used to update the * in-kernel timehands, see sys/kern/kern_tc.c:tc_windup() (which * calls us after the timehands are updated). */ static void timehands_update(struct vdso_sv_tk *svtk) { struct vdso_timehands th; struct vdso_timekeep *tk; uint32_t enabled, idx; enabled = tc_fill_vdso_timehands(&th); th.th_gen = 0; idx = svtk->sv_timekeep_curr; if (++idx >= VDSO_TH_NUM) idx = 0; svtk->sv_timekeep_curr = idx; if (++svtk->sv_timekeep_gen == 0) svtk->sv_timekeep_gen = 1; tk = (struct vdso_timekeep *)(shared_page_mapping + svtk->sv_timekeep_off); tk->tk_th[idx].th_gen = 0; atomic_thread_fence_rel(); if (enabled) tk->tk_th[idx] = th; atomic_store_rel_32(&tk->tk_th[idx].th_gen, svtk->sv_timekeep_gen); atomic_store_rel_32(&tk->tk_current, idx); /* * The ordering of the assignment to tk_enabled relative to * the update of the vdso_timehands is not important. */ tk->tk_enabled = enabled; } #ifdef COMPAT_FREEBSD32 static void timehands_update32(struct vdso_sv_tk *svtk) { struct vdso_timehands32 th; struct vdso_timekeep32 *tk; uint32_t enabled, idx; enabled = tc_fill_vdso_timehands32(&th); th.th_gen = 0; idx = svtk->sv_timekeep_curr; if (++idx >= VDSO_TH_NUM) idx = 0; svtk->sv_timekeep_curr = idx; if (++svtk->sv_timekeep_gen == 0) svtk->sv_timekeep_gen = 1; tk = (struct vdso_timekeep32 *)(shared_page_mapping + svtk->sv_timekeep_off); tk->tk_th[idx].th_gen = 0; atomic_thread_fence_rel(); if (enabled) tk->tk_th[idx] = th; atomic_store_rel_32(&tk->tk_th[idx].th_gen, svtk->sv_timekeep_gen); atomic_store_rel_32(&tk->tk_current, idx); tk->tk_enabled = enabled; } #endif /* * This is hackish, but easiest way to avoid creating list structures * that needs to be iterated over from the hardclock interrupt * context. */ static struct vdso_sv_tk *host_svtk; #ifdef COMPAT_FREEBSD32 static struct vdso_sv_tk *compat32_svtk; #endif void timekeep_push_vdso(void) { if (host_svtk != NULL) timehands_update(host_svtk); #ifdef COMPAT_FREEBSD32 if (compat32_svtk != NULL) timehands_update32(compat32_svtk); #endif } struct vdso_sv_tk * alloc_sv_tk(void) { struct vdso_sv_tk *svtk; int tk_base; uint32_t tk_ver; tk_ver = VDSO_TK_VER_CURR; svtk = malloc(sizeof(struct vdso_sv_tk), M_TEMP, M_WAITOK | M_ZERO); tk_base = shared_page_alloc(sizeof(struct vdso_timekeep) + sizeof(struct vdso_timehands) * VDSO_TH_NUM, 16); KASSERT(tk_base != -1, ("tk_base -1 for native")); shared_page_write(tk_base + offsetof(struct vdso_timekeep, tk_ver), sizeof(uint32_t), &tk_ver); svtk->sv_timekeep_off = tk_base; timekeep_push_vdso(); return (svtk); } #ifdef COMPAT_FREEBSD32 struct vdso_sv_tk * alloc_sv_tk_compat32(void) { struct vdso_sv_tk *svtk; int tk_base; uint32_t tk_ver; svtk = malloc(sizeof(struct vdso_sv_tk), M_TEMP, M_WAITOK | M_ZERO); tk_ver = VDSO_TK_VER_CURR; tk_base = shared_page_alloc(sizeof(struct vdso_timekeep32) + sizeof(struct vdso_timehands32) * VDSO_TH_NUM, 16); KASSERT(tk_base != -1, ("tk_base -1 for 32bit")); shared_page_write(tk_base + offsetof(struct vdso_timekeep32, tk_ver), sizeof(uint32_t), &tk_ver); svtk->sv_timekeep_off = tk_base; timekeep_push_vdso(); return (svtk); } #endif #ifdef RANDOM_FENESTRASX void fxrng_push_seed_generation(uint64_t gen) { if (fxrng_shpage_mapping == NULL || !fxrng_enabled) return; KASSERT(gen < INT32_MAX, ("fxrng seed version shouldn't roll over a 32-bit counter " "for approximately 456,000 years")); atomic_store_rel_32(&fxrng_shpage_mapping->fx_generation32, (uint32_t)gen); } static void alloc_sv_fxrng_generation(void) { int base; /* * Allocate a full cache line for the fxrng root generation (64-bit * counter, or truncated 32-bit counter on ILP32 userspace). It is * important that the line is not shared with frequently dirtied data, * and the shared page allocator lacks a __read_mostly mechanism. * However, PAGE_SIZE is typically large relative to the amount of * stuff we've got in it so far, so maybe the possible waste isn't an * issue. */ base = shared_page_alloc(CACHE_LINE_SIZE, CACHE_LINE_SIZE); KASSERT(base != -1, ("%s: base allocation failed", __func__)); fxrng_shpage_mapping = (void *)(shared_page_mapping + base); *fxrng_shpage_mapping = (struct vdso_fxrng_generation) { .fx_vdso_version = VDSO_FXRNG_VER_CURR, }; } #endif /* RANDOM_FENESTRASX */ void exec_sysvec_init(void *param) { struct sysentvec *sv; + vm_offset_t sb; #ifdef RANDOM_FENESTRASX ptrdiff_t base; #endif u_int flags; + int res; sv = param; flags = sv->sv_flags; if ((flags & SV_SHP) == 0) return; MPASS(sv->sv_shared_page_obj == NULL); MPASS(sv->sv_shared_page_base != 0); sv->sv_shared_page_obj = shared_page_obj; if ((flags & SV_ABI_MASK) == SV_ABI_FREEBSD) { - sv->sv_sigcode_base = sv->sv_shared_page_base + - shared_page_fill(*(sv->sv_szsigcode), 16, sv->sv_sigcode); + if ((flags & SV_DSO_SIG) != 0) { + sb = sv->sv_shared_page_base; + res = shared_page_fill((uintptr_t)sv->sv_szsigcode, + 16, sv->sv_sigcode); + if (res == -1) + panic("copying sigtramp to shared page"); + sb += res; + sb += sv->sv_sigcodeoff; + sv->sv_sigcode_base = sb; + } else { + sv->sv_sigcode_base = sv->sv_shared_page_base + + shared_page_fill(*(sv->sv_szsigcode), 16, + sv->sv_sigcode); + } } if ((flags & SV_TIMEKEEP) != 0) { #ifdef COMPAT_FREEBSD32 if ((flags & SV_ILP32) != 0) { if ((flags & SV_ABI_MASK) == SV_ABI_FREEBSD) { KASSERT(compat32_svtk == NULL, ("Compat32 already registered")); compat32_svtk = alloc_sv_tk_compat32(); } else { KASSERT(compat32_svtk != NULL, ("Compat32 not registered")); } sv->sv_timekeep_base = sv->sv_shared_page_base + compat32_svtk->sv_timekeep_off; } else { #endif if ((flags & SV_ABI_MASK) == SV_ABI_FREEBSD) { KASSERT(host_svtk == NULL, ("Host already registered")); host_svtk = alloc_sv_tk(); } else { KASSERT(host_svtk != NULL, ("Host not registered")); } sv->sv_timekeep_base = sv->sv_shared_page_base + host_svtk->sv_timekeep_off; #ifdef COMPAT_FREEBSD32 } #endif } #ifdef RANDOM_FENESTRASX if ((flags & (SV_ABI_MASK | SV_RNG_SEED_VER)) == (SV_ABI_FREEBSD | SV_RNG_SEED_VER)) { /* * Only allocate a single VDSO entry for multiple sysentvecs, * i.e., native and COMPAT32. */ if (fxrng_shpage_mapping == NULL) alloc_sv_fxrng_generation(); base = (char *)fxrng_shpage_mapping - shared_page_mapping; sv->sv_fxrng_gen_base = sv->sv_shared_page_base + base; } #endif } void exec_sysvec_init_secondary(struct sysentvec *sv, struct sysentvec *sv2) { MPASS((sv2->sv_flags & SV_ABI_MASK) == (sv->sv_flags & SV_ABI_MASK)); MPASS((sv2->sv_flags & SV_TIMEKEEP) == (sv->sv_flags & SV_TIMEKEEP)); MPASS((sv2->sv_flags & SV_SHP) != 0 && (sv->sv_flags & SV_SHP) != 0); MPASS((sv2->sv_flags & SV_RNG_SEED_VER) == (sv->sv_flags & SV_RNG_SEED_VER)); sv2->sv_shared_page_obj = sv->sv_shared_page_obj; sv2->sv_sigcode_base = sv2->sv_shared_page_base + (sv->sv_sigcode_base - sv->sv_shared_page_base); if ((sv2->sv_flags & SV_ABI_MASK) != SV_ABI_FREEBSD) return; if ((sv2->sv_flags & SV_TIMEKEEP) != 0) { sv2->sv_timekeep_base = sv2->sv_shared_page_base + (sv->sv_timekeep_base - sv->sv_shared_page_base); } if ((sv2->sv_flags & SV_RNG_SEED_VER) != 0) { sv2->sv_fxrng_gen_base = sv2->sv_shared_page_base + (sv->sv_fxrng_gen_base - sv->sv_shared_page_base); } } diff --git a/sys/sys/sysent.h b/sys/sys/sysent.h index e0b067927036..81bd1db108f9 100644 --- a/sys/sys/sysent.h +++ b/sys/sys/sysent.h @@ -1,342 +1,344 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1988, 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_SYSENT_H_ #define _SYS_SYSENT_H_ #include struct rlimit; struct sysent; struct thread; struct ksiginfo; struct syscall_args; enum systrace_probe_t { SYSTRACE_ENTRY, SYSTRACE_RETURN, }; typedef int sy_call_t(struct thread *, void *); typedef void (*systrace_probe_func_t)(struct syscall_args *, enum systrace_probe_t, int); typedef void (*systrace_args_func_t)(int, void *, uint64_t *, int *); #ifdef _KERNEL extern systrace_probe_func_t systrace_probe_func; extern bool systrace_enabled; #ifdef KDTRACE_HOOKS #define SYSTRACE_ENABLED() (systrace_enabled) #else #define SYSTRACE_ENABLED() (0) #endif #endif /* _KERNEL */ struct sysent { /* system call table */ sy_call_t *sy_call; /* implementing function */ systrace_args_func_t sy_systrace_args_func; /* optional argument conversion function. */ u_int8_t sy_narg; /* number of arguments */ u_int8_t sy_flags; /* General flags for system calls. */ au_event_t sy_auevent; /* audit event associated with syscall */ u_int32_t sy_entry; /* DTrace entry ID for systrace. */ u_int32_t sy_return; /* DTrace return ID for systrace. */ u_int32_t sy_thrcnt; }; /* * A system call is permitted in capability mode. */ #define SYF_CAPENABLED 0x00000001 #define SY_THR_FLAGMASK 0x7 #define SY_THR_STATIC 0x1 #define SY_THR_DRAINING 0x2 #define SY_THR_ABSENT 0x4 #define SY_THR_INCR 0x8 #ifdef KLD_MODULE #define SY_THR_STATIC_KLD 0 #else #define SY_THR_STATIC_KLD SY_THR_STATIC #endif struct image_params; struct proc; struct __sigset; struct trapframe; struct vnode; struct note_info_list; struct sysentvec { int sv_size; /* number of entries */ struct sysent *sv_table; /* pointer to sysent */ int (*sv_transtrap)(int, int); /* translate trap-to-signal mapping */ int (*sv_fixup)(uintptr_t *, struct image_params *); /* stack fixup function */ void (*sv_sendsig)(void (*)(int), struct ksiginfo *, struct __sigset *); /* send signal */ - char *sv_sigcode; /* start of sigtramp code */ + const char *sv_sigcode; /* start of sigtramp code */ int *sv_szsigcode; /* size of sigtramp code */ + int sv_sigcodeoff; char *sv_name; /* name of binary type */ int (*sv_coredump)(struct thread *, struct vnode *, off_t, int); /* function to dump core, or NULL */ int sv_elf_core_osabi; const char *sv_elf_core_abi_vendor; void (*sv_elf_core_prepare_notes)(struct thread *, struct note_info_list *, size_t *); int (*sv_imgact_try)(struct image_params *); vm_size_t (*sv_stackgap)(struct image_params *, uintptr_t *); int (*sv_copyout_auxargs)(struct image_params *, uintptr_t); int sv_minsigstksz; /* minimum signal stack size */ vm_offset_t sv_minuser; /* VM_MIN_ADDRESS */ vm_offset_t sv_maxuser; /* VM_MAXUSER_ADDRESS */ vm_offset_t sv_usrstack; /* USRSTACK */ vm_offset_t sv_psstrings; /* PS_STRINGS */ int sv_stackprot; /* vm protection for stack */ int (*sv_copyout_strings)(struct image_params *, uintptr_t *); void (*sv_setregs)(struct thread *, struct image_params *, uintptr_t); void (*sv_fixlimit)(struct rlimit *, int); u_long *sv_maxssiz; u_int sv_flags; void (*sv_set_syscall_retval)(struct thread *, int); int (*sv_fetch_syscall_args)(struct thread *); const char **sv_syscallnames; vm_offset_t sv_timekeep_base; vm_offset_t sv_shared_page_base; vm_offset_t sv_shared_page_len; vm_offset_t sv_sigcode_base; void *sv_shared_page_obj; void (*sv_schedtail)(struct thread *); void (*sv_thread_detach)(struct thread *); int (*sv_trap)(struct thread *); u_long *sv_hwcap; /* Value passed in AT_HWCAP. */ u_long *sv_hwcap2; /* Value passed in AT_HWCAP2. */ const char *(*sv_machine_arch)(struct proc *); vm_offset_t sv_fxrng_gen_base; void (*sv_onexec_old)(struct thread *td); int (*sv_onexec)(struct proc *, struct image_params *); void (*sv_onexit)(struct proc *); void (*sv_ontdexit)(struct thread *td); int (*sv_setid_allowed)(struct thread *td, struct image_params *imgp); void (*sv_set_fork_retval)(struct thread *); /* Only used on x86 */ }; #define SV_ILP32 0x000100 /* 32-bit executable. */ #define SV_LP64 0x000200 /* 64-bit executable. */ #define SV_IA32 0x004000 /* Intel 32-bit executable. */ #define SV_AOUT 0x008000 /* a.out executable. */ #define SV_SHP 0x010000 /* Shared page. */ #define SV_AVAIL1 0x020000 /* Unused */ #define SV_TIMEKEEP 0x040000 /* Shared page timehands. */ #define SV_ASLR 0x080000 /* ASLR allowed. */ #define SV_RNG_SEED_VER 0x100000 /* random(4) reseed generation. */ #define SV_SIG_DISCIGN 0x200000 /* Do not discard ignored signals */ #define SV_SIG_WAITNDQ 0x400000 /* Wait does not dequeue SIGCHLD */ +#define SV_DSO_SIG 0x800000 /* Signal trampoline packed in dso */ #define SV_ABI_MASK 0xff #define SV_PROC_FLAG(p, x) ((p)->p_sysent->sv_flags & (x)) #define SV_PROC_ABI(p) ((p)->p_sysent->sv_flags & SV_ABI_MASK) #define SV_CURPROC_FLAG(x) SV_PROC_FLAG(curproc, x) #define SV_CURPROC_ABI() SV_PROC_ABI(curproc) /* same as ELFOSABI_XXX, to prevent header pollution */ #define SV_ABI_LINUX 3 #define SV_ABI_FREEBSD 9 #define SV_ABI_UNDEF 255 /* sv_coredump flags */ #define SVC_PT_COREDUMP 0x00000001 /* dump requested by ptrace(2) */ #define SVC_NOCOMPRESS 0x00000002 /* disable compression. */ #define SVC_ALL 0x00000004 /* dump everything */ #ifdef _KERNEL extern struct sysentvec aout_sysvec; extern struct sysent sysent[]; extern const char *syscallnames[]; struct nosys_args { register_t dummy; }; int nosys(struct thread *, struct nosys_args *); #define NO_SYSCALL (-1) struct module; struct syscall_module_data { int (*chainevh)(struct module *, int, void *); /* next handler */ void *chainarg; /* arg for next event handler */ int *offset; /* offset into sysent */ struct sysent *new_sysent; /* new sysent */ struct sysent old_sysent; /* old sysent */ int flags; /* flags for syscall_register */ }; /* separate initialization vector so it can be used in a substructure */ #define SYSENT_INIT_VALS(_syscallname) { \ .sy_narg = (sizeof(struct _syscallname ## _args ) \ / sizeof(register_t)), \ .sy_call = (sy_call_t *)&sys_##_syscallname, \ .sy_auevent = SYS_AUE_##_syscallname, \ .sy_systrace_args_func = NULL, \ .sy_entry = 0, \ .sy_return = 0, \ .sy_flags = 0, \ .sy_thrcnt = 0 \ } #define MAKE_SYSENT(syscallname) \ static struct sysent syscallname##_sysent = SYSENT_INIT_VALS(syscallname); #define MAKE_SYSENT_COMPAT(syscallname) \ static struct sysent syscallname##_sysent = { \ (sizeof(struct syscallname ## _args ) \ / sizeof(register_t)), \ (sy_call_t *)& syscallname, \ SYS_AUE_##syscallname \ } #define SYSCALL_MODULE(name, offset, new_sysent, evh, arg) \ static struct syscall_module_data name##_syscall_mod = { \ evh, arg, offset, new_sysent, { 0, NULL, AUE_NULL } \ }; \ \ static moduledata_t name##_mod = { \ "sys/" #name, \ syscall_module_handler, \ &name##_syscall_mod \ }; \ DECLARE_MODULE(name, name##_mod, SI_SUB_SYSCALLS, SI_ORDER_MIDDLE) #define SYSCALL_MODULE_HELPER(syscallname) \ static int syscallname##_syscall = SYS_##syscallname; \ MAKE_SYSENT(syscallname); \ SYSCALL_MODULE(syscallname, \ & syscallname##_syscall, & syscallname##_sysent, \ NULL, NULL) #define SYSCALL_MODULE_PRESENT(syscallname) \ (sysent[SYS_##syscallname].sy_call != (sy_call_t *)lkmnosys && \ sysent[SYS_##syscallname].sy_call != (sy_call_t *)lkmressys) /* * Syscall registration helpers with resource allocation handling. */ struct syscall_helper_data { struct sysent new_sysent; struct sysent old_sysent; int syscall_no; int registered; }; #define SYSCALL_INIT_HELPER_F(syscallname, flags) { \ .new_sysent = { \ .sy_narg = (sizeof(struct syscallname ## _args ) \ / sizeof(register_t)), \ .sy_call = (sy_call_t *)& sys_ ## syscallname, \ .sy_auevent = SYS_AUE_##syscallname, \ .sy_flags = (flags) \ }, \ .syscall_no = SYS_##syscallname \ } #define SYSCALL_INIT_HELPER_COMPAT_F(syscallname, flags) { \ .new_sysent = { \ .sy_narg = (sizeof(struct syscallname ## _args ) \ / sizeof(register_t)), \ .sy_call = (sy_call_t *)& syscallname, \ .sy_auevent = SYS_AUE_##syscallname, \ .sy_flags = (flags) \ }, \ .syscall_no = SYS_##syscallname \ } #define SYSCALL_INIT_HELPER(syscallname) \ SYSCALL_INIT_HELPER_F(syscallname, 0) #define SYSCALL_INIT_HELPER_COMPAT(syscallname) \ SYSCALL_INIT_HELPER_COMPAT_F(syscallname, 0) #define SYSCALL_INIT_LAST { \ .syscall_no = NO_SYSCALL \ } int syscall_module_handler(struct module *mod, int what, void *arg); int syscall_helper_register(struct syscall_helper_data *sd, int flags); int syscall_helper_unregister(struct syscall_helper_data *sd); /* Implementation, exposed for COMPAT code */ int kern_syscall_register(struct sysent *sysents, int *offset, struct sysent *new_sysent, struct sysent *old_sysent, int flags); int kern_syscall_deregister(struct sysent *sysents, int offset, const struct sysent *old_sysent); int kern_syscall_module_handler(struct sysent *sysents, struct module *mod, int what, void *arg); int kern_syscall_helper_register(struct sysent *sysents, struct syscall_helper_data *sd, int flags); int kern_syscall_helper_unregister(struct sysent *sysents, struct syscall_helper_data *sd); struct proc; const char *syscallname(struct proc *p, u_int code); /* Special purpose system call functions. */ struct nosys_args; int lkmnosys(struct thread *, struct nosys_args *); int lkmressys(struct thread *, struct nosys_args *); int syscall_thread_enter(struct thread *td, struct sysent *se); void syscall_thread_exit(struct thread *td, struct sysent *se); int shared_page_alloc(int size, int align); int shared_page_fill(int size, int align, const void *data); void shared_page_write(int base, int size, const void *data); void exec_sysvec_init(void *param); void exec_sysvec_init_secondary(struct sysentvec *sv, struct sysentvec *sv2); void exec_inittk(void); void exit_onexit(struct proc *p); void exec_free_abi_mappings(struct proc *p); void exec_onexec_old(struct thread *td); #define INIT_SYSENTVEC(name, sv) \ SYSINIT(name, SI_SUB_EXEC, SI_ORDER_ANY, \ (sysinit_cfunc_t)exec_sysvec_init, sv); #endif /* _KERNEL */ #endif /* !_SYS_SYSENT_H_ */ diff --git a/sys/tools/amd64_vdso.sh b/sys/tools/amd64_vdso.sh new file mode 100644 index 000000000000..39406eccd2ef --- /dev/null +++ b/sys/tools/amd64_vdso.sh @@ -0,0 +1,59 @@ +#!/bin/sh +# SPDX-License-Identifier: BSD-2-Clause-FreeBSD +# +# Copyright (c) 2021 The FreeBSD Foundation +# All rights reserved. +# +# This software was developed by Konstantin Belousov +# under sponsorship from the FreeBSD Foundation. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. + +set -e + +${CC} -x assembler-with-cpp -DLOCORE -fPIC -nostdinc -c \ + -o sigtramp.pico -I. -I"${S}" -include opt_global.h \ + "${S}"/amd64/amd64/sigtramp.S + +# We need to make vdso as compact as possible, for it to leave space +# for other things in the shared page. For this, we pack everything +# into single loadable segment. +# +# -z rodynamic is undocumented lld-specific option, seemingly required +# for lld to avoid putting dynamic into dedicated writeable segment, +# despite ldscript placement. It is ignored by ld.bfd but ldscript +# alone is enough there. +# +${LD} --shared -Bsymbolic -soname="elf-vdso.so.1" \ + -T "${S}"/conf/vdso_amd64.ldscript \ + --eh-frame-hdr --no-undefined -z rodynamic -z norelro -nmagic \ + --hash-style=sysv --fatal-warnings --strip-all \ + -o elf-vdso.so.1 sigtramp.pico + +${CC} -x assembler-with-cpp -DLOCORE -fPIC -nostdinc -c \ + -o elf-vdso.so.o -I. -I"${S}" -include opt_global.h \ + -DVDSO_NAME=elf_vdso_so_1 -DVDSO_FILE=elf-vdso.so.1 \ + "${S}"/tools/vdso_wrap.S + +${NM} -D elf-vdso.so.1 | \ + awk '/__vdso_sigcode/{printf "#define VDSO_SIGCODE_OFFSET 0x%s\n",$1}' \ + >vdso_offsets.h diff --git a/sys/amd64/amd64/sigtramp.S b/sys/tools/vdso_wrap.S similarity index 60% copy from sys/amd64/amd64/sigtramp.S copy to sys/tools/vdso_wrap.S index 9983edf229a3..807dcf9c06f4 100644 --- a/sys/amd64/amd64/sigtramp.S +++ b/sys/tools/vdso_wrap.S @@ -1,56 +1,48 @@ /*- - * Copyright (c) 2003 Peter Wemm - * All rights reserved. + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2021 The FreeBSD Foundation + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ -#include - -#include - -#include "assym.inc" - - .text -/********************************************************************** - * - * Signal trampoline, copied to top of user stack - * - */ -ENTRY(sigcode) - call *SIGF_HANDLER(%rsp) /* call signal handler */ - lea SIGF_UC(%rsp),%rdi /* get ucontext_t */ - pushq $0 /* junk to fake return addr. */ - movq $SYS_sigreturn,%rax - syscall /* enter kernel with args */ -0: hlt /* trap priviliged instruction */ - jmp 0b +#include - ALIGN_TEXT -esigcode: +#define VDSO_BLOB_START(S) __CONCAT(_binary_, __CONCAT(S, _start)) +#define VDSO_BLOB_END(S) __CONCAT(_binary_, __CONCAT(S, _end)) +#define VDSO_BLOB_SIZE(S) __CONCAT(_binary_, __CONCAT(S, _size)) - .data - .globl szsigcode -szsigcode: - .long esigcode-sigcode + .section .rodata, "a", %progbits + .globl VDSO_BLOB_START(VDSO_NAME) + .type VDSO_BLOB_START(VDSO_NAME), %object + .size VDSO_BLOB_START(VDSO_NAME), 0 +VDSO_BLOB_START(VDSO_NAME): + .incbin __XSTRING(VDSO_FILE) + .globl VDSO_BLOB_END(VDSO_NAME) + .type VDSO_BLOB_END(VDSO_NAME), %object + .size VDSO_BLOB_END(VDSO_NAME), 0 +VDSO_BLOB_END(VDSO_NAME): + .globl VDSO_BLOB_SIZE(VDSO_NAME) + .set VDSO_BLOB_SIZE(VDSO_NAME), . - VDSO_BLOB_START(VDSO_NAME)