diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index fc074ad74e6b..1a2c0e2ef93d 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -1,2932 +1,2983 @@
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2017 Dell EMC
 * Copyright (c) 2000-2001, 2003 David O'Brien
 * Copyright (c) 1995-1996 Søren Schmidt
 * Copyright (c) 1996 Peter Wemm
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ #include "opt_capsicum.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define ELF_NOTE_ROUNDSIZE 4 #define OLD_EI_BRAND 8 static int __elfN(check_header)(const Elf_Ehdr *hdr); static Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp, const char *interp, int32_t *osrel, uint32_t *fctl0); static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr, u_long *entry); static int __elfN(load_section)(const struct image_params *imgp, vm_ooffset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot); static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp); static bool __elfN(freebsd_trans_osrel)(const Elf_Note *note, int32_t *osrel); static bool kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel); static bool __elfN(check_note)(struct image_params *imgp, Elf_Brandnote *checknote, int32_t *osrel, bool *has_fctl0, uint32_t *fctl0); static vm_prot_t __elfN(trans_prot)(Elf_Word); static Elf_Word __elfN(untrans_prot)(vm_prot_t); static size_t __elfN(prepare_register_notes)(struct thread *td, struct note_info_list *list, struct thread *target_td); SYSCTL_NODE(_kern, OID_AUTO, __CONCAT(elf, __ELF_WORD_SIZE), CTLFLAG_RW | CTLFLAG_MPSAFE, 0, ""); int __elfN(fallback_brand) = -1; SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, fallback_brand, CTLFLAG_RWTUN, &__elfN(fallback_brand), 0, __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) " brand of last resort"); static int elf_legacy_coredump = 0; SYSCTL_INT(_debug, OID_AUTO, __elfN(legacy_coredump), CTLFLAG_RW, &elf_legacy_coredump, 0, "include all and only RW pages in core dumps"); int __elfN(nxstack) = #if defined(__amd64__) || defined(__powerpc64__) /* both 64 and 32 bit */ || \ defined(__arm__) || defined(__aarch64__) || \ defined(__riscv) 1; #else 0; #endif SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, nxstack, CTLFLAG_RW, &__elfN(nxstack), 0, __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": support PT_GNU_STACK for non-executable stack control"); #if defined(__amd64__) static int __elfN(vdso) = 1; SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, vdso, CTLFLAG_RWTUN, &__elfN(vdso), 0, __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": enable vdso preloading"); #else static int __elfN(vdso) = 0; #endif #if __ELF_WORD_SIZE == 32 && (defined(__amd64__) || defined(__i386__)) int i386_read_exec = 0; SYSCTL_INT(_kern_elf32, OID_AUTO, read_exec, CTLFLAG_RW, &i386_read_exec, 0, "enable execution from readable segments"); #endif static u_long __elfN(pie_base) = ET_DYN_LOAD_ADDR; static int sysctl_pie_base(SYSCTL_HANDLER_ARGS) { u_long val; int error; val = __elfN(pie_base); error = sysctl_handle_long(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); if ((val & PAGE_MASK) != 0) return (EINVAL); __elfN(pie_base) = val; return (0); } SYSCTL_PROC(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, pie_base, CTLTYPE_ULONG | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0, sysctl_pie_base, "LU", "PIE load base without randomization"); SYSCTL_NODE(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, aslr, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, ""); #define ASLR_NODE_OID __CONCAT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), _aslr) /* * Enable ASLR by 
default for 64-bit non-PIE binaries. 32-bit architectures * have limited address space (which can cause issues for applications with * high memory use) so we leave it off there. */ static int __elfN(aslr_enabled) = __ELF_WORD_SIZE == 64; SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, enable, CTLFLAG_RWTUN, &__elfN(aslr_enabled), 0, __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": enable address map randomization"); /* * Enable ASLR by default for 64-bit PIE binaries. */ static int __elfN(pie_aslr_enabled) = __ELF_WORD_SIZE == 64; SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, pie_enable, CTLFLAG_RWTUN, &__elfN(pie_aslr_enabled), 0, __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": enable address map randomization for PIE binaries"); /* * Sbrk is deprecated and it can be assumed that in most cases it will not be * used anyway. This setting is valid only with ASLR enabled, and allows ASLR * to use the bss grow region. */ static int __elfN(aslr_honor_sbrk) = 0; SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, honor_sbrk, CTLFLAG_RW, &__elfN(aslr_honor_sbrk), 0, __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": assume sbrk is used"); static int __elfN(aslr_stack) = __ELF_WORD_SIZE == 64; SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, stack, CTLFLAG_RWTUN, &__elfN(aslr_stack), 0, __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": enable stack address randomization"); static int __elfN(aslr_shared_page) = __ELF_WORD_SIZE == 64; SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, shared_page, CTLFLAG_RWTUN, &__elfN(aslr_shared_page), 0, __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": enable shared page address randomization"); static int __elfN(sigfastblock) = 1; SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, sigfastblock, CTLFLAG_RWTUN, &__elfN(sigfastblock), 0, "enable sigfastblock for new processes"); static bool __elfN(allow_wx) = true; SYSCTL_BOOL(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, allow_wx, CTLFLAG_RWTUN, &__elfN(allow_wx), 0, "Allow pages to be mapped simultaneously writable and executable"); static Elf_Brandinfo *elf_brand_list[MAX_BRANDS]; #define aligned(a, t) (rounddown2((u_long)(a), sizeof(t)) == (u_long)(a)) Elf_Brandnote __elfN(freebsd_brandnote) = { .hdr.n_namesz = sizeof(FREEBSD_ABI_VENDOR), .hdr.n_descsz = sizeof(int32_t), .hdr.n_type = NT_FREEBSD_ABI_TAG, .vendor = FREEBSD_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = __elfN(freebsd_trans_osrel) }; static bool __elfN(freebsd_trans_osrel)(const Elf_Note *note, int32_t *osrel) { uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE); *osrel = *(const int32_t *)(p); return (true); } static int GNU_KFREEBSD_ABI_DESC = 3; Elf_Brandnote __elfN(kfreebsd_brandnote) = { .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), .hdr.n_descsz = 16, /* XXX at least 16 */ .hdr.n_type = 1, .vendor = GNU_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = kfreebsd_trans_osrel }; static bool kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel) { const Elf32_Word *desc; uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE); desc = (const Elf32_Word *)p; if (desc[0] != GNU_KFREEBSD_ABI_DESC) return (false); /* * Debian GNU/kFreeBSD embed the earliest compatible kernel version * (__FreeBSD_version: Rxx) in the LSB way. 
*/ *osrel = desc[1] * 100000 + desc[2] * 1000 + desc[3]; return (true); } int __elfN(insert_brand_entry)(Elf_Brandinfo *entry) { int i; for (i = 0; i < MAX_BRANDS; i++) { if (elf_brand_list[i] == NULL) { elf_brand_list[i] = entry; break; } } if (i == MAX_BRANDS) { printf("WARNING: %s: could not insert brandinfo entry: %p\n", __func__, entry); return (-1); } return (0); } int __elfN(remove_brand_entry)(Elf_Brandinfo *entry) { int i; for (i = 0; i < MAX_BRANDS; i++) { if (elf_brand_list[i] == entry) { elf_brand_list[i] = NULL; break; } } if (i == MAX_BRANDS) return (-1); return (0); } bool __elfN(brand_inuse)(Elf_Brandinfo *entry) { struct proc *p; bool rval = false; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { if (p->p_sysent == entry->sysvec) { rval = true; break; } } sx_sunlock(&allproc_lock); return (rval); } static Elf_Brandinfo * __elfN(get_brandinfo)(struct image_params *imgp, const char *interp, int32_t *osrel, uint32_t *fctl0) { const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header; Elf_Brandinfo *bi, *bi_m; bool ret, has_fctl0; int i, interp_name_len; interp_name_len = interp != NULL ? strlen(interp) + 1 : 0; /* * We support four types of branding -- (1) the ELF EI_OSABI field * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string * branding w/in the ELF header, (3) path of the `interp_path' * field, and (4) the ".note.ABI-tag" ELF section. */ /* Look for an ".note.ABI-tag" ELF section */ bi_m = NULL; for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi == NULL) continue; if (interp != NULL && (bi->flags & BI_BRAND_ONLY_STATIC) != 0) continue; if (hdr->e_machine == bi->machine && (bi->flags & (BI_BRAND_NOTE|BI_BRAND_NOTE_MANDATORY)) != 0) { has_fctl0 = false; *fctl0 = 0; *osrel = 0; ret = __elfN(check_note)(imgp, bi->brand_note, osrel, &has_fctl0, fctl0); /* Give brand a chance to veto check_note's guess */ if (ret && bi->header_supported) { ret = bi->header_supported(imgp, osrel, has_fctl0 ? fctl0 : NULL); } /* * If note checker claimed the binary, but the * interpreter path in the image does not * match default one for the brand, try to * search for other brands with the same * interpreter. Either there is better brand * with the right interpreter, or, failing * this, we return first brand which accepted * our note and, optionally, header. */ if (ret && bi_m == NULL && interp != NULL && (bi->interp_path == NULL || (strlen(bi->interp_path) + 1 != interp_name_len || strncmp(interp, bi->interp_path, interp_name_len) != 0))) { bi_m = bi; ret = 0; } if (ret) return (bi); } } if (bi_m != NULL) return (bi_m); /* If the executable has a brand, search for it in the brand list. */ for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi == NULL || (bi->flags & BI_BRAND_NOTE_MANDATORY) != 0 || (interp != NULL && (bi->flags & BI_BRAND_ONLY_STATIC) != 0)) continue; if (hdr->e_machine == bi->machine && (hdr->e_ident[EI_OSABI] == bi->brand || (bi->compat_3_brand != NULL && strcmp((const char *)&hdr->e_ident[OLD_EI_BRAND], bi->compat_3_brand) == 0))) { /* Looks good, but give brand a chance to veto */ if (bi->header_supported == NULL || bi->header_supported(imgp, NULL, NULL)) { /* * Again, prefer strictly matching * interpreter path. 
*/ if (interp_name_len == 0 && bi->interp_path == NULL) return (bi); if (bi->interp_path != NULL && strlen(bi->interp_path) + 1 == interp_name_len && strncmp(interp, bi->interp_path, interp_name_len) == 0) return (bi); if (bi_m == NULL) bi_m = bi; } } } if (bi_m != NULL) return (bi_m); /* No known brand, see if the header is recognized by any brand */ for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY || bi->header_supported == NULL) continue; if (hdr->e_machine == bi->machine) { ret = bi->header_supported(imgp, NULL, NULL); if (ret) return (bi); } } /* Lacking a known brand, search for a recognized interpreter. */ if (interp != NULL) { for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi == NULL || (bi->flags & (BI_BRAND_NOTE_MANDATORY | BI_BRAND_ONLY_STATIC)) != 0) continue; if (hdr->e_machine == bi->machine && bi->interp_path != NULL && /* ELF image p_filesz includes terminating zero */ strlen(bi->interp_path) + 1 == interp_name_len && strncmp(interp, bi->interp_path, interp_name_len) == 0 && (bi->header_supported == NULL || bi->header_supported(imgp, NULL, NULL))) return (bi); } } /* Lacking a recognized interpreter, try the default brand */ for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi == NULL || (bi->flags & BI_BRAND_NOTE_MANDATORY) != 0 || (interp != NULL && (bi->flags & BI_BRAND_ONLY_STATIC) != 0)) continue; if (hdr->e_machine == bi->machine && __elfN(fallback_brand) == bi->brand && (bi->header_supported == NULL || bi->header_supported(imgp, NULL, NULL))) return (bi); } return (NULL); } static bool __elfN(phdr_in_zero_page)(const Elf_Ehdr *hdr) { return (hdr->e_phoff <= PAGE_SIZE && (u_int)hdr->e_phentsize * hdr->e_phnum <= PAGE_SIZE - hdr->e_phoff); } static int __elfN(check_header)(const Elf_Ehdr *hdr) { Elf_Brandinfo *bi; int i; if (!IS_ELF(*hdr) || hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || hdr->e_ident[EI_DATA] != ELF_TARG_DATA || hdr->e_ident[EI_VERSION] != EV_CURRENT || hdr->e_phentsize != sizeof(Elf_Phdr) || hdr->e_version != ELF_TARG_VER) return (ENOEXEC); /* * Make sure we have at least one brand for this machine. */ for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi != NULL && bi->machine == hdr->e_machine) break; } if (i == MAX_BRANDS) return (ENOEXEC); return (0); } static int __elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_offset_t start, vm_offset_t end, vm_prot_t prot) { struct sf_buf *sf; int error; vm_offset_t off; /* * Create the page if it doesn't exist yet. Ignore errors. */ vm_map_fixed(map, NULL, 0, trunc_page(start), round_page(end) - trunc_page(start), VM_PROT_ALL, VM_PROT_ALL, MAP_CHECK_EXCL); /* * Find the page from the underlying object. 
*/ if (object != NULL) { sf = vm_imgact_map_page(object, offset); if (sf == NULL) return (KERN_FAILURE); off = offset - trunc_page(offset); error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start, end - start); vm_imgact_unmap_page(sf); if (error != 0) return (KERN_FAILURE); } return (KERN_SUCCESS); } static int __elfN(map_insert)(const struct image_params *imgp, vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_offset_t start, vm_offset_t end, vm_prot_t prot, int cow) { struct sf_buf *sf; vm_offset_t off; vm_size_t sz; int error, locked, rv; if (start != trunc_page(start)) { rv = __elfN(map_partial)(map, object, offset, start, round_page(start), prot); if (rv != KERN_SUCCESS) return (rv); offset += round_page(start) - start; start = round_page(start); } if (end != round_page(end)) { rv = __elfN(map_partial)(map, object, offset + trunc_page(end) - start, trunc_page(end), end, prot); if (rv != KERN_SUCCESS) return (rv); end = trunc_page(end); } if (start >= end) return (KERN_SUCCESS); if ((offset & PAGE_MASK) != 0) { /* * The mapping is not page aligned. This means that we have * to copy the data. */ rv = vm_map_fixed(map, NULL, 0, start, end - start, prot | VM_PROT_WRITE, VM_PROT_ALL, MAP_CHECK_EXCL); if (rv != KERN_SUCCESS) return (rv); if (object == NULL) return (KERN_SUCCESS); for (; start < end; start += sz) { sf = vm_imgact_map_page(object, offset); if (sf == NULL) return (KERN_FAILURE); off = offset - trunc_page(offset); sz = end - start; if (sz > PAGE_SIZE - off) sz = PAGE_SIZE - off; error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start, sz); vm_imgact_unmap_page(sf); if (error != 0) return (KERN_FAILURE); offset += sz; } } else { vm_object_reference(object); rv = vm_map_fixed(map, object, offset, start, end - start, prot, VM_PROT_ALL, cow | MAP_CHECK_EXCL | (object != NULL ? MAP_VN_EXEC : 0)); if (rv != KERN_SUCCESS) { locked = VOP_ISLOCKED(imgp->vp); VOP_UNLOCK(imgp->vp); vm_object_deallocate(object); vn_lock(imgp->vp, locked | LK_RETRY); return (rv); } else if (object != NULL) { MPASS(imgp->vp->v_object == object); VOP_SET_TEXT_CHECKED(imgp->vp); } } return (KERN_SUCCESS); } static int __elfN(load_section)(const struct image_params *imgp, vm_ooffset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot) { struct sf_buf *sf; size_t map_len; vm_map_t map; vm_object_t object; vm_offset_t map_addr; int error, rv, cow; size_t copy_len; vm_ooffset_t file_addr; /* * It's necessary to fail if the filsz + offset taken from the * header is greater than the actual file pager object's size. * If we were to allow this, then the vm_map_find() below would * walk right off the end of the file object and into the ether. * * While I'm here, might as well check for something else that * is invalid: filsz cannot be greater than memsz. */ if ((filsz != 0 && (off_t)filsz + offset > imgp->attr->va_size) || filsz > memsz) { uprintf("elf_load_section: truncated ELF file\n"); return (ENOEXEC); } object = imgp->object; map = &imgp->proc->p_vmspace->vm_map; map_addr = trunc_page((vm_offset_t)vmaddr); file_addr = trunc_page(offset); /* * We have two choices. We can either clear the data in the last page * of an oversized mapping, or we can start the anon mapping a page * early and copy the initialized data into that first page. We * choose the second. 
*/ if (filsz == 0) map_len = 0; else if (memsz > filsz) map_len = trunc_page(offset + filsz) - file_addr; else map_len = round_page(offset + filsz) - file_addr; if (map_len != 0) { /* cow flags: don't dump readonly sections in core */ cow = MAP_COPY_ON_WRITE | MAP_PREFAULT | (prot & VM_PROT_WRITE ? 0 : MAP_DISABLE_COREDUMP); rv = __elfN(map_insert)(imgp, map, object, file_addr, map_addr, map_addr + map_len, prot, cow); if (rv != KERN_SUCCESS) return (EINVAL); /* we can stop now if we've covered it all */ if (memsz == filsz) return (0); } /* * We have to get the remaining bit of the file into the first part * of the oversized map segment. This is normally because the .data * segment in the file is extended to provide bss. It's a neat idea * to try and save a page, but it's a pain in the behind to implement. */ copy_len = filsz == 0 ? 0 : (offset + filsz) - trunc_page(offset + filsz); map_addr = trunc_page((vm_offset_t)vmaddr + filsz); map_len = round_page((vm_offset_t)vmaddr + memsz) - map_addr; /* This had damn well better be true! */ if (map_len != 0) { rv = __elfN(map_insert)(imgp, map, NULL, 0, map_addr, map_addr + map_len, prot, 0); if (rv != KERN_SUCCESS) return (EINVAL); } if (copy_len != 0) { sf = vm_imgact_map_page(object, offset + filsz); if (sf == NULL) return (EIO); /* send the page fragment to user space */ error = copyout((caddr_t)sf_buf_kva(sf), (caddr_t)map_addr, copy_len); vm_imgact_unmap_page(sf); if (error != 0) return (error); } /* * Remove write access to the page if it was only granted by map_insert * to allow copyout. */ if ((prot & VM_PROT_WRITE) == 0) vm_map_protect(map, trunc_page(map_addr), round_page(map_addr + map_len), prot, 0, VM_MAP_PROTECT_SET_PROT); return (0); } static int __elfN(load_sections)(const struct image_params *imgp, const Elf_Ehdr *hdr, const Elf_Phdr *phdr, u_long rbase, u_long *base_addrp) { vm_prot_t prot; u_long base_addr; bool first; int error, i; ASSERT_VOP_LOCKED(imgp->vp, __func__); base_addr = 0; first = true; for (i = 0; i < hdr->e_phnum; i++) { if (phdr[i].p_type != PT_LOAD || phdr[i].p_memsz == 0) continue; /* Loadable segment */ prot = __elfN(trans_prot)(phdr[i].p_flags); error = __elfN(load_section)(imgp, phdr[i].p_offset, (caddr_t)(uintptr_t)phdr[i].p_vaddr + rbase, phdr[i].p_memsz, phdr[i].p_filesz, prot); if (error != 0) return (error); /* * Establish the base address if this is the first segment. */ if (first) { base_addr = trunc_page(phdr[i].p_vaddr + rbase); first = false; } } if (base_addrp != NULL) *base_addrp = base_addr; return (0); } /* * Load the file "file" into memory. It may be either a shared object * or an executable. * * The "addr" reference parameter is in/out. On entry, it specifies * the address where a shared object should be loaded. If the file is * an executable, this value is ignored. On exit, "addr" specifies * where the file was actually loaded. * * The "entry" reference parameter is out only. On exit, it specifies * the entry point for the loaded file. */ static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr, u_long *entry) { struct { struct nameidata nd; struct vattr attr; struct image_params image_params; } *tempdata; const Elf_Ehdr *hdr = NULL; const Elf_Phdr *phdr = NULL; struct nameidata *nd; struct vattr *attr; struct image_params *imgp; u_long rbase; u_long base_addr = 0; int error; #ifdef CAPABILITY_MODE /* * XXXJA: This check can go away once we are sufficiently confident * that the checks in namei() are correct. 
*/ if (IN_CAPABILITY_MODE(curthread)) return (ECAPMODE); #endif tempdata = malloc(sizeof(*tempdata), M_TEMP, M_WAITOK | M_ZERO); nd = &tempdata->nd; attr = &tempdata->attr; imgp = &tempdata->image_params; /* * Initialize part of the common data */ imgp->proc = p; imgp->attr = attr; NDINIT(nd, LOOKUP, ISOPEN | FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, file); if ((error = namei(nd)) != 0) { nd->ni_vp = NULL; goto fail; } NDFREE_PNBUF(nd); imgp->vp = nd->ni_vp; /* * Check permissions, modes, uid, etc on the file, and "open" it. */ error = exec_check_permissions(imgp); if (error) goto fail; error = exec_map_first_page(imgp); if (error) goto fail; imgp->object = nd->ni_vp->v_object; hdr = (const Elf_Ehdr *)imgp->image_header; if ((error = __elfN(check_header)(hdr)) != 0) goto fail; if (hdr->e_type == ET_DYN) rbase = *addr; else if (hdr->e_type == ET_EXEC) rbase = 0; else { error = ENOEXEC; goto fail; } /* Only support headers that fit within first page for now */ if (!__elfN(phdr_in_zero_page)(hdr)) { error = ENOEXEC; goto fail; } phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); if (!aligned(phdr, Elf_Addr)) { error = ENOEXEC; goto fail; } error = __elfN(load_sections)(imgp, hdr, phdr, rbase, &base_addr); if (error != 0) goto fail; if (p->p_sysent->sv_protect != NULL) p->p_sysent->sv_protect(imgp, SVP_INTERP); *addr = base_addr; *entry = (unsigned long)hdr->e_entry + rbase; fail: if (imgp->firstpage) exec_unmap_first_page(imgp); if (nd->ni_vp) { if (imgp->textset) VOP_UNSET_TEXT_CHECKED(nd->ni_vp); vput(nd->ni_vp); } free(tempdata, M_TEMP); return (error); } /* * Select randomized valid address in the map map, between minv and * maxv, with specified alignment. The [minv, maxv) range must belong * to the map. Note that function only allocates the address, it is * up to caller to clamp maxv in a way that the final allocation * length fit into the map. * * Result is returned in *resp, error code indicates that arguments * did not pass sanity checks for overflow and range correctness. */ static int __CONCAT(rnd_, __elfN(base))(vm_map_t map, u_long minv, u_long maxv, u_int align, u_long *resp) { u_long rbase, res; MPASS(vm_map_min(map) <= minv); if (minv >= maxv || minv + align >= maxv || maxv > vm_map_max(map)) { uprintf("Invalid ELF segments layout\n"); return (ENOEXEC); } arc4rand(&rbase, sizeof(rbase), 0); res = roundup(minv, (u_long)align) + rbase % (maxv - minv); res &= ~((u_long)align - 1); if (res >= maxv) res -= align; KASSERT(res >= minv, ("res %#lx < minv %#lx, maxv %#lx rbase %#lx", res, minv, maxv, rbase)); KASSERT(res < maxv, ("res %#lx > maxv %#lx, minv %#lx rbase %#lx", res, maxv, minv, rbase)); *resp = res; return (0); } static int __elfN(enforce_limits)(struct image_params *imgp, const Elf_Ehdr *hdr, const Elf_Phdr *phdr) { struct vmspace *vmspace; const char *err_str; u_long text_size, data_size, total_size, text_addr, data_addr; u_long seg_size, seg_addr; int i; err_str = NULL; text_size = data_size = total_size = text_addr = data_addr = 0; for (i = 0; i < hdr->e_phnum; i++) { if (phdr[i].p_type != PT_LOAD || phdr[i].p_memsz == 0) continue; seg_addr = trunc_page(phdr[i].p_vaddr + imgp->et_dyn_addr); seg_size = round_page(phdr[i].p_memsz + phdr[i].p_vaddr + imgp->et_dyn_addr - seg_addr); /* * Make the largest executable segment the official * text segment and all others data. * * Note that obreak() assumes that data_addr + data_size == end * of data load area, and the ELF file format expects segments * to be sorted by address. 
If multiple data segments exist, * the last one will be used. */ if ((phdr[i].p_flags & PF_X) != 0 && text_size < seg_size) { text_size = seg_size; text_addr = seg_addr; } else { data_size = seg_size; data_addr = seg_addr; } total_size += seg_size; } if (data_addr == 0 && data_size == 0) { data_addr = text_addr; data_size = text_size; } /* * Check limits. It should be safe to check the * limits after loading the segments since we do * not actually fault in all the segments pages. */ PROC_LOCK(imgp->proc); if (data_size > lim_cur_proc(imgp->proc, RLIMIT_DATA)) err_str = "Data segment size exceeds process limit"; else if (text_size > maxtsiz) err_str = "Text segment size exceeds system limit"; else if (total_size > lim_cur_proc(imgp->proc, RLIMIT_VMEM)) err_str = "Total segment size exceeds process limit"; else if (racct_set(imgp->proc, RACCT_DATA, data_size) != 0) err_str = "Data segment size exceeds resource limit"; else if (racct_set(imgp->proc, RACCT_VMEM, total_size) != 0) err_str = "Total segment size exceeds resource limit"; PROC_UNLOCK(imgp->proc); if (err_str != NULL) { uprintf("%s\n", err_str); return (ENOMEM); } vmspace = imgp->proc->p_vmspace; vmspace->vm_tsize = text_size >> PAGE_SHIFT; vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr; vmspace->vm_dsize = data_size >> PAGE_SHIFT; vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr; return (0); } static int __elfN(get_interp)(struct image_params *imgp, const Elf_Phdr *phdr, char **interpp, bool *free_interpp) { struct thread *td; char *interp; int error, interp_name_len; KASSERT(phdr->p_type == PT_INTERP, ("%s: p_type %u != PT_INTERP", __func__, phdr->p_type)); ASSERT_VOP_LOCKED(imgp->vp, __func__); td = curthread; /* Path to interpreter */ if (phdr->p_filesz < 2 || phdr->p_filesz > MAXPATHLEN) { uprintf("Invalid PT_INTERP\n"); return (ENOEXEC); } interp_name_len = phdr->p_filesz; if (phdr->p_offset > PAGE_SIZE || interp_name_len > PAGE_SIZE - phdr->p_offset) { /* * The vnode lock might be needed by the pagedaemon to * clean pages owned by the vnode. Do not allow sleep * waiting for memory with the vnode locked, instead * try non-sleepable allocation first, and if it * fails, go to the slow path were we drop the lock * and do M_WAITOK. A text reference prevents * modifications to the vnode content. 
*/ interp = malloc(interp_name_len + 1, M_TEMP, M_NOWAIT); if (interp == NULL) { VOP_UNLOCK(imgp->vp); interp = malloc(interp_name_len + 1, M_TEMP, M_WAITOK); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); } error = vn_rdwr(UIO_READ, imgp->vp, interp, interp_name_len, phdr->p_offset, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, NULL, td); if (error != 0) { free(interp, M_TEMP); uprintf("i/o error PT_INTERP %d\n", error); return (error); } interp[interp_name_len] = '\0'; *interpp = interp; *free_interpp = true; return (0); } interp = __DECONST(char *, imgp->image_header) + phdr->p_offset; if (interp[interp_name_len - 1] != '\0') { uprintf("Invalid PT_INTERP\n"); return (ENOEXEC); } *interpp = interp; *free_interpp = false; return (0); } static int __elfN(load_interp)(struct image_params *imgp, const Elf_Brandinfo *brand_info, const char *interp, u_long *addr, u_long *entry) { int error; if (brand_info->interp_newpath != NULL && (brand_info->interp_path == NULL || strcmp(interp, brand_info->interp_path) == 0)) { error = __elfN(load_file)(imgp->proc, brand_info->interp_newpath, addr, entry); if (error == 0) return (0); } error = __elfN(load_file)(imgp->proc, interp, addr, entry); if (error == 0) return (0); uprintf("ELF interpreter %s not found, error %d\n", interp, error); return (error); } /* * Impossible et_dyn_addr initial value indicating that the real base * must be calculated later with some randomization applied. */ #define ET_DYN_ADDR_RAND 1 static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) { struct thread *td; const Elf_Ehdr *hdr; const Elf_Phdr *phdr; Elf_Auxargs *elf_auxargs; struct vmspace *vmspace; vm_map_t map; char *interp; Elf_Brandinfo *brand_info; struct sysentvec *sv; u_long addr, baddr, entry, proghdr; u_long maxalign, maxsalign, mapsz, maxv, maxv1, anon_loc; uint32_t fctl0; int32_t osrel; bool free_interp; int error, i, n; hdr = (const Elf_Ehdr *)imgp->image_header; /* * Do we have a valid ELF header ? * * Only allow ET_EXEC & ET_DYN here, reject ET_DYN later * if particular brand doesn't support it. */ if (__elfN(check_header)(hdr) != 0 || (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN)) return (-1); /* * From here on down, we return an errno, not -1, as we've * detected an ELF file. */ if (!__elfN(phdr_in_zero_page)(hdr)) { uprintf("Program headers not in the first page\n"); return (ENOEXEC); } phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); if (!aligned(phdr, Elf_Addr)) { uprintf("Unaligned program headers\n"); return (ENOEXEC); } n = error = 0; baddr = 0; osrel = 0; fctl0 = 0; entry = proghdr = 0; interp = NULL; free_interp = false; td = curthread; /* * Somewhat arbitrary, limit accepted max alignment for the * loadable segment to the max supported superpage size. Too * large alignment requests are not useful and are indicators * of corrupted or outright malicious binary. 
*/ maxalign = PAGE_SIZE; maxsalign = PAGE_SIZE * 1024; for (i = MAXPAGESIZES - 1; i > 0; i--) { if (pagesizes[i] > maxsalign) { maxsalign = pagesizes[i]; break; } } mapsz = 0; for (i = 0; i < hdr->e_phnum; i++) { switch (phdr[i].p_type) { case PT_LOAD: if (n == 0) baddr = phdr[i].p_vaddr; if (!powerof2(phdr[i].p_align) || phdr[i].p_align > maxsalign) { uprintf("Invalid segment alignment\n"); error = ENOEXEC; goto ret; } if (phdr[i].p_align > maxalign) maxalign = phdr[i].p_align; if (mapsz + phdr[i].p_memsz < mapsz) { uprintf("Mapsize overflow\n"); error = ENOEXEC; goto ret; } mapsz += phdr[i].p_memsz; n++; /* * If this segment contains the program headers, * remember their virtual address for the AT_PHDR * aux entry. Static binaries don't usually include * a PT_PHDR entry. */ if (phdr[i].p_offset == 0 && hdr->e_phoff + hdr->e_phnum * hdr->e_phentsize <= phdr[i].p_filesz) proghdr = phdr[i].p_vaddr + hdr->e_phoff; break; case PT_INTERP: /* Path to interpreter */ if (interp != NULL) { uprintf("Multiple PT_INTERP headers\n"); error = ENOEXEC; goto ret; } error = __elfN(get_interp)(imgp, &phdr[i], &interp, &free_interp); if (error != 0) goto ret; break; case PT_GNU_STACK: if (__elfN(nxstack)) { imgp->stack_prot = __elfN(trans_prot)(phdr[i].p_flags); if ((imgp->stack_prot & VM_PROT_RW) != VM_PROT_RW) { uprintf("Invalid PT_GNU_STACK\n"); error = ENOEXEC; goto ret; } } imgp->stack_sz = phdr[i].p_memsz; break; case PT_PHDR: /* Program header table info */ proghdr = phdr[i].p_vaddr; break; } } brand_info = __elfN(get_brandinfo)(imgp, interp, &osrel, &fctl0); if (brand_info == NULL) { uprintf("ELF binary type \"%u\" not known.\n", hdr->e_ident[EI_OSABI]); error = ENOEXEC; goto ret; } sv = brand_info->sysvec; if (hdr->e_type == ET_DYN) { if ((brand_info->flags & BI_CAN_EXEC_DYN) == 0) { uprintf("Cannot execute shared object\n"); error = ENOEXEC; goto ret; } /* * Honour the base load address from the dso if it is * non-zero for some reason. */ if (baddr == 0) { if ((sv->sv_flags & SV_ASLR) == 0 || (fctl0 & NT_FREEBSD_FCTL_ASLR_DISABLE) != 0) imgp->et_dyn_addr = __elfN(pie_base); else if ((__elfN(pie_aslr_enabled) && (imgp->proc->p_flag2 & P2_ASLR_DISABLE) == 0) || (imgp->proc->p_flag2 & P2_ASLR_ENABLE) != 0) imgp->et_dyn_addr = ET_DYN_ADDR_RAND; else imgp->et_dyn_addr = __elfN(pie_base); } } /* * Avoid a possible deadlock if the current address space is destroyed * and that address space maps the locked vnode. In the common case, * the locked vnode's v_usecount is decremented but remains greater * than zero. Consequently, the vnode lock is not needed by vrele(). * However, in cases where the vnode lock is external, such as nullfs, * v_usecount may become zero. * * The VV_TEXT flag prevents modifications to the executable while * the vnode is unlocked. */ VOP_UNLOCK(imgp->vp); /* * Decide whether to enable randomization of user mappings. * First, reset user preferences for the setid binaries. * Then, account for the support of the randomization by the * ABI, by user preferences, and make special treatment for * PIE binaries. 
*/ if (imgp->credential_setid) { PROC_LOCK(imgp->proc); imgp->proc->p_flag2 &= ~(P2_ASLR_ENABLE | P2_ASLR_DISABLE | P2_WXORX_DISABLE | P2_WXORX_ENABLE_EXEC); PROC_UNLOCK(imgp->proc); } if ((sv->sv_flags & SV_ASLR) == 0 || (imgp->proc->p_flag2 & P2_ASLR_DISABLE) != 0 || (fctl0 & NT_FREEBSD_FCTL_ASLR_DISABLE) != 0) { KASSERT(imgp->et_dyn_addr != ET_DYN_ADDR_RAND, ("imgp->et_dyn_addr == RAND and !ASLR")); } else if ((imgp->proc->p_flag2 & P2_ASLR_ENABLE) != 0 || (__elfN(aslr_enabled) && hdr->e_type == ET_EXEC) || imgp->et_dyn_addr == ET_DYN_ADDR_RAND) { imgp->map_flags |= MAP_ASLR; /* * If user does not care about sbrk, utilize the bss * grow region for mappings as well. We can select * the base for the image anywere and still not suffer * from the fragmentation. */ if (!__elfN(aslr_honor_sbrk) || (imgp->proc->p_flag2 & P2_ASLR_IGNSTART) != 0) imgp->map_flags |= MAP_ASLR_IGNSTART; if (__elfN(aslr_stack)) imgp->map_flags |= MAP_ASLR_STACK; if (__elfN(aslr_shared_page)) imgp->imgp_flags |= IMGP_ASLR_SHARED_PAGE; } if ((!__elfN(allow_wx) && (fctl0 & NT_FREEBSD_FCTL_WXNEEDED) == 0 && (imgp->proc->p_flag2 & P2_WXORX_DISABLE) == 0) || (imgp->proc->p_flag2 & P2_WXORX_ENABLE_EXEC) != 0) imgp->map_flags |= MAP_WXORX; error = exec_new_vmspace(imgp, sv); imgp->proc->p_sysent = sv; imgp->proc->p_elf_brandinfo = brand_info; vmspace = imgp->proc->p_vmspace; map = &vmspace->vm_map; maxv = sv->sv_usrstack; if ((imgp->map_flags & MAP_ASLR_STACK) == 0) maxv -= lim_max(td, RLIMIT_STACK); if (error == 0 && mapsz >= maxv - vm_map_min(map)) { uprintf("Excessive mapping size\n"); error = ENOEXEC; } if (error == 0 && imgp->et_dyn_addr == ET_DYN_ADDR_RAND) { KASSERT((map->flags & MAP_ASLR) != 0, ("ET_DYN_ADDR_RAND but !MAP_ASLR")); error = __CONCAT(rnd_, __elfN(base))(map, vm_map_min(map) + mapsz + lim_max(td, RLIMIT_DATA), /* reserve half of the address space to interpreter */ maxv / 2, maxalign, &imgp->et_dyn_addr); } vn_lock(imgp->vp, LK_SHARED | LK_RETRY); if (error != 0) goto ret; error = __elfN(load_sections)(imgp, hdr, phdr, imgp->et_dyn_addr, NULL); if (error != 0) goto ret; error = __elfN(enforce_limits)(imgp, hdr, phdr); if (error != 0) goto ret; /* * We load the dynamic linker where a userland call * to mmap(0, ...) would put it. The rationale behind this * calculation is that it leaves room for the heap to grow to * its maximum allowed size. */ addr = round_page((vm_offset_t)vmspace->vm_daddr + lim_max(td, RLIMIT_DATA)); if ((map->flags & MAP_ASLR) != 0) { maxv1 = maxv / 2 + addr / 2; error = __CONCAT(rnd_, __elfN(base))(map, addr, maxv1, #if VM_NRESERVLEVEL > 0 pagesizes[VM_NRESERVLEVEL] != 0 ? /* Align anon_loc to the largest superpage size. 
*/ pagesizes[VM_NRESERVLEVEL] : #endif pagesizes[0], &anon_loc); if (error != 0) goto ret; map->anon_loc = anon_loc; } else { map->anon_loc = addr; } entry = (u_long)hdr->e_entry + imgp->et_dyn_addr; imgp->entry_addr = entry; if (sv->sv_protect != NULL) sv->sv_protect(imgp, SVP_IMAGE); if (interp != NULL) { VOP_UNLOCK(imgp->vp); if ((map->flags & MAP_ASLR) != 0) { /* Assume that interpreter fits into 1/4 of AS */ maxv1 = maxv / 2 + addr / 2; error = __CONCAT(rnd_, __elfN(base))(map, addr, maxv1, PAGE_SIZE, &addr); } if (error == 0) { error = __elfN(load_interp)(imgp, brand_info, interp, &addr, &imgp->entry_addr); } vn_lock(imgp->vp, LK_SHARED | LK_RETRY); if (error != 0) goto ret; } else addr = imgp->et_dyn_addr; error = exec_map_stack(imgp); if (error != 0) goto ret; /* * Construct auxargs table (used by the copyout_auxargs routine) */ elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_NOWAIT); if (elf_auxargs == NULL) { VOP_UNLOCK(imgp->vp); elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); } elf_auxargs->execfd = -1; elf_auxargs->phdr = proghdr + imgp->et_dyn_addr; elf_auxargs->phent = hdr->e_phentsize; elf_auxargs->phnum = hdr->e_phnum; elf_auxargs->pagesz = PAGE_SIZE; elf_auxargs->base = addr; elf_auxargs->flags = 0; elf_auxargs->entry = entry; elf_auxargs->hdr_eflags = hdr->e_flags; imgp->auxargs = elf_auxargs; imgp->interpreted = 0; imgp->reloc_base = addr; imgp->proc->p_osrel = osrel; imgp->proc->p_fctl0 = fctl0; imgp->proc->p_elf_flags = hdr->e_flags; ret: ASSERT_VOP_LOCKED(imgp->vp, "skipped relock"); if (free_interp) free(interp, M_TEMP); return (error); } #define elf_suword __CONCAT(suword, __ELF_WORD_SIZE) int __elfN(freebsd_copyout_auxargs)(struct image_params *imgp, uintptr_t base) { Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs; Elf_Auxinfo *argarray, *pos; struct vmspace *vmspace; rlim_t stacksz; int error, oc; uint32_t bsdflags; argarray = pos = malloc(AT_COUNT * sizeof(*pos), M_TEMP, M_WAITOK | M_ZERO); vmspace = imgp->proc->p_vmspace; if (args->execfd != -1) AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); AUXARGS_ENTRY(pos, AT_PHENT, args->phent); AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); AUXARGS_ENTRY(pos, AT_BASE, args->base); AUXARGS_ENTRY(pos, AT_EHDRFLAGS, args->hdr_eflags); if (imgp->execpathp != 0) AUXARGS_ENTRY_PTR(pos, AT_EXECPATH, imgp->execpathp); AUXARGS_ENTRY(pos, AT_OSRELDATE, imgp->proc->p_ucred->cr_prison->pr_osreldate); if (imgp->canary != 0) { AUXARGS_ENTRY_PTR(pos, AT_CANARY, imgp->canary); AUXARGS_ENTRY(pos, AT_CANARYLEN, imgp->canarylen); } AUXARGS_ENTRY(pos, AT_NCPUS, mp_ncpus); if (imgp->pagesizes != 0) { AUXARGS_ENTRY_PTR(pos, AT_PAGESIZES, imgp->pagesizes); AUXARGS_ENTRY(pos, AT_PAGESIZESLEN, imgp->pagesizeslen); } if ((imgp->sysent->sv_flags & SV_TIMEKEEP) != 0) { AUXARGS_ENTRY(pos, AT_TIMEKEEP, vmspace->vm_shp_base + imgp->sysent->sv_timekeep_offset); } AUXARGS_ENTRY(pos, AT_STACKPROT, imgp->sysent->sv_shared_page_obj != NULL && imgp->stack_prot != 0 ? imgp->stack_prot : imgp->sysent->sv_stackprot); if (imgp->sysent->sv_hwcap != NULL) AUXARGS_ENTRY(pos, AT_HWCAP, *imgp->sysent->sv_hwcap); if (imgp->sysent->sv_hwcap2 != NULL) AUXARGS_ENTRY(pos, AT_HWCAP2, *imgp->sysent->sv_hwcap2); bsdflags = 0; bsdflags |= __elfN(sigfastblock) ? 
ELF_BSDF_SIGFASTBLK : 0; oc = atomic_load_int(&vm_overcommit); bsdflags |= (oc & (SWAP_RESERVE_FORCE_ON | SWAP_RESERVE_RLIMIT_ON)) != 0 ? ELF_BSDF_VMNOOVERCOMMIT : 0; AUXARGS_ENTRY(pos, AT_BSDFLAGS, bsdflags); AUXARGS_ENTRY(pos, AT_ARGC, imgp->args->argc); AUXARGS_ENTRY_PTR(pos, AT_ARGV, imgp->argv); AUXARGS_ENTRY(pos, AT_ENVC, imgp->args->envc); AUXARGS_ENTRY_PTR(pos, AT_ENVV, imgp->envv); AUXARGS_ENTRY_PTR(pos, AT_PS_STRINGS, imgp->ps_strings); #ifdef RANDOM_FENESTRASX if ((imgp->sysent->sv_flags & SV_RNG_SEED_VER) != 0) { AUXARGS_ENTRY(pos, AT_FXRNG, vmspace->vm_shp_base + imgp->sysent->sv_fxrng_gen_offset); } #endif if ((imgp->sysent->sv_flags & SV_DSO_SIG) != 0 && __elfN(vdso) != 0) { AUXARGS_ENTRY(pos, AT_KPRELOAD, vmspace->vm_shp_base + imgp->sysent->sv_vdso_offset); } AUXARGS_ENTRY(pos, AT_USRSTACKBASE, round_page(vmspace->vm_stacktop)); stacksz = imgp->proc->p_limit->pl_rlimit[RLIMIT_STACK].rlim_cur; AUXARGS_ENTRY(pos, AT_USRSTACKLIM, stacksz); AUXARGS_ENTRY(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); imgp->auxargs = NULL; KASSERT(pos - argarray <= AT_COUNT, ("Too many auxargs")); error = copyout(argarray, (void *)base, sizeof(*argarray) * AT_COUNT); free(argarray, M_TEMP); return (error); } int __elfN(freebsd_fixup)(uintptr_t *stack_base, struct image_params *imgp) { Elf_Addr *base; base = (Elf_Addr *)*stack_base; base--; if (elf_suword(base, imgp->args->argc) == -1) return (EFAULT); *stack_base = (uintptr_t)base; return (0); } /* * Code for generating ELF core dumps. */ typedef void (*segment_callback)(vm_map_entry_t, void *); /* Closure for cb_put_phdr(). */ struct phdr_closure { Elf_Phdr *phdr; /* Program header to fill in */ Elf_Off offset; /* Offset of segment in core file */ }; struct note_info { int type; /* Note type. */ struct regset *regset; /* Register set. */ outfunc_t outfunc; /* Output function. */ void *outarg; /* Argument for the output function. */ size_t outsize; /* Output size. */ TAILQ_ENTRY(note_info) link; /* Link to the next note info. 
*/ }; TAILQ_HEAD(note_info_list, note_info); extern int compress_user_cores; extern int compress_user_cores_level; static void cb_put_phdr(vm_map_entry_t, void *); static void cb_size_segment(vm_map_entry_t, void *); static void each_dumpable_segment(struct thread *, segment_callback, void *, int); static int __elfN(corehdr)(struct coredump_params *, int, void *, size_t, struct note_info_list *, size_t, int); static void __elfN(putnote)(struct thread *td, struct note_info *, struct sbuf *); static void __elfN(note_prpsinfo)(void *, struct sbuf *, size_t *); static void __elfN(note_threadmd)(void *, struct sbuf *, size_t *); static void __elfN(note_procstat_auxv)(void *, struct sbuf *, size_t *); static void __elfN(note_procstat_proc)(void *, struct sbuf *, size_t *); static void __elfN(note_procstat_psstrings)(void *, struct sbuf *, size_t *); +static void __elfN(note_procstat_kqueues)(void *, struct sbuf *, size_t *); static void note_procstat_files(void *, struct sbuf *, size_t *); static void note_procstat_groups(void *, struct sbuf *, size_t *); static void note_procstat_osrel(void *, struct sbuf *, size_t *); static void note_procstat_rlimit(void *, struct sbuf *, size_t *); static void note_procstat_umask(void *, struct sbuf *, size_t *); static void note_procstat_vmmap(void *, struct sbuf *, size_t *); static int core_compressed_write(void *base, size_t len, off_t offset, void *arg) { return (core_write((struct coredump_params *)arg, base, len, offset, UIO_SYSSPACE, NULL)); } int __elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags) { struct ucred *cred = td->td_ucred; int compm, error = 0; struct sseg_closure seginfo; struct note_info_list notelst; struct coredump_params params; struct note_info *ninfo; void *hdr, *tmpbuf; size_t hdrsize, notesz, coresize; hdr = NULL; tmpbuf = NULL; TAILQ_INIT(¬elst); /* Size the program segments. */ __elfN(size_segments)(td, &seginfo, flags); /* * Collect info about the core file header area. */ hdrsize = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) * (1 + seginfo.count); if (seginfo.count + 1 >= PN_XNUM) hdrsize += sizeof(Elf_Shdr); td->td_proc->p_sysent->sv_elf_core_prepare_notes(td, ¬elst, ¬esz); coresize = round_page(hdrsize + notesz) + seginfo.size; /* Set up core dump parameters. */ params.offset = 0; params.active_cred = cred; params.file_cred = NOCRED; params.td = td; params.vp = vp; params.comp = NULL; #ifdef RACCT if (racct_enable) { PROC_LOCK(td->td_proc); error = racct_add(td->td_proc, RACCT_CORE, coresize); PROC_UNLOCK(td->td_proc); if (error != 0) { error = EFAULT; goto done; } } #endif if (coresize >= limit) { error = EFAULT; goto done; } /* Create a compression stream if necessary. */ compm = compress_user_cores; if ((flags & (SVC_PT_COREDUMP | SVC_NOCOMPRESS)) == SVC_PT_COREDUMP && compm == 0) compm = COMPRESS_GZIP; if (compm != 0) { params.comp = compressor_init(core_compressed_write, compm, CORE_BUF_SIZE, compress_user_cores_level, ¶ms); if (params.comp == NULL) { error = EFAULT; goto done; } tmpbuf = malloc(CORE_BUF_SIZE, M_TEMP, M_WAITOK | M_ZERO); } /* * Allocate memory for building the header, fill it up, * and write it out following the notes. */ hdr = malloc(hdrsize, M_TEMP, M_WAITOK); error = __elfN(corehdr)(¶ms, seginfo.count, hdr, hdrsize, ¬elst, notesz, flags); /* Write the contents of all of the writable segments. 
*/ if (error == 0) { Elf_Phdr *php; off_t offset; int i; php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1; offset = round_page(hdrsize + notesz); for (i = 0; i < seginfo.count; i++) { error = core_output((char *)(uintptr_t)php->p_vaddr, php->p_filesz, offset, ¶ms, tmpbuf); if (error != 0) break; offset += php->p_filesz; php++; } if (error == 0 && params.comp != NULL) error = compressor_flush(params.comp); } if (error) { log(LOG_WARNING, "Failed to write core file for process %s (error %d)\n", curproc->p_comm, error); } done: free(tmpbuf, M_TEMP); if (params.comp != NULL) compressor_fini(params.comp); while ((ninfo = TAILQ_FIRST(¬elst)) != NULL) { TAILQ_REMOVE(¬elst, ninfo, link); free(ninfo, M_TEMP); } if (hdr != NULL) free(hdr, M_TEMP); return (error); } /* * A callback for each_dumpable_segment() to write out the segment's * program header entry. */ static void cb_put_phdr(vm_map_entry_t entry, void *closure) { struct phdr_closure *phc = (struct phdr_closure *)closure; Elf_Phdr *phdr = phc->phdr; phc->offset = round_page(phc->offset); phdr->p_type = PT_LOAD; phdr->p_offset = phc->offset; phdr->p_vaddr = entry->start; phdr->p_paddr = 0; phdr->p_filesz = phdr->p_memsz = entry->end - entry->start; phdr->p_align = PAGE_SIZE; phdr->p_flags = __elfN(untrans_prot)(entry->protection); phc->offset += phdr->p_filesz; phc->phdr++; } /* * A callback for each_dumpable_segment() to gather information about * the number of segments and their total size. */ static void cb_size_segment(vm_map_entry_t entry, void *closure) { struct sseg_closure *ssc = (struct sseg_closure *)closure; ssc->count++; ssc->size += entry->end - entry->start; } void __elfN(size_segments)(struct thread *td, struct sseg_closure *seginfo, int flags) { seginfo->count = 0; seginfo->size = 0; each_dumpable_segment(td, cb_size_segment, seginfo, flags); } /* * For each writable segment in the process's memory map, call the given * function with a pointer to the map entry and some arbitrary * caller-supplied data. */ static void each_dumpable_segment(struct thread *td, segment_callback func, void *closure, int flags) { struct proc *p = td->td_proc; vm_map_t map = &p->p_vmspace->vm_map; vm_map_entry_t entry; vm_object_t backing_object, object; bool ignore_entry; vm_map_lock_read(map); VM_MAP_ENTRY_FOREACH(entry, map) { /* * Don't dump inaccessible mappings, deal with legacy * coredump mode. * * Note that read-only segments related to the elf binary * are marked MAP_ENTRY_NOCOREDUMP now so we no longer * need to arbitrarily ignore such segments. */ if ((flags & SVC_ALL) == 0) { if (elf_legacy_coredump) { if ((entry->protection & VM_PROT_RW) != VM_PROT_RW) continue; } else { if ((entry->protection & VM_PROT_ALL) == 0) continue; } } /* * Dont include memory segment in the coredump if * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in * madvise(2). Do not dump submaps (i.e. parts of the * kernel map). */ if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) continue; if ((entry->eflags & MAP_ENTRY_NOCOREDUMP) != 0 && (flags & SVC_ALL) == 0) continue; if ((object = entry->object.vm_object) == NULL) continue; /* Ignore memory-mapped devices and such things. 
*/ VM_OBJECT_RLOCK(object); while ((backing_object = object->backing_object) != NULL) { VM_OBJECT_RLOCK(backing_object); VM_OBJECT_RUNLOCK(object); object = backing_object; } ignore_entry = (object->flags & OBJ_FICTITIOUS) != 0; VM_OBJECT_RUNLOCK(object); if (ignore_entry) continue; (*func)(entry, closure); } vm_map_unlock_read(map); } /* * Write the core file header to the file, including padding up to * the page boundary. */ static int __elfN(corehdr)(struct coredump_params *p, int numsegs, void *hdr, size_t hdrsize, struct note_info_list *notelst, size_t notesz, int flags) { struct note_info *ninfo; struct sbuf *sb; int error; /* Fill in the header. */ bzero(hdr, hdrsize); __elfN(puthdr)(p->td, hdr, hdrsize, numsegs, notesz, flags); sb = sbuf_new(NULL, NULL, CORE_BUF_SIZE, SBUF_FIXEDLEN); sbuf_set_drain(sb, sbuf_drain_core_output, p); sbuf_start_section(sb, NULL); sbuf_bcat(sb, hdr, hdrsize); TAILQ_FOREACH(ninfo, notelst, link) __elfN(putnote)(p->td, ninfo, sb); /* Align up to a page boundary for the program segments. */ sbuf_end_section(sb, -1, PAGE_SIZE, 0); error = sbuf_finish(sb); sbuf_delete(sb); return (error); } void __elfN(prepare_notes)(struct thread *td, struct note_info_list *list, size_t *sizep) { struct proc *p; struct thread *thr; size_t size; p = td->td_proc; size = 0; size += __elfN(register_note)(td, list, NT_PRPSINFO, __elfN(note_prpsinfo), p); /* * To have the debugger select the right thread (LWP) as the initial * thread, we dump the state of the thread passed to us in td first. * This is the thread that causes the core dump and thus likely to * be the right thread one wants to have selected in the debugger. */ thr = td; while (thr != NULL) { size += __elfN(prepare_register_notes)(td, list, thr); size += __elfN(register_note)(td, list, -1, __elfN(note_threadmd), thr); thr = thr == td ? 
TAILQ_FIRST(&p->p_threads) : TAILQ_NEXT(thr, td_plist); if (thr == td) thr = TAILQ_NEXT(thr, td_plist); } size += __elfN(register_note)(td, list, NT_PROCSTAT_PROC, __elfN(note_procstat_proc), p); size += __elfN(register_note)(td, list, NT_PROCSTAT_FILES, note_procstat_files, p); size += __elfN(register_note)(td, list, NT_PROCSTAT_VMMAP, note_procstat_vmmap, p); size += __elfN(register_note)(td, list, NT_PROCSTAT_GROUPS, note_procstat_groups, p); size += __elfN(register_note)(td, list, NT_PROCSTAT_UMASK, note_procstat_umask, p); size += __elfN(register_note)(td, list, NT_PROCSTAT_RLIMIT, note_procstat_rlimit, p); size += __elfN(register_note)(td, list, NT_PROCSTAT_OSREL, note_procstat_osrel, p); size += __elfN(register_note)(td, list, NT_PROCSTAT_PSSTRINGS, __elfN(note_procstat_psstrings), p); size += __elfN(register_note)(td, list, NT_PROCSTAT_AUXV, __elfN(note_procstat_auxv), p); + size += __elfN(register_note)(td, list, NT_PROCSTAT_KQUEUES, + __elfN(note_procstat_kqueues), p); *sizep = size; } void __elfN(puthdr)(struct thread *td, void *hdr, size_t hdrsize, int numsegs, size_t notesz, int flags) { Elf_Ehdr *ehdr; Elf_Phdr *phdr; Elf_Shdr *shdr; struct phdr_closure phc; Elf_Brandinfo *bi; ehdr = (Elf_Ehdr *)hdr; bi = td->td_proc->p_elf_brandinfo; ehdr->e_ident[EI_MAG0] = ELFMAG0; ehdr->e_ident[EI_MAG1] = ELFMAG1; ehdr->e_ident[EI_MAG2] = ELFMAG2; ehdr->e_ident[EI_MAG3] = ELFMAG3; ehdr->e_ident[EI_CLASS] = ELF_CLASS; ehdr->e_ident[EI_DATA] = ELF_DATA; ehdr->e_ident[EI_VERSION] = EV_CURRENT; ehdr->e_ident[EI_OSABI] = td->td_proc->p_sysent->sv_elf_core_osabi; ehdr->e_ident[EI_ABIVERSION] = 0; ehdr->e_ident[EI_PAD] = 0; ehdr->e_type = ET_CORE; ehdr->e_machine = bi->machine; ehdr->e_version = EV_CURRENT; ehdr->e_entry = 0; ehdr->e_phoff = sizeof(Elf_Ehdr); ehdr->e_flags = td->td_proc->p_elf_flags; ehdr->e_ehsize = sizeof(Elf_Ehdr); ehdr->e_phentsize = sizeof(Elf_Phdr); ehdr->e_shentsize = sizeof(Elf_Shdr); ehdr->e_shstrndx = SHN_UNDEF; if (numsegs + 1 < PN_XNUM) { ehdr->e_phnum = numsegs + 1; ehdr->e_shnum = 0; } else { ehdr->e_phnum = PN_XNUM; ehdr->e_shnum = 1; ehdr->e_shoff = ehdr->e_phoff + (numsegs + 1) * ehdr->e_phentsize; KASSERT(ehdr->e_shoff == hdrsize - sizeof(Elf_Shdr), ("e_shoff: %zu, hdrsize - shdr: %zu", (size_t)ehdr->e_shoff, hdrsize - sizeof(Elf_Shdr))); shdr = (Elf_Shdr *)((char *)hdr + ehdr->e_shoff); memset(shdr, 0, sizeof(*shdr)); /* * A special first section is used to hold large segment and * section counts. This was proposed by Sun Microsystems in * Solaris and has been adopted by Linux; the standard ELF * tools are already familiar with the technique. * * See table 7-7 of the Solaris "Linker and Libraries Guide" * (or 12-7 depending on the version of the document) for more * details. */ shdr->sh_type = SHT_NULL; shdr->sh_size = ehdr->e_shnum; shdr->sh_link = ehdr->e_shstrndx; shdr->sh_info = numsegs + 1; } /* * Fill in the program header entries. */ phdr = (Elf_Phdr *)((char *)hdr + ehdr->e_phoff); /* The note segement. */ phdr->p_type = PT_NOTE; phdr->p_offset = hdrsize; phdr->p_vaddr = 0; phdr->p_paddr = 0; phdr->p_filesz = notesz; phdr->p_memsz = 0; phdr->p_flags = PF_R; phdr->p_align = ELF_NOTE_ROUNDSIZE; phdr++; /* All the writable segments from the program. 
*/ phc.phdr = phdr; phc.offset = round_page(hdrsize + notesz); each_dumpable_segment(td, cb_put_phdr, &phc, flags); } static size_t __elfN(register_regset_note)(struct thread *td, struct note_info_list *list, struct regset *regset, struct thread *target_td) { const struct sysentvec *sv; struct note_info *ninfo; size_t size, notesize; size = 0; if (!regset->get(regset, target_td, NULL, &size) || size == 0) return (0); ninfo = malloc(sizeof(*ninfo), M_TEMP, M_ZERO | M_WAITOK); ninfo->type = regset->note; ninfo->regset = regset; ninfo->outarg = target_td; ninfo->outsize = size; TAILQ_INSERT_TAIL(list, ninfo, link); sv = td->td_proc->p_sysent; notesize = sizeof(Elf_Note) + /* note header */ roundup2(strlen(sv->sv_elf_core_abi_vendor) + 1, ELF_NOTE_ROUNDSIZE) + /* note name */ roundup2(size, ELF_NOTE_ROUNDSIZE); /* note description */ return (notesize); } size_t __elfN(register_note)(struct thread *td, struct note_info_list *list, int type, outfunc_t out, void *arg) { const struct sysentvec *sv; struct note_info *ninfo; size_t size, notesize; sv = td->td_proc->p_sysent; size = 0; out(arg, NULL, &size); ninfo = malloc(sizeof(*ninfo), M_TEMP, M_ZERO | M_WAITOK); ninfo->type = type; ninfo->outfunc = out; ninfo->outarg = arg; ninfo->outsize = size; TAILQ_INSERT_TAIL(list, ninfo, link); if (type == -1) return (size); notesize = sizeof(Elf_Note) + /* note header */ roundup2(strlen(sv->sv_elf_core_abi_vendor) + 1, ELF_NOTE_ROUNDSIZE) + /* note name */ roundup2(size, ELF_NOTE_ROUNDSIZE); /* note description */ return (notesize); } static size_t append_note_data(const void *src, void *dst, size_t len) { size_t padded_len; padded_len = roundup2(len, ELF_NOTE_ROUNDSIZE); if (dst != NULL) { bcopy(src, dst, len); bzero((char *)dst + len, padded_len - len); } return (padded_len); } size_t __elfN(populate_note)(int type, void *src, void *dst, size_t size, void **descp) { Elf_Note *note; char *buf; size_t notesize; buf = dst; if (buf != NULL) { note = (Elf_Note *)buf; note->n_namesz = sizeof(FREEBSD_ABI_VENDOR); note->n_descsz = size; note->n_type = type; buf += sizeof(*note); buf += append_note_data(FREEBSD_ABI_VENDOR, buf, sizeof(FREEBSD_ABI_VENDOR)); append_note_data(src, buf, size); if (descp != NULL) *descp = buf; } notesize = sizeof(Elf_Note) + /* note header */ roundup2(sizeof(FREEBSD_ABI_VENDOR), ELF_NOTE_ROUNDSIZE) + /* note name */ roundup2(size, ELF_NOTE_ROUNDSIZE); /* note description */ return (notesize); } static void __elfN(putnote)(struct thread *td, struct note_info *ninfo, struct sbuf *sb) { Elf_Note note; const struct sysentvec *sv; ssize_t old_len, sect_len; size_t new_len, descsz, i; if (ninfo->type == -1) { ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize); return; } sv = td->td_proc->p_sysent; note.n_namesz = strlen(sv->sv_elf_core_abi_vendor) + 1; note.n_descsz = ninfo->outsize; note.n_type = ninfo->type; sbuf_bcat(sb, ¬e, sizeof(note)); sbuf_start_section(sb, &old_len); sbuf_bcat(sb, sv->sv_elf_core_abi_vendor, strlen(sv->sv_elf_core_abi_vendor) + 1); sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0); if (note.n_descsz == 0) return; sbuf_start_section(sb, &old_len); if (ninfo->regset != NULL) { struct regset *regset = ninfo->regset; void *buf; buf = malloc(ninfo->outsize, M_TEMP, M_ZERO | M_WAITOK); (void)regset->get(regset, ninfo->outarg, buf, &ninfo->outsize); sbuf_bcat(sb, buf, ninfo->outsize); free(buf, M_TEMP); } else ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize); sect_len = sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0); if (sect_len < 0) return; new_len = 
(size_t)sect_len; descsz = roundup(note.n_descsz, ELF_NOTE_ROUNDSIZE); if (new_len < descsz) { /* * It is expected that individual note emitters will correctly * predict their expected output size and fill up to that size * themselves, padding in a format-specific way if needed. * However, in case they don't, just do it here with zeros. */ for (i = 0; i < descsz - new_len; i++) sbuf_putc(sb, 0); } else if (new_len > descsz) { /* * We can't always truncate sb -- we may have drained some * of it already. */ KASSERT(new_len == descsz, ("%s: Note type %u changed as we " "read it (%zu > %zu). Since it is longer than " "expected, this coredump's notes are corrupt. THIS " "IS A BUG in the note_procstat routine for type %u.\n", __func__, (unsigned)note.n_type, new_len, descsz, (unsigned)note.n_type)); } } /* * Miscellaneous note out functions. */ #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 #include #include typedef struct prstatus32 elf_prstatus_t; typedef struct prpsinfo32 elf_prpsinfo_t; typedef struct fpreg32 elf_prfpregset_t; typedef struct fpreg32 elf_fpregset_t; typedef struct reg32 elf_gregset_t; typedef struct thrmisc32 elf_thrmisc_t; typedef struct ptrace_lwpinfo32 elf_lwpinfo_t; #define ELF_KERN_PROC_MASK KERN_PROC_MASK32 typedef struct kinfo_proc32 elf_kinfo_proc_t; typedef uint32_t elf_ps_strings_t; #else typedef prstatus_t elf_prstatus_t; typedef prpsinfo_t elf_prpsinfo_t; typedef prfpregset_t elf_prfpregset_t; typedef prfpregset_t elf_fpregset_t; typedef gregset_t elf_gregset_t; typedef thrmisc_t elf_thrmisc_t; typedef struct ptrace_lwpinfo elf_lwpinfo_t; #define ELF_KERN_PROC_MASK 0 typedef struct kinfo_proc elf_kinfo_proc_t; typedef vm_offset_t elf_ps_strings_t; #endif static void __elfN(note_prpsinfo)(void *arg, struct sbuf *sb, size_t *sizep) { struct sbuf sbarg; size_t len; char *cp, *end; struct proc *p; elf_prpsinfo_t *psinfo; int error; p = arg; if (sb != NULL) { KASSERT(*sizep == sizeof(*psinfo), ("invalid size")); psinfo = malloc(sizeof(*psinfo), M_TEMP, M_ZERO | M_WAITOK); psinfo->pr_version = PRPSINFO_VERSION; psinfo->pr_psinfosz = sizeof(elf_prpsinfo_t); strlcpy(psinfo->pr_fname, p->p_comm, sizeof(psinfo->pr_fname)); PROC_LOCK(p); if (p->p_args != NULL) { len = sizeof(psinfo->pr_psargs) - 1; if (len > p->p_args->ar_length) len = p->p_args->ar_length; memcpy(psinfo->pr_psargs, p->p_args->ar_args, len); PROC_UNLOCK(p); error = 0; } else { _PHOLD(p); PROC_UNLOCK(p); sbuf_new(&sbarg, psinfo->pr_psargs, sizeof(psinfo->pr_psargs), SBUF_FIXEDLEN); error = proc_getargv(curthread, p, &sbarg); PRELE(p); if (sbuf_finish(&sbarg) == 0) { len = sbuf_len(&sbarg); if (len > 0) len--; } else { len = sizeof(psinfo->pr_psargs) - 1; } sbuf_delete(&sbarg); } if (error != 0 || len == 0 || (ssize_t)len == -1) strlcpy(psinfo->pr_psargs, p->p_comm, sizeof(psinfo->pr_psargs)); else { KASSERT(len < sizeof(psinfo->pr_psargs), ("len is too long: %zu vs %zu", len, sizeof(psinfo->pr_psargs))); cp = psinfo->pr_psargs; end = cp + len - 1; for (;;) { cp = memchr(cp, '\0', end - cp); if (cp == NULL) break; *cp = ' '; } } psinfo->pr_pid = p->p_pid; sbuf_bcat(sb, psinfo, sizeof(*psinfo)); free(psinfo, M_TEMP); } *sizep = sizeof(*psinfo); } static bool __elfN(get_prstatus)(struct regset *rs, struct thread *td, void *buf, size_t *sizep) { elf_prstatus_t *status; if (buf != NULL) { KASSERT(*sizep == sizeof(*status), ("%s: invalid size", __func__)); status = buf; memset(status, 0, *sizep); status->pr_version = PRSTATUS_VERSION; status->pr_statussz = sizeof(elf_prstatus_t); status->pr_gregsetsz = 
sizeof(elf_gregset_t); status->pr_fpregsetsz = sizeof(elf_fpregset_t); status->pr_osreldate = osreldate; status->pr_cursig = td->td_proc->p_sig; status->pr_pid = td->td_tid; #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 fill_regs32(td, &status->pr_reg); #else fill_regs(td, &status->pr_reg); #endif } *sizep = sizeof(*status); return (true); } static bool __elfN(set_prstatus)(struct regset *rs, struct thread *td, void *buf, size_t size) { elf_prstatus_t *status; KASSERT(size == sizeof(*status), ("%s: invalid size", __func__)); status = buf; #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 set_regs32(td, &status->pr_reg); #else set_regs(td, &status->pr_reg); #endif return (true); } static struct regset __elfN(regset_prstatus) = { .note = NT_PRSTATUS, .size = sizeof(elf_prstatus_t), .get = __elfN(get_prstatus), .set = __elfN(set_prstatus), }; ELF_REGSET(__elfN(regset_prstatus)); static bool __elfN(get_fpregset)(struct regset *rs, struct thread *td, void *buf, size_t *sizep) { elf_prfpregset_t *fpregset; if (buf != NULL) { KASSERT(*sizep == sizeof(*fpregset), ("%s: invalid size", __func__)); fpregset = buf; #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 fill_fpregs32(td, fpregset); #else fill_fpregs(td, fpregset); #endif } *sizep = sizeof(*fpregset); return (true); } static bool __elfN(set_fpregset)(struct regset *rs, struct thread *td, void *buf, size_t size) { elf_prfpregset_t *fpregset; fpregset = buf; KASSERT(size == sizeof(*fpregset), ("%s: invalid size", __func__)); #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 set_fpregs32(td, fpregset); #else set_fpregs(td, fpregset); #endif return (true); } static struct regset __elfN(regset_fpregset) = { .note = NT_FPREGSET, .size = sizeof(elf_prfpregset_t), .get = __elfN(get_fpregset), .set = __elfN(set_fpregset), }; ELF_REGSET(__elfN(regset_fpregset)); static bool __elfN(get_thrmisc)(struct regset *rs, struct thread *td, void *buf, size_t *sizep) { elf_thrmisc_t *thrmisc; if (buf != NULL) { KASSERT(*sizep == sizeof(*thrmisc), ("%s: invalid size", __func__)); thrmisc = buf; bzero(thrmisc, sizeof(*thrmisc)); strcpy(thrmisc->pr_tname, td->td_name); } *sizep = sizeof(*thrmisc); return (true); } static struct regset __elfN(regset_thrmisc) = { .note = NT_THRMISC, .size = sizeof(elf_thrmisc_t), .get = __elfN(get_thrmisc), }; ELF_REGSET(__elfN(regset_thrmisc)); static bool __elfN(get_lwpinfo)(struct regset *rs, struct thread *td, void *buf, size_t *sizep) { elf_lwpinfo_t pl; size_t size; int structsize; size = sizeof(structsize) + sizeof(pl); if (buf != NULL) { KASSERT(*sizep == size, ("%s: invalid size", __func__)); structsize = sizeof(pl); memcpy(buf, &structsize, sizeof(structsize)); bzero(&pl, sizeof(pl)); pl.pl_lwpid = td->td_tid; pl.pl_event = PL_EVENT_NONE; pl.pl_sigmask = td->td_sigmask; pl.pl_siglist = td->td_siglist; if (td->td_si.si_signo != 0) { pl.pl_event = PL_EVENT_SIGNAL; pl.pl_flags |= PL_FLAG_SI; #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 siginfo_to_siginfo32(&td->td_si, &pl.pl_siginfo); #else pl.pl_siginfo = td->td_si; #endif } strcpy(pl.pl_tdname, td->td_name); /* XXX TODO: supply more information in struct ptrace_lwpinfo*/ memcpy((int *)buf + 1, &pl, sizeof(pl)); } *sizep = size; return (true); } static struct regset __elfN(regset_lwpinfo) = { .note = NT_PTLWPINFO, .size = sizeof(int) + sizeof(elf_lwpinfo_t), .get = __elfN(get_lwpinfo), }; ELF_REGSET(__elfN(regset_lwpinfo)); static size_t __elfN(prepare_register_notes)(struct thread *td, struct note_info_list *list, struct thread *target_td) { 
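/*
 * The register-set notes above all follow one pattern; an additional
 * machine-dependent set would be wired up the same way.  Sketch only:
 * NT_FOO, struct foo_regs and td_foo_regs are hypothetical names.
 *
 *	static bool
 *	foo_get(struct regset *rs, struct thread *td, void *buf,
 *	    size_t *sizep)
 *	{
 *		if (buf != NULL)
 *			memcpy(buf, &td->td_foo_regs,
 *			    sizeof(struct foo_regs));
 *		*sizep = sizeof(struct foo_regs);
 *		return (true);
 *	}
 *
 *	static struct regset regset_foo = {
 *		.note = NT_FOO,
 *		.size = sizeof(struct foo_regs),
 *		.get  = foo_get,
 *	};
 *	ELF_REGSET(regset_foo);
 *
 * The function below then sizes and queues every set found in the
 * sysentvec regset table via __elfN(register_regset_note)().
 */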
struct sysentvec *sv = td->td_proc->p_sysent; struct regset **regsetp, **regset_end, *regset; size_t size; size = 0; if (target_td == td) cpu_update_pcb(target_td); /* NT_PRSTATUS must be the first register set note. */ size += __elfN(register_regset_note)(td, list, &__elfN(regset_prstatus), target_td); regsetp = sv->sv_regset_begin; if (regsetp == NULL) { /* XXX: This shouldn't be true for any FreeBSD ABIs. */ size += __elfN(register_regset_note)(td, list, &__elfN(regset_fpregset), target_td); return (size); } regset_end = sv->sv_regset_end; MPASS(regset_end != NULL); for (; regsetp < regset_end; regsetp++) { regset = *regsetp; if (regset->note == NT_PRSTATUS) continue; size += __elfN(register_regset_note)(td, list, regset, target_td); } return (size); } /* * Allow for MD specific notes, as well as any MD * specific preparations for writing MI notes. */ static void __elfN(note_threadmd)(void *arg, struct sbuf *sb, size_t *sizep) { struct thread *td; void *buf; size_t size; td = (struct thread *)arg; size = *sizep; if (size != 0 && sb != NULL) buf = malloc(size, M_TEMP, M_ZERO | M_WAITOK); else buf = NULL; size = 0; __elfN(dump_thread)(td, buf, &size); KASSERT(sb == NULL || *sizep == size, ("invalid size")); if (size != 0 && sb != NULL) sbuf_bcat(sb, buf, size); free(buf, M_TEMP); *sizep = size; } #ifdef KINFO_PROC_SIZE CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE); #endif static void __elfN(note_procstat_proc)(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize; p = arg; size = sizeof(structsize) + p->p_numthreads * sizeof(elf_kinfo_proc_t); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(elf_kinfo_proc_t); sbuf_bcat(sb, &structsize, sizeof(structsize)); sx_slock(&proctree_lock); PROC_LOCK(p); kern_proc_out(p, sb, ELF_KERN_PROC_MASK); sx_sunlock(&proctree_lock); } *sizep = size; } #ifdef KINFO_FILE_SIZE CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE); #endif static void note_procstat_files(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size, sect_sz, i; ssize_t start_len, sect_len; int structsize, filedesc_flags; if (coredump_pack_fileinfo) filedesc_flags = KERN_FILEDESC_PACK_KINFO; else filedesc_flags = 0; p = arg; structsize = sizeof(struct kinfo_file); if (sb == NULL) { size = 0; sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN); sbuf_set_drain(sb, sbuf_count_drain, &size); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); kern_proc_filedesc_out(p, sb, -1, filedesc_flags); sbuf_finish(sb); sbuf_delete(sb); *sizep = size; } else { sbuf_start_section(sb, &start_len); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); kern_proc_filedesc_out(p, sb, *sizep - sizeof(structsize), filedesc_flags); sect_len = sbuf_end_section(sb, start_len, 0, 0); if (sect_len < 0) return; sect_sz = sect_len; KASSERT(sect_sz <= *sizep, ("kern_proc_filedesc_out did not respect maxlen; " "requested %zu, got %zu", *sizep - sizeof(structsize), sect_sz - sizeof(structsize))); for (i = 0; i < *sizep - sect_sz && sb->s_error == 0; i++) sbuf_putc(sb, 0); } } #ifdef KINFO_VMENTRY_SIZE CTASSERT(sizeof(struct kinfo_vmentry) == KINFO_VMENTRY_SIZE); #endif static void note_procstat_vmmap(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize, vmmap_flags; if (coredump_pack_vmmapinfo) vmmap_flags = KERN_VMMAP_PACK_KINFO; else vmmap_flags = 0; p = arg; structsize = sizeof(struct kinfo_vmentry); if (sb == NULL) { size = 0; sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN); 
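/*
 * The procstat emitters here are called twice: once with sb == NULL just
 * to learn the note size, and once to produce the data.  The sizing pass
 * reuses the emitter by draining into a byte counter (sketch mirroring
 * note_procstat_files() and this function):
 *
 *	size_t size = 0;
 *	struct sbuf *sb;
 *
 *	sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN);
 *	sbuf_set_drain(sb, sbuf_count_drain, &size);
 *	sbuf_bcat(sb, &structsize, sizeof(structsize));
 *	... generate the payload into sb ...
 *	sbuf_finish(sb);
 *	sbuf_delete(sb);
 *	size now holds what the real pass will produce
 *
 * On the second pass the same payload goes into the caller's sbuf and, if
 * it falls short of the advertised size, it is zero-padded up to that size.
 */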
sbuf_set_drain(sb, sbuf_count_drain, &size); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); kern_proc_vmmap_out(p, sb, -1, vmmap_flags); sbuf_finish(sb); sbuf_delete(sb); *sizep = size; } else { sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); kern_proc_vmmap_out(p, sb, *sizep - sizeof(structsize), vmmap_flags); } } static void note_procstat_groups(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize; p = arg; size = sizeof(structsize) + p->p_ucred->cr_ngroups * sizeof(gid_t); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(gid_t); sbuf_bcat(sb, &structsize, sizeof(structsize)); sbuf_bcat(sb, p->p_ucred->cr_groups, p->p_ucred->cr_ngroups * sizeof(gid_t)); } *sizep = size; } static void note_procstat_umask(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize; p = arg; size = sizeof(structsize) + sizeof(p->p_pd->pd_cmask); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(p->p_pd->pd_cmask); sbuf_bcat(sb, &structsize, sizeof(structsize)); sbuf_bcat(sb, &p->p_pd->pd_cmask, sizeof(p->p_pd->pd_cmask)); } *sizep = size; } static void note_procstat_rlimit(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; struct rlimit rlim[RLIM_NLIMITS]; size_t size; int structsize, i; p = arg; size = sizeof(structsize) + sizeof(rlim); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(rlim); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); for (i = 0; i < RLIM_NLIMITS; i++) lim_rlimit_proc(p, i, &rlim[i]); PROC_UNLOCK(p); sbuf_bcat(sb, rlim, sizeof(rlim)); } *sizep = size; } static void note_procstat_osrel(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize; p = arg; size = sizeof(structsize) + sizeof(p->p_osrel); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(p->p_osrel); sbuf_bcat(sb, &structsize, sizeof(structsize)); sbuf_bcat(sb, &p->p_osrel, sizeof(p->p_osrel)); } *sizep = size; } static void __elfN(note_procstat_psstrings)(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; elf_ps_strings_t ps_strings; size_t size; int structsize; p = arg; size = sizeof(structsize) + sizeof(ps_strings); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(ps_strings); #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 ps_strings = PTROUT(PROC_PS_STRINGS(p)); #else ps_strings = PROC_PS_STRINGS(p); #endif sbuf_bcat(sb, &structsize, sizeof(structsize)); sbuf_bcat(sb, &ps_strings, sizeof(ps_strings)); } *sizep = size; } static void __elfN(note_procstat_auxv)(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize; p = arg; if (sb == NULL) { size = 0; sb = sbuf_new(NULL, NULL, AT_COUNT * sizeof(Elf_Auxinfo), SBUF_FIXEDLEN); sbuf_set_drain(sb, sbuf_count_drain, &size); sbuf_bcat(sb, &structsize, sizeof(structsize)); PHOLD(p); proc_getauxv(curthread, p, sb); PRELE(p); sbuf_finish(sb); sbuf_delete(sb); *sizep = size; } else { structsize = sizeof(Elf_Auxinfo); sbuf_bcat(sb, &structsize, sizeof(structsize)); PHOLD(p); proc_getauxv(curthread, p, sb); PRELE(p); } } +static void +__elfN(note_procstat_kqueues)(void *arg, struct sbuf *sb, size_t *sizep) +{ + struct proc *p; + size_t size, sect_sz, i; + ssize_t start_len, sect_len; + int structsize; + bool compat32; + +#if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 + compat32 = true; + structsize = sizeof(struct 
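/*
 * Every NT_PROCSTAT_* descriptor written here begins with a 32-bit element
 * size followed by the records themselves, so a consumer can parse it
 * without knowing the exact kernel structure revision.  Reader-side sketch
 * for the groups note above (desc and desclen are hypothetical variables
 * describing the raw descriptor):
 *
 *	int elemsz;
 *	size_t ngroups;
 *
 *	memcpy(&elemsz, desc, sizeof(elemsz));		sizeof(gid_t) here
 *	ngroups = (desclen - sizeof(elemsz)) / elemsz;
 *	the gid_t entries start at desc + sizeof(elemsz)
 */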
kinfo_knote32); +#else + compat32 = false; + structsize = sizeof(struct kinfo_knote); +#endif + p = arg; + if (sb == NULL) { + size = 0; + sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN); + sbuf_set_drain(sb, sbuf_count_drain, &size); + sbuf_bcat(sb, &structsize, sizeof(structsize)); + kern_proc_kqueues_out(p, sb, -1, compat32); + sbuf_finish(sb); + sbuf_delete(sb); + *sizep = size; + } else { + sbuf_start_section(sb, &start_len); + + sbuf_bcat(sb, &structsize, sizeof(structsize)); + kern_proc_kqueues_out(p, sb, *sizep - sizeof(structsize), + compat32); + + sect_len = sbuf_end_section(sb, start_len, 0, 0); + if (sect_len < 0) + return; + sect_sz = sect_len; + + KASSERT(sect_sz <= *sizep, + ("kern_proc_kqueue_out did not respect maxlen; " + "requested %zu, got %zu", *sizep - sizeof(structsize), + sect_sz - sizeof(structsize))); + + for (i = 0; i < *sizep - sect_sz && sb->s_error == 0; i++) + sbuf_putc(sb, 0); + } +} + #define MAX_NOTES_LOOP 4096 bool __elfN(parse_notes)(const struct image_params *imgp, const Elf_Note *checknote, const char *note_vendor, const Elf_Phdr *pnote, bool (*cb)(const Elf_Note *, void *, bool *), void *cb_arg) { const Elf_Note *note, *note0, *note_end; const char *note_name; char *buf; int i, error; bool res; /* We need some limit, might as well use PAGE_SIZE. */ if (pnote == NULL || pnote->p_filesz > PAGE_SIZE) return (false); ASSERT_VOP_LOCKED(imgp->vp, "parse_notes"); if (pnote->p_offset > PAGE_SIZE || pnote->p_filesz > PAGE_SIZE - pnote->p_offset) { buf = malloc(pnote->p_filesz, M_TEMP, M_NOWAIT); if (buf == NULL) { VOP_UNLOCK(imgp->vp); buf = malloc(pnote->p_filesz, M_TEMP, M_WAITOK); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); } error = vn_rdwr(UIO_READ, imgp->vp, buf, pnote->p_filesz, pnote->p_offset, UIO_SYSSPACE, IO_NODELOCKED, curthread->td_ucred, NOCRED, NULL, curthread); if (error != 0) { uprintf("i/o error PT_NOTE\n"); goto retf; } note = note0 = (const Elf_Note *)buf; note_end = (const Elf_Note *)(buf + pnote->p_filesz); } else { note = note0 = (const Elf_Note *)(imgp->image_header + pnote->p_offset); note_end = (const Elf_Note *)(imgp->image_header + pnote->p_offset + pnote->p_filesz); buf = NULL; } for (i = 0; i < MAX_NOTES_LOOP && note >= note0 && note < note_end; i++) { if (!aligned(note, Elf32_Addr)) { uprintf("Unaligned ELF note\n"); goto retf; } if ((const char *)note_end - (const char *)note < sizeof(Elf_Note)) { uprintf("ELF note to short\n"); goto retf; } if (note->n_namesz != checknote->n_namesz || note->n_descsz != checknote->n_descsz || note->n_type != checknote->n_type) goto nextnote; note_name = (const char *)(note + 1); if (note_name + checknote->n_namesz >= (const char *)note_end || strncmp(note_vendor, note_name, checknote->n_namesz) != 0) goto nextnote; if (cb(note, cb_arg, &res)) goto ret; nextnote: note = (const Elf_Note *)((const char *)(note + 1) + roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE) + roundup2(note->n_descsz, ELF_NOTE_ROUNDSIZE)); } if (i >= MAX_NOTES_LOOP) uprintf("ELF note parser reached %d notes\n", i); retf: res = false; ret: free(buf, M_TEMP); return (res); } struct brandnote_cb_arg { Elf_Brandnote *brandnote; int32_t *osrel; }; static bool brandnote_cb(const Elf_Note *note, void *arg0, bool *res) { struct brandnote_cb_arg *arg; arg = arg0; /* * Fetch the osreldate for binary from the ELF OSABI-note if * necessary. */ *res = (arg->brandnote->flags & BN_TRANSLATE_OSREL) != 0 && arg->brandnote->trans_osrel != NULL ? 
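/*
 * parse_notes() above walks a PT_NOTE segment with the standard ELF
 * stride: after the fixed header come the name and the descriptor, each
 * padded to 4 bytes, so the next note starts at
 *
 *	next = (const Elf_Note *)((const char *)(note + 1) +
 *	    roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE) +
 *	    roundup2(note->n_descsz, ELF_NOTE_ROUNDSIZE));
 *
 * The walk is capped at MAX_NOTES_LOOP entries and every candidate is
 * re-checked against the segment bounds, since the sizes come from an
 * untrusted binary.
 */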
arg->brandnote->trans_osrel(note, arg->osrel) : true; return (true); } static Elf_Note fctl_note = { .n_namesz = sizeof(FREEBSD_ABI_VENDOR), .n_descsz = sizeof(uint32_t), .n_type = NT_FREEBSD_FEATURE_CTL, }; struct fctl_cb_arg { bool *has_fctl0; uint32_t *fctl0; }; static bool note_fctl_cb(const Elf_Note *note, void *arg0, bool *res) { struct fctl_cb_arg *arg; const Elf32_Word *desc; uintptr_t p; arg = arg0; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE); desc = (const Elf32_Word *)p; *arg->has_fctl0 = true; *arg->fctl0 = desc[0]; *res = true; return (true); } /* * Try to find the appropriate ABI-note section for checknote, fetch * the osreldate and feature control flags for binary from the ELF * OSABI-note. Only the first page of the image is searched, the same * as for headers. */ static bool __elfN(check_note)(struct image_params *imgp, Elf_Brandnote *brandnote, int32_t *osrel, bool *has_fctl0, uint32_t *fctl0) { const Elf_Phdr *phdr; const Elf_Ehdr *hdr; struct brandnote_cb_arg b_arg; struct fctl_cb_arg f_arg; int i, j; hdr = (const Elf_Ehdr *)imgp->image_header; phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); b_arg.brandnote = brandnote; b_arg.osrel = osrel; f_arg.has_fctl0 = has_fctl0; f_arg.fctl0 = fctl0; for (i = 0; i < hdr->e_phnum; i++) { if (phdr[i].p_type == PT_NOTE && __elfN(parse_notes)(imgp, &brandnote->hdr, brandnote->vendor, &phdr[i], brandnote_cb, &b_arg)) { for (j = 0; j < hdr->e_phnum; j++) { if (phdr[j].p_type == PT_NOTE && __elfN(parse_notes)(imgp, &fctl_note, FREEBSD_ABI_VENDOR, &phdr[j], note_fctl_cb, &f_arg)) break; } return (true); } } return (false); } /* * Tell kern_execve.c about it, with a little help from the linker. */ static struct execsw __elfN(execsw) = { .ex_imgact = __CONCAT(exec_, __elfN(imgact)), .ex_name = __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) }; EXEC_SET(__CONCAT(elf, __ELF_WORD_SIZE), __elfN(execsw)); static vm_prot_t __elfN(trans_prot)(Elf_Word flags) { vm_prot_t prot; prot = 0; if (flags & PF_X) prot |= VM_PROT_EXECUTE; if (flags & PF_W) prot |= VM_PROT_WRITE; if (flags & PF_R) prot |= VM_PROT_READ; #if __ELF_WORD_SIZE == 32 && (defined(__amd64__) || defined(__i386__)) if (i386_read_exec && (flags & PF_R)) prot |= VM_PROT_EXECUTE; #endif return (prot); } static Elf_Word __elfN(untrans_prot)(vm_prot_t prot) { Elf_Word flags; flags = 0; if (prot & VM_PROT_EXECUTE) flags |= PF_X; if (prot & VM_PROT_READ) flags |= PF_R; if (prot & VM_PROT_WRITE) flags |= PF_W; return (flags); } diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index c7260f6bb267..8188323bdbc9 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -1,2999 +1,3046 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 1999,2000,2001 Jonathan Lemon * Copyright 2004 John-Mark Gurney * Copyright (c) 2009 Apple, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_ktrace.h" #include "opt_kqueue.h" #ifdef COMPAT_FREEBSD11 #define _WANT_FREEBSD11_KEVENT #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #ifdef COMPAT_FREEBSD32 #include #include #endif #include static MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system"); /* * This lock is used if multiple kq locks are required. This possibly * should be made into a per proc lock. */ static struct mtx kq_global; MTX_SYSINIT(kq_global, &kq_global, "kqueue order", MTX_DEF); #define KQ_GLOBAL_LOCK(lck, haslck) do { \ if (!haslck) \ mtx_lock(lck); \ haslck = 1; \ } while (0) #define KQ_GLOBAL_UNLOCK(lck, haslck) do { \ if (haslck) \ mtx_unlock(lck); \ haslck = 0; \ } while (0) TASKQUEUE_DEFINE_THREAD(kqueue_ctx); static int kevent_copyout(void *arg, struct kevent *kevp, int count); static int kevent_copyin(void *arg, struct kevent *kevp, int count); static int kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td, int mflag); static int kqueue_acquire(struct file *fp, struct kqueue **kqp); static void kqueue_release(struct kqueue *kq, int locked); static void kqueue_destroy(struct kqueue *kq); static void kqueue_drain(struct kqueue *kq, struct thread *td); static int kqueue_expand(struct kqueue *kq, const struct filterops *fops, uintptr_t ident, int mflag); static void kqueue_task(void *arg, int pending); static int kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops, const struct timespec *timeout, struct kevent *keva, struct thread *td); static void kqueue_wakeup(struct kqueue *kq); static const struct filterops *kqueue_fo_find(int filt); static void kqueue_fo_release(int filt); struct g_kevent_args; static int kern_kevent_generic(struct thread *td, struct g_kevent_args *uap, struct kevent_copyops *k_ops, const char *struct_name); static fo_ioctl_t kqueue_ioctl; static fo_poll_t kqueue_poll; static fo_kqfilter_t kqueue_kqfilter; static fo_stat_t kqueue_stat; static fo_close_t kqueue_close; static fo_fill_kinfo_t kqueue_fill_kinfo; static const struct fileops kqueueops = { .fo_read = invfo_rdwr, .fo_write = invfo_rdwr, .fo_truncate = invfo_truncate, .fo_ioctl = kqueue_ioctl, .fo_poll = kqueue_poll, .fo_kqfilter = kqueue_kqfilter, .fo_stat = kqueue_stat, .fo_close = kqueue_close, .fo_chmod = invfo_chmod, .fo_chown = invfo_chown, .fo_sendfile = invfo_sendfile, .fo_cmp = file_kcmp_generic, .fo_fill_kinfo = kqueue_fill_kinfo, }; static int knote_attach(struct knote *kn, struct kqueue 
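/*
 * Userland view of the filters declared just below (sketch, not part of
 * this file; it relies only on the public kqueue(2)/kevent(2) interface,
 * and pid stands for some child process of interest):
 *
 *	struct kevent ev[2];
 *	int kq, n;
 *
 *	kq = kqueue();
 *	EV_SET(&ev[0], 1, EVFILT_TIMER, EV_ADD, NOTE_MSECONDS, 500, NULL);
 *	EV_SET(&ev[1], pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, NULL);
 *	kevent(kq, ev, 2, NULL, 0, NULL);	register the two events
 *	n = kevent(kq, NULL, 0, ev, 2, NULL);	block until one fires
 *
 * timer_filtops and proc_filtops implement the kernel side of those two
 * registrations; kqueue_register() below is the common entry point.
 */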
*kq); static void knote_drop(struct knote *kn, struct thread *td); static void knote_drop_detached(struct knote *kn, struct thread *td); static void knote_enqueue(struct knote *kn); static void knote_dequeue(struct knote *kn); static void knote_init(void); static struct knote *knote_alloc(int mflag); static void knote_free(struct knote *kn); static void filt_kqdetach(struct knote *kn); static int filt_kqueue(struct knote *kn, long hint); static int filt_procattach(struct knote *kn); static void filt_procdetach(struct knote *kn); static int filt_proc(struct knote *kn, long hint); static int filt_fileattach(struct knote *kn); static void filt_timerexpire(void *knx); static void filt_timerexpire_l(struct knote *kn, bool proc_locked); static int filt_timerattach(struct knote *kn); static void filt_timerdetach(struct knote *kn); static void filt_timerstart(struct knote *kn, sbintime_t to); static void filt_timertouch(struct knote *kn, struct kevent *kev, u_long type); static int filt_timervalidate(struct knote *kn, sbintime_t *to); static int filt_timer(struct knote *kn, long hint); static int filt_userattach(struct knote *kn); static void filt_userdetach(struct knote *kn); static int filt_user(struct knote *kn, long hint); static void filt_usertouch(struct knote *kn, struct kevent *kev, u_long type); static const struct filterops file_filtops = { .f_isfd = 1, .f_attach = filt_fileattach, }; static const struct filterops kqread_filtops = { .f_isfd = 1, .f_detach = filt_kqdetach, .f_event = filt_kqueue, }; /* XXX - move to kern_proc.c? */ static const struct filterops proc_filtops = { .f_isfd = 0, .f_attach = filt_procattach, .f_detach = filt_procdetach, .f_event = filt_proc, }; static const struct filterops timer_filtops = { .f_isfd = 0, .f_attach = filt_timerattach, .f_detach = filt_timerdetach, .f_event = filt_timer, .f_touch = filt_timertouch, }; static const struct filterops user_filtops = { .f_attach = filt_userattach, .f_detach = filt_userdetach, .f_event = filt_user, .f_touch = filt_usertouch, }; static uma_zone_t knote_zone; static unsigned int __exclusive_cache_line kq_ncallouts; static unsigned int kq_calloutmax = 4 * 1024; SYSCTL_UINT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW, &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue"); /* XXX - ensure not influx ? 
*/ #define KNOTE_ACTIVATE(kn, islock) do { \ if ((islock)) \ mtx_assert(&(kn)->kn_kq->kq_lock, MA_OWNED); \ else \ KQ_LOCK((kn)->kn_kq); \ (kn)->kn_status |= KN_ACTIVE; \ if (((kn)->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) \ knote_enqueue((kn)); \ if (!(islock)) \ KQ_UNLOCK((kn)->kn_kq); \ } while (0) #define KQ_LOCK(kq) do { \ mtx_lock(&(kq)->kq_lock); \ } while (0) #define KQ_FLUX_WAKEUP(kq) do { \ if (((kq)->kq_state & KQ_FLUXWAIT) == KQ_FLUXWAIT) { \ (kq)->kq_state &= ~KQ_FLUXWAIT; \ wakeup((kq)); \ } \ } while (0) #define KQ_UNLOCK_FLUX(kq) do { \ KQ_FLUX_WAKEUP(kq); \ mtx_unlock(&(kq)->kq_lock); \ } while (0) #define KQ_UNLOCK(kq) do { \ mtx_unlock(&(kq)->kq_lock); \ } while (0) #define KQ_OWNED(kq) do { \ mtx_assert(&(kq)->kq_lock, MA_OWNED); \ } while (0) #define KQ_NOTOWNED(kq) do { \ mtx_assert(&(kq)->kq_lock, MA_NOTOWNED); \ } while (0) static struct knlist * kn_list_lock(struct knote *kn) { struct knlist *knl; knl = kn->kn_knlist; if (knl != NULL) knl->kl_lock(knl->kl_lockarg); return (knl); } static void kn_list_unlock(struct knlist *knl) { bool do_free; if (knl == NULL) return; do_free = knl->kl_autodestroy && knlist_empty(knl); knl->kl_unlock(knl->kl_lockarg); if (do_free) { knlist_destroy(knl); free(knl, M_KQUEUE); } } static bool kn_in_flux(struct knote *kn) { return (kn->kn_influx > 0); } static void kn_enter_flux(struct knote *kn) { KQ_OWNED(kn->kn_kq); MPASS(kn->kn_influx < INT_MAX); kn->kn_influx++; } static bool kn_leave_flux(struct knote *kn) { KQ_OWNED(kn->kn_kq); MPASS(kn->kn_influx > 0); kn->kn_influx--; return (kn->kn_influx == 0); } #define KNL_ASSERT_LOCK(knl, islocked) do { \ if (islocked) \ KNL_ASSERT_LOCKED(knl); \ else \ KNL_ASSERT_UNLOCKED(knl); \ } while (0) #ifdef INVARIANTS #define KNL_ASSERT_LOCKED(knl) do { \ knl->kl_assert_lock((knl)->kl_lockarg, LA_LOCKED); \ } while (0) #define KNL_ASSERT_UNLOCKED(knl) do { \ knl->kl_assert_lock((knl)->kl_lockarg, LA_UNLOCKED); \ } while (0) #else /* !INVARIANTS */ #define KNL_ASSERT_LOCKED(knl) do {} while (0) #define KNL_ASSERT_UNLOCKED(knl) do {} while (0) #endif /* INVARIANTS */ #ifndef KN_HASHSIZE #define KN_HASHSIZE 64 /* XXX should be tunable */ #endif #define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask)) static int filt_nullattach(struct knote *kn) { return (ENXIO); }; static const struct filterops null_filtops = { .f_isfd = 0, .f_attach = filt_nullattach, }; /* XXX - make SYSINIT to add these, and move into respective modules. */ extern const struct filterops sig_filtops; extern const struct filterops fs_filtops; /* * Table for all system-defined filters. */ static struct mtx filterops_lock; MTX_SYSINIT(kqueue_filterops, &filterops_lock, "protect sysfilt_ops", MTX_DEF); static struct { const struct filterops *for_fop; int for_nolock; int for_refcnt; } sysfilt_ops[EVFILT_SYSCOUNT] = { [~EVFILT_READ] = { &file_filtops, 1 }, [~EVFILT_WRITE] = { &file_filtops, 1 }, [~EVFILT_AIO] = { &null_filtops }, [~EVFILT_VNODE] = { &file_filtops, 1 }, [~EVFILT_PROC] = { &proc_filtops, 1 }, [~EVFILT_SIGNAL] = { &sig_filtops, 1 }, [~EVFILT_TIMER] = { &timer_filtops, 1 }, [~EVFILT_PROCDESC] = { &file_filtops, 1 }, [~EVFILT_FS] = { &fs_filtops, 1 }, [~EVFILT_LIO] = { &null_filtops }, [~EVFILT_USER] = { &user_filtops, 1 }, [~EVFILT_SENDFILE] = { &null_filtops }, [~EVFILT_EMPTY] = { &file_filtops, 1 }, }; /* * Simple redirection for all cdevsw style objects to call their fo_kqfilter * method. 
*/ static int filt_fileattach(struct knote *kn) { return (fo_kqfilter(kn->kn_fp, kn)); } /*ARGSUSED*/ static int kqueue_kqfilter(struct file *fp, struct knote *kn) { struct kqueue *kq = kn->kn_fp->f_data; if (kn->kn_filter != EVFILT_READ) return (EINVAL); kn->kn_status |= KN_KQUEUE; kn->kn_fop = &kqread_filtops; knlist_add(&kq->kq_sel.si_note, kn, 0); return (0); } static void filt_kqdetach(struct knote *kn) { struct kqueue *kq = kn->kn_fp->f_data; knlist_remove(&kq->kq_sel.si_note, kn, 0); } /*ARGSUSED*/ static int filt_kqueue(struct knote *kn, long hint) { struct kqueue *kq = kn->kn_fp->f_data; kn->kn_data = kq->kq_count; return (kn->kn_data > 0); } /* XXX - move to kern_proc.c? */ static int filt_procattach(struct knote *kn) { struct proc *p; int error; bool exiting, immediate; exiting = immediate = false; if (kn->kn_sfflags & NOTE_EXIT) p = pfind_any(kn->kn_id); else p = pfind(kn->kn_id); if (p == NULL) return (ESRCH); if (p->p_flag & P_WEXIT) exiting = true; if ((error = p_cansee(curthread, p))) { PROC_UNLOCK(p); return (error); } kn->kn_ptr.p_proc = p; kn->kn_flags |= EV_CLEAR; /* automatically set */ /* * Internal flag indicating registration done by kernel for the * purposes of getting a NOTE_CHILD notification. */ if (kn->kn_flags & EV_FLAG2) { kn->kn_flags &= ~EV_FLAG2; kn->kn_data = kn->kn_sdata; /* ppid */ kn->kn_fflags = NOTE_CHILD; kn->kn_sfflags &= ~(NOTE_EXIT | NOTE_EXEC | NOTE_FORK); immediate = true; /* Force immediate activation of child note. */ } /* * Internal flag indicating registration done by kernel (for other than * NOTE_CHILD). */ if (kn->kn_flags & EV_FLAG1) { kn->kn_flags &= ~EV_FLAG1; } knlist_add(p->p_klist, kn, 1); /* * Immediately activate any child notes or, in the case of a zombie * target process, exit notes. The latter is necessary to handle the * case where the target process, e.g. a child, dies before the kevent * is registered. */ if (immediate || (exiting && filt_proc(kn, NOTE_EXIT))) KNOTE_ACTIVATE(kn, 0); PROC_UNLOCK(p); return (0); } /* * The knote may be attached to a different process, which may exit, * leaving nothing for the knote to be attached to. So when the process * exits, the knote is marked as DETACHED and also flagged as ONESHOT so * it will be deleted when read out. However, as part of the knote deletion, * this routine is called, so a check is needed to avoid actually performing * a detach, because the original process does not exist any more. */ /* XXX - move to kern_proc.c? */ static void filt_procdetach(struct knote *kn) { knlist_remove(kn->kn_knlist, kn, 0); kn->kn_ptr.p_proc = NULL; } /* XXX - move to kern_proc.c? */ static int filt_proc(struct knote *kn, long hint) { struct proc *p; u_int event; p = kn->kn_ptr.p_proc; if (p == NULL) /* already activated, from attach filter */ return (0); /* Mask off extra data. */ event = (u_int)hint & NOTE_PCTRLMASK; /* If the user is interested in this event, record it. */ if (kn->kn_sfflags & event) kn->kn_fflags |= event; /* Process is gone, so flag the event as finished. */ if (event == NOTE_EXIT) { kn->kn_flags |= EV_EOF | EV_ONESHOT; kn->kn_ptr.p_proc = NULL; if (kn->kn_fflags & NOTE_EXIT) kn->kn_data = KW_EXITCODE(p->p_xexit, p->p_xsig); if (kn->kn_fflags == 0) kn->kn_flags |= EV_DROP; return (1); } return (kn->kn_fflags != 0); } /* * Called when the process forked. It mostly does the same as the * knote(), activating all knotes registered to be activated when the * process forked. Additionally, for each knote attached to the * parent, check whether user wants to track the new process. 
If so * attach a new knote to it, and immediately report an event with the * child's pid. */ void knote_fork(struct knlist *list, int pid) { struct kqueue *kq; struct knote *kn; struct kevent kev; int error; MPASS(list != NULL); KNL_ASSERT_LOCKED(list); if (SLIST_EMPTY(&list->kl_list)) return; memset(&kev, 0, sizeof(kev)); SLIST_FOREACH(kn, &list->kl_list, kn_selnext) { kq = kn->kn_kq; KQ_LOCK(kq); if (kn_in_flux(kn) && (kn->kn_status & KN_SCAN) == 0) { KQ_UNLOCK(kq); continue; } /* * The same as knote(), activate the event. */ if ((kn->kn_sfflags & NOTE_TRACK) == 0) { if (kn->kn_fop->f_event(kn, NOTE_FORK)) KNOTE_ACTIVATE(kn, 1); KQ_UNLOCK(kq); continue; } /* * The NOTE_TRACK case. In addition to the activation * of the event, we need to register new events to * track the child. Drop the locks in preparation for * the call to kqueue_register(). */ kn_enter_flux(kn); KQ_UNLOCK(kq); list->kl_unlock(list->kl_lockarg); /* * Activate existing knote and register tracking knotes with * new process. * * First register a knote to get just the child notice. This * must be a separate note from a potential NOTE_EXIT * notification since both NOTE_CHILD and NOTE_EXIT are defined * to use the data field (in conflicting ways). */ kev.ident = pid; kev.filter = kn->kn_filter; kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_ONESHOT | EV_FLAG2; kev.fflags = kn->kn_sfflags; kev.data = kn->kn_id; /* parent */ kev.udata = kn->kn_kevent.udata;/* preserve udata */ error = kqueue_register(kq, &kev, NULL, M_NOWAIT); if (error) kn->kn_fflags |= NOTE_TRACKERR; /* * Then register another knote to track other potential events * from the new process. */ kev.ident = pid; kev.filter = kn->kn_filter; kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1; kev.fflags = kn->kn_sfflags; kev.data = kn->kn_id; /* parent */ kev.udata = kn->kn_kevent.udata;/* preserve udata */ error = kqueue_register(kq, &kev, NULL, M_NOWAIT); if (error) kn->kn_fflags |= NOTE_TRACKERR; if (kn->kn_fop->f_event(kn, NOTE_FORK)) KNOTE_ACTIVATE(kn, 0); list->kl_lock(list->kl_lockarg); KQ_LOCK(kq); kn_leave_flux(kn); KQ_UNLOCK_FLUX(kq); } } /* * XXX: EVFILT_TIMER should perhaps live in kern_time.c beside the * interval timer support code. */ #define NOTE_TIMER_PRECMASK \ (NOTE_SECONDS | NOTE_MSECONDS | NOTE_USECONDS | NOTE_NSECONDS) static sbintime_t timer2sbintime(int64_t data, int flags) { int64_t secs; /* * Macros for converting to the fractional second portion of an * sbintime_t using 64bit multiplication to improve precision. 
*/ #define NS_TO_SBT(ns) (((ns) * (((uint64_t)1 << 63) / 500000000)) >> 32) #define US_TO_SBT(us) (((us) * (((uint64_t)1 << 63) / 500000)) >> 32) #define MS_TO_SBT(ms) (((ms) * (((uint64_t)1 << 63) / 500)) >> 32) switch (flags & NOTE_TIMER_PRECMASK) { case NOTE_SECONDS: #ifdef __LP64__ if (data > (SBT_MAX / SBT_1S)) return (SBT_MAX); #endif return ((sbintime_t)data << 32); case NOTE_MSECONDS: /* FALLTHROUGH */ case 0: if (data >= 1000) { secs = data / 1000; #ifdef __LP64__ if (secs > (SBT_MAX / SBT_1S)) return (SBT_MAX); #endif return (secs << 32 | MS_TO_SBT(data % 1000)); } return (MS_TO_SBT(data)); case NOTE_USECONDS: if (data >= 1000000) { secs = data / 1000000; #ifdef __LP64__ if (secs > (SBT_MAX / SBT_1S)) return (SBT_MAX); #endif return (secs << 32 | US_TO_SBT(data % 1000000)); } return (US_TO_SBT(data)); case NOTE_NSECONDS: if (data >= 1000000000) { secs = data / 1000000000; #ifdef __LP64__ if (secs > (SBT_MAX / SBT_1S)) return (SBT_MAX); #endif return (secs << 32 | NS_TO_SBT(data % 1000000000)); } return (NS_TO_SBT(data)); default: break; } return (-1); } struct kq_timer_cb_data { struct callout c; struct proc *p; struct knote *kn; int cpuid; int flags; TAILQ_ENTRY(kq_timer_cb_data) link; sbintime_t next; /* next timer event fires at */ sbintime_t to; /* precalculated timer period, 0 for abs */ }; #define KQ_TIMER_CB_ENQUEUED 0x01 static void kqtimer_sched_callout(struct kq_timer_cb_data *kc) { callout_reset_sbt_on(&kc->c, kc->next, 0, filt_timerexpire, kc->kn, kc->cpuid, C_ABSOLUTE); } void kqtimer_proc_continue(struct proc *p) { struct kq_timer_cb_data *kc, *kc1; struct bintime bt; sbintime_t now; PROC_LOCK_ASSERT(p, MA_OWNED); getboottimebin(&bt); now = bttosbt(bt); TAILQ_FOREACH_SAFE(kc, &p->p_kqtim_stop, link, kc1) { TAILQ_REMOVE(&p->p_kqtim_stop, kc, link); kc->flags &= ~KQ_TIMER_CB_ENQUEUED; if (kc->next <= now) filt_timerexpire_l(kc->kn, true); else kqtimer_sched_callout(kc); } } static void filt_timerexpire_l(struct knote *kn, bool proc_locked) { struct kq_timer_cb_data *kc; struct proc *p; uint64_t delta; sbintime_t now; kc = kn->kn_ptr.p_v; if ((kn->kn_flags & EV_ONESHOT) != 0 || kc->to == 0) { kn->kn_data++; KNOTE_ACTIVATE(kn, 0); return; } now = sbinuptime(); if (now >= kc->next) { delta = (now - kc->next) / kc->to; if (delta == 0) delta = 1; kn->kn_data += delta; kc->next += delta * kc->to; if (now >= kc->next) /* overflow */ kc->next = now + kc->to; KNOTE_ACTIVATE(kn, 0); /* XXX - handle locking */ } /* * Initial check for stopped kc->p is racy. It is fine to * miss the set of the stop flags, at worst we would schedule * one more callout. On the other hand, it is not fine to not * schedule when we we missed clearing of the flags, we * recheck them under the lock and observe consistent state. 
*/ p = kc->p; if (P_SHOULDSTOP(p) || P_KILLED(p)) { if (!proc_locked) PROC_LOCK(p); if (P_SHOULDSTOP(p) || P_KILLED(p)) { if ((kc->flags & KQ_TIMER_CB_ENQUEUED) == 0) { kc->flags |= KQ_TIMER_CB_ENQUEUED; TAILQ_INSERT_TAIL(&p->p_kqtim_stop, kc, link); } if (!proc_locked) PROC_UNLOCK(p); return; } if (!proc_locked) PROC_UNLOCK(p); } kqtimer_sched_callout(kc); } static void filt_timerexpire(void *knx) { filt_timerexpire_l(knx, false); } /* * data contains amount of time to sleep */ static int filt_timervalidate(struct knote *kn, sbintime_t *to) { struct bintime bt; sbintime_t sbt; if (kn->kn_sdata < 0) return (EINVAL); if (kn->kn_sdata == 0 && (kn->kn_flags & EV_ONESHOT) == 0) kn->kn_sdata = 1; /* * The only fflags values supported are the timer unit * (precision) and the absolute time indicator. */ if ((kn->kn_sfflags & ~(NOTE_TIMER_PRECMASK | NOTE_ABSTIME)) != 0) return (EINVAL); *to = timer2sbintime(kn->kn_sdata, kn->kn_sfflags); if (*to < 0) return (EINVAL); if ((kn->kn_sfflags & NOTE_ABSTIME) != 0) { getboottimebin(&bt); sbt = bttosbt(bt); *to = MAX(0, *to - sbt); } return (0); } static int filt_timerattach(struct knote *kn) { struct kq_timer_cb_data *kc; sbintime_t to; int error; to = -1; error = filt_timervalidate(kn, &to); if (error != 0) return (error); KASSERT(to > 0 || (kn->kn_flags & EV_ONESHOT) != 0 || (kn->kn_sfflags & NOTE_ABSTIME) != 0, ("%s: periodic timer has a calculated zero timeout", __func__)); KASSERT(to >= 0, ("%s: timer has a calculated negative timeout", __func__)); if (atomic_fetchadd_int(&kq_ncallouts, 1) + 1 > kq_calloutmax) { atomic_subtract_int(&kq_ncallouts, 1); return (ENOMEM); } if ((kn->kn_sfflags & NOTE_ABSTIME) == 0) kn->kn_flags |= EV_CLEAR; /* automatically set */ kn->kn_status &= ~KN_DETACHED; /* knlist_add clears it */ kn->kn_ptr.p_v = kc = malloc(sizeof(*kc), M_KQUEUE, M_WAITOK); kc->kn = kn; kc->p = curproc; kc->cpuid = PCPU_GET(cpuid); kc->flags = 0; callout_init(&kc->c, 1); filt_timerstart(kn, to); return (0); } static void filt_timerstart(struct knote *kn, sbintime_t to) { struct kq_timer_cb_data *kc; kc = kn->kn_ptr.p_v; if ((kn->kn_sfflags & NOTE_ABSTIME) != 0) { kc->next = to; kc->to = 0; } else { kc->next = to + sbinuptime(); kc->to = to; } kqtimer_sched_callout(kc); } static void filt_timerdetach(struct knote *kn) { struct kq_timer_cb_data *kc; unsigned int old __unused; bool pending; kc = kn->kn_ptr.p_v; do { callout_drain(&kc->c); /* * kqtimer_proc_continue() might have rescheduled this callout. * Double-check, using the process mutex as an interlock. */ PROC_LOCK(kc->p); if ((kc->flags & KQ_TIMER_CB_ENQUEUED) != 0) { kc->flags &= ~KQ_TIMER_CB_ENQUEUED; TAILQ_REMOVE(&kc->p->p_kqtim_stop, kc, link); } pending = callout_pending(&kc->c); PROC_UNLOCK(kc->p); } while (pending); free(kc, M_KQUEUE); old = atomic_fetchadd_int(&kq_ncallouts, -1); KASSERT(old > 0, ("Number of callouts cannot become negative")); kn->kn_status |= KN_DETACHED; /* knlist_remove sets it */ } static void filt_timertouch(struct knote *kn, struct kevent *kev, u_long type) { struct kq_timer_cb_data *kc; struct kqueue *kq; sbintime_t to; int error; switch (type) { case EVENT_REGISTER: /* Handle re-added timers that update data/fflags */ if (kev->flags & EV_ADD) { kc = kn->kn_ptr.p_v; /* Drain any existing callout. */ callout_drain(&kc->c); /* Throw away any existing undelivered record * of the timer expiration. 
This is done under * the presumption that if a process is * re-adding this timer with new parameters, * it is no longer interested in what may have * happened under the old parameters. If it is * interested, it can wait for the expiration, * delete the old timer definition, and then * add the new one. * * This has to be done while the kq is locked: * - if enqueued, dequeue * - make it no longer active * - clear the count of expiration events */ kq = kn->kn_kq; KQ_LOCK(kq); if (kn->kn_status & KN_QUEUED) knote_dequeue(kn); kn->kn_status &= ~KN_ACTIVE; kn->kn_data = 0; KQ_UNLOCK(kq); /* Reschedule timer based on new data/fflags */ kn->kn_sfflags = kev->fflags; kn->kn_sdata = kev->data; error = filt_timervalidate(kn, &to); if (error != 0) { kn->kn_flags |= EV_ERROR; kn->kn_data = error; } else filt_timerstart(kn, to); } break; case EVENT_PROCESS: *kev = kn->kn_kevent; if (kn->kn_flags & EV_CLEAR) { kn->kn_data = 0; kn->kn_fflags = 0; } break; default: panic("filt_timertouch() - invalid type (%ld)", type); break; } } static int filt_timer(struct knote *kn, long hint) { return (kn->kn_data != 0); } static int filt_userattach(struct knote *kn) { /* * EVFILT_USER knotes are not attached to anything in the kernel. */ kn->kn_hook = NULL; if (kn->kn_fflags & NOTE_TRIGGER) kn->kn_hookid = 1; else kn->kn_hookid = 0; return (0); } static void filt_userdetach(__unused struct knote *kn) { /* * EVFILT_USER knotes are not attached to anything in the kernel. */ } static int filt_user(struct knote *kn, __unused long hint) { return (kn->kn_hookid); } static void filt_usertouch(struct knote *kn, struct kevent *kev, u_long type) { u_int ffctrl; switch (type) { case EVENT_REGISTER: if (kev->fflags & NOTE_TRIGGER) kn->kn_hookid = 1; ffctrl = kev->fflags & NOTE_FFCTRLMASK; kev->fflags &= NOTE_FFLAGSMASK; switch (ffctrl) { case NOTE_FFNOP: break; case NOTE_FFAND: kn->kn_sfflags &= kev->fflags; break; case NOTE_FFOR: kn->kn_sfflags |= kev->fflags; break; case NOTE_FFCOPY: kn->kn_sfflags = kev->fflags; break; default: /* XXX Return error? */ break; } kn->kn_sdata = kev->data; if (kev->flags & EV_CLEAR) { kn->kn_hookid = 0; kn->kn_data = 0; kn->kn_fflags = 0; } break; case EVENT_PROCESS: *kev = kn->kn_kevent; kev->fflags = kn->kn_sfflags; kev->data = kn->kn_sdata; if (kn->kn_flags & EV_CLEAR) { kn->kn_hookid = 0; kn->kn_data = 0; kn->kn_fflags = 0; } break; default: panic("filt_usertouch() - invalid type (%ld)", type); break; } } int sys_kqueue(struct thread *td, struct kqueue_args *uap) { return (kern_kqueue(td, 0, NULL)); } int sys_kqueuex(struct thread *td, struct kqueuex_args *uap) { int flags; if ((uap->flags & ~(KQUEUE_CLOEXEC)) != 0) return (EINVAL); flags = 0; if ((uap->flags & KQUEUE_CLOEXEC) != 0) flags |= O_CLOEXEC; return (kern_kqueue(td, flags, NULL)); } static void kqueue_init(struct kqueue *kq) { mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF | MTX_DUPOK); TAILQ_INIT(&kq->kq_head); knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock); TASK_INIT(&kq->kq_task, 0, kqueue_task, kq); } int kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps) { struct filedesc *fdp; struct kqueue *kq; struct file *fp; struct ucred *cred; int fd, error; fdp = td->td_proc->p_fd; cred = td->td_ucred; if (!chgkqcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_KQUEUES))) return (ENOMEM); error = falloc_caps(td, &fp, &fd, flags, fcaps); if (error != 0) { chgkqcnt(cred->cr_ruidinfo, -1, 0); return (error); } /* An extra reference on `fp' has been held for us by falloc(). 
*/ kq = malloc(sizeof *kq, M_KQUEUE, M_WAITOK | M_ZERO); kqueue_init(kq); kq->kq_fdp = fdp; kq->kq_cred = crhold(cred); FILEDESC_XLOCK(fdp); TAILQ_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list); FILEDESC_XUNLOCK(fdp); finit(fp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops); fdrop(fp, td); td->td_retval[0] = fd; return (0); } struct g_kevent_args { int fd; const void *changelist; int nchanges; void *eventlist; int nevents; const struct timespec *timeout; }; int sys_kevent(struct thread *td, struct kevent_args *uap) { struct kevent_copyops k_ops = { .arg = uap, .k_copyout = kevent_copyout, .k_copyin = kevent_copyin, .kevent_size = sizeof(struct kevent), }; struct g_kevent_args gk_args = { .fd = uap->fd, .changelist = uap->changelist, .nchanges = uap->nchanges, .eventlist = uap->eventlist, .nevents = uap->nevents, .timeout = uap->timeout, }; return (kern_kevent_generic(td, &gk_args, &k_ops, "kevent")); } static int kern_kevent_generic(struct thread *td, struct g_kevent_args *uap, struct kevent_copyops *k_ops, const char *struct_name) { struct timespec ts, *tsp; #ifdef KTRACE struct kevent *eventlist = uap->eventlist; #endif int error; if (uap->timeout != NULL) { error = copyin(uap->timeout, &ts, sizeof(ts)); if (error) return (error); tsp = &ts; } else tsp = NULL; #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT_ARRAY)) ktrstructarray(struct_name, UIO_USERSPACE, uap->changelist, uap->nchanges, k_ops->kevent_size); #endif error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents, k_ops, tsp); #ifdef KTRACE if (error == 0 && KTRPOINT(td, KTR_STRUCT_ARRAY)) ktrstructarray(struct_name, UIO_USERSPACE, eventlist, td->td_retval[0], k_ops->kevent_size); #endif return (error); } /* * Copy 'count' items into the destination list pointed to by uap->eventlist. */ static int kevent_copyout(void *arg, struct kevent *kevp, int count) { struct kevent_args *uap; int error; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct kevent_args *)arg; error = copyout(kevp, uap->eventlist, count * sizeof *kevp); if (error == 0) uap->eventlist += count; return (error); } /* * Copy 'count' items from the list pointed to by uap->changelist. */ static int kevent_copyin(void *arg, struct kevent *kevp, int count) { struct kevent_args *uap; int error; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct kevent_args *)arg; error = copyin(uap->changelist, kevp, count * sizeof *kevp); if (error == 0) uap->changelist += count; return (error); } #ifdef COMPAT_FREEBSD11 static int kevent11_copyout(void *arg, struct kevent *kevp, int count) { struct freebsd11_kevent_args *uap; struct freebsd11_kevent kev11; int error, i; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd11_kevent_args *)arg; for (i = 0; i < count; i++) { kev11.ident = kevp->ident; kev11.filter = kevp->filter; kev11.flags = kevp->flags; kev11.fflags = kevp->fflags; kev11.data = kevp->data; kev11.udata = kevp->udata; error = copyout(&kev11, uap->eventlist, sizeof(kev11)); if (error != 0) break; uap->eventlist++; kevp++; } return (error); } /* * Copy 'count' items from the list pointed to by uap->changelist. 
*/ static int kevent11_copyin(void *arg, struct kevent *kevp, int count) { struct freebsd11_kevent_args *uap; struct freebsd11_kevent kev11; int error, i; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd11_kevent_args *)arg; for (i = 0; i < count; i++) { error = copyin(uap->changelist, &kev11, sizeof(kev11)); if (error != 0) break; kevp->ident = kev11.ident; kevp->filter = kev11.filter; kevp->flags = kev11.flags; kevp->fflags = kev11.fflags; kevp->data = (uintptr_t)kev11.data; kevp->udata = kev11.udata; bzero(&kevp->ext, sizeof(kevp->ext)); uap->changelist++; kevp++; } return (error); } int freebsd11_kevent(struct thread *td, struct freebsd11_kevent_args *uap) { struct kevent_copyops k_ops = { .arg = uap, .k_copyout = kevent11_copyout, .k_copyin = kevent11_copyin, .kevent_size = sizeof(struct freebsd11_kevent), }; struct g_kevent_args gk_args = { .fd = uap->fd, .changelist = uap->changelist, .nchanges = uap->nchanges, .eventlist = uap->eventlist, .nevents = uap->nevents, .timeout = uap->timeout, }; return (kern_kevent_generic(td, &gk_args, &k_ops, "freebsd11_kevent")); } #endif int kern_kevent(struct thread *td, int fd, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout) { cap_rights_t rights; struct file *fp; int error; cap_rights_init_zero(&rights); if (nchanges > 0) cap_rights_set_one(&rights, CAP_KQUEUE_CHANGE); if (nevents > 0) cap_rights_set_one(&rights, CAP_KQUEUE_EVENT); error = fget(td, fd, &rights, &fp); if (error != 0) return (error); error = kern_kevent_fp(td, fp, nchanges, nevents, k_ops, timeout); fdrop(fp, td); return (error); } static int kqueue_kevent(struct kqueue *kq, struct thread *td, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout) { struct kevent keva[KQ_NEVENTS]; struct kevent *kevp, *changes; int i, n, nerrors, error; if (nchanges < 0) return (EINVAL); nerrors = 0; while (nchanges > 0) { n = nchanges > KQ_NEVENTS ? KQ_NEVENTS : nchanges; error = k_ops->k_copyin(k_ops->arg, keva, n); if (error) return (error); changes = keva; for (i = 0; i < n; i++) { kevp = &changes[i]; if (!kevp->filter) continue; kevp->flags &= ~EV_SYSFLAGS; error = kqueue_register(kq, kevp, td, M_WAITOK); if (error || (kevp->flags & EV_RECEIPT)) { if (nevents == 0) return (error); kevp->flags = EV_ERROR; kevp->data = error; (void)k_ops->k_copyout(k_ops->arg, kevp, 1); nevents--; nerrors++; } } nchanges -= n; } if (nerrors) { td->td_retval[0] = nerrors; return (0); } return (kqueue_scan(kq, nevents, k_ops, timeout, keva, td)); } int kern_kevent_fp(struct thread *td, struct file *fp, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout) { struct kqueue *kq; int error; error = kqueue_acquire(fp, &kq); if (error != 0) return (error); error = kqueue_kevent(kq, td, nchanges, nevents, k_ops, timeout); kqueue_release(kq, 0); return (error); } /* * Performs a kevent() call on a temporarily created kqueue. This can be * used to perform one-shot polling, similar to poll() and select(). 
*/ int kern_kevent_anonymous(struct thread *td, int nevents, struct kevent_copyops *k_ops) { struct kqueue kq = {}; int error; kqueue_init(&kq); kq.kq_refcnt = 1; error = kqueue_kevent(&kq, td, nevents, nevents, k_ops, NULL); kqueue_drain(&kq, td); kqueue_destroy(&kq); return (error); } int kqueue_add_filteropts(int filt, const struct filterops *filtops) { int error; error = 0; if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) { printf( "trying to add a filterop that is out of range: %d is beyond %d\n", ~filt, EVFILT_SYSCOUNT); return EINVAL; } mtx_lock(&filterops_lock); if (sysfilt_ops[~filt].for_fop != &null_filtops && sysfilt_ops[~filt].for_fop != NULL) error = EEXIST; else { sysfilt_ops[~filt].for_fop = filtops; sysfilt_ops[~filt].for_refcnt = 0; } mtx_unlock(&filterops_lock); return (error); } int kqueue_del_filteropts(int filt) { int error; error = 0; if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) return EINVAL; mtx_lock(&filterops_lock); if (sysfilt_ops[~filt].for_fop == &null_filtops || sysfilt_ops[~filt].for_fop == NULL) error = EINVAL; else if (sysfilt_ops[~filt].for_refcnt != 0) error = EBUSY; else { sysfilt_ops[~filt].for_fop = &null_filtops; sysfilt_ops[~filt].for_refcnt = 0; } mtx_unlock(&filterops_lock); return error; } static const struct filterops * kqueue_fo_find(int filt) { if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) return NULL; if (sysfilt_ops[~filt].for_nolock) return sysfilt_ops[~filt].for_fop; mtx_lock(&filterops_lock); sysfilt_ops[~filt].for_refcnt++; if (sysfilt_ops[~filt].for_fop == NULL) sysfilt_ops[~filt].for_fop = &null_filtops; mtx_unlock(&filterops_lock); return sysfilt_ops[~filt].for_fop; } static void kqueue_fo_release(int filt) { if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) return; if (sysfilt_ops[~filt].for_nolock) return; mtx_lock(&filterops_lock); KASSERT(sysfilt_ops[~filt].for_refcnt > 0, ("filter object refcount not valid on release")); sysfilt_ops[~filt].for_refcnt--; mtx_unlock(&filterops_lock); } /* * A ref to kq (obtained via kqueue_acquire) must be held. */ static int kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td, int mflag) { const struct filterops *fops; struct file *fp; struct knote *kn, *tkn; struct knlist *knl; int error, filt, event; int haskqglobal, filedesc_unlock; if ((kev->flags & (EV_ENABLE | EV_DISABLE)) == (EV_ENABLE | EV_DISABLE)) return (EINVAL); fp = NULL; kn = NULL; knl = NULL; error = 0; haskqglobal = 0; filedesc_unlock = 0; filt = kev->filter; fops = kqueue_fo_find(filt); if (fops == NULL) return EINVAL; if (kev->flags & EV_ADD) { /* Reject an invalid flag pair early */ if (kev->flags & EV_KEEPUDATA) { tkn = NULL; error = EINVAL; goto done; } /* * Prevent waiting with locks. Non-sleepable * allocation failures are handled in the loop, only * if the spare knote appears to be actually required. */ tkn = knote_alloc(mflag); } else { tkn = NULL; } findkn: if (fops->f_isfd) { KASSERT(td != NULL, ("td is NULL")); if (kev->ident > INT_MAX) error = EBADF; else error = fget(td, kev->ident, &cap_event_rights, &fp); if (error) goto done; if ((kev->flags & EV_ADD) == EV_ADD && kqueue_expand(kq, fops, kev->ident, M_NOWAIT) != 0) { /* try again */ fdrop(fp, td); fp = NULL; error = kqueue_expand(kq, fops, kev->ident, mflag); if (error) goto done; goto findkn; } if (fp->f_type == DTYPE_KQUEUE) { /* * If we add some intelligence about what we are doing, * we should be able to support events on ourselves. 
* We need to know when we are doing this to prevent * getting both the knlist lock and the kq lock since * they are the same thing. */ if (fp->f_data == kq) { error = EINVAL; goto done; } /* * Pre-lock the filedesc before the global * lock mutex, see the comment in * kqueue_close(). */ FILEDESC_XLOCK(td->td_proc->p_fd); filedesc_unlock = 1; KQ_GLOBAL_LOCK(&kq_global, haskqglobal); } KQ_LOCK(kq); if (kev->ident < kq->kq_knlistsize) { SLIST_FOREACH(kn, &kq->kq_knlist[kev->ident], kn_link) if (kev->filter == kn->kn_filter) break; } } else { if ((kev->flags & EV_ADD) == EV_ADD) { error = kqueue_expand(kq, fops, kev->ident, mflag); if (error != 0) goto done; } KQ_LOCK(kq); /* * If possible, find an existing knote to use for this kevent. */ if (kev->filter == EVFILT_PROC && (kev->flags & (EV_FLAG1 | EV_FLAG2)) != 0) { /* This is an internal creation of a process tracking * note. Don't attempt to coalesce this with an * existing note. */ ; } else if (kq->kq_knhashmask != 0) { struct klist *list; list = &kq->kq_knhash[ KN_HASH((u_long)kev->ident, kq->kq_knhashmask)]; SLIST_FOREACH(kn, list, kn_link) if (kev->ident == kn->kn_id && kev->filter == kn->kn_filter) break; } } /* knote is in the process of changing, wait for it to stabilize. */ if (kn != NULL && kn_in_flux(kn)) { KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); if (filedesc_unlock) { FILEDESC_XUNLOCK(td->td_proc->p_fd); filedesc_unlock = 0; } kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqflxwt", 0); if (fp != NULL) { fdrop(fp, td); fp = NULL; } goto findkn; } /* * kn now contains the matching knote, or NULL if no match */ if (kn == NULL) { if (kev->flags & EV_ADD) { kn = tkn; tkn = NULL; if (kn == NULL) { KQ_UNLOCK(kq); error = ENOMEM; goto done; } kn->kn_fp = fp; kn->kn_kq = kq; kn->kn_fop = fops; /* * apply reference counts to knote structure, and * do not release it at the end of this routine. */ fops = NULL; fp = NULL; kn->kn_sfflags = kev->fflags; kn->kn_sdata = kev->data; kev->fflags = 0; kev->data = 0; kn->kn_kevent = *kev; kn->kn_kevent.flags &= ~(EV_ADD | EV_DELETE | EV_ENABLE | EV_DISABLE | EV_FORCEONESHOT); kn->kn_status = KN_DETACHED; if ((kev->flags & EV_DISABLE) != 0) kn->kn_status |= KN_DISABLED; kn_enter_flux(kn); error = knote_attach(kn, kq); KQ_UNLOCK(kq); if (error != 0) { tkn = kn; goto done; } if ((error = kn->kn_fop->f_attach(kn)) != 0) { knote_drop_detached(kn, td); goto done; } knl = kn_list_lock(kn); goto done_ev_add; } else { /* No matching knote and the EV_ADD flag is not set. */ KQ_UNLOCK(kq); error = ENOENT; goto done; } } if (kev->flags & EV_DELETE) { kn_enter_flux(kn); KQ_UNLOCK(kq); knote_drop(kn, td); goto done; } if (kev->flags & EV_FORCEONESHOT) { kn->kn_flags |= EV_ONESHOT; KNOTE_ACTIVATE(kn, 1); } if ((kev->flags & EV_ENABLE) != 0) kn->kn_status &= ~KN_DISABLED; else if ((kev->flags & EV_DISABLE) != 0) kn->kn_status |= KN_DISABLED; /* * The user may change some filter values after the initial EV_ADD, * but doing so will not reset any filter which has already been * triggered. */ kn->kn_status |= KN_SCAN; kn_enter_flux(kn); KQ_UNLOCK(kq); knl = kn_list_lock(kn); if ((kev->flags & EV_KEEPUDATA) == 0) kn->kn_kevent.udata = kev->udata; if (!fops->f_isfd && fops->f_touch != NULL) { fops->f_touch(kn, kev, EVENT_REGISTER); } else { kn->kn_sfflags = kev->fflags; kn->kn_sdata = kev->data; } done_ev_add: /* * We can get here with kn->kn_knlist == NULL. 
This can happen when * the initial attach event decides that the event is "completed" * already, e.g., filt_procattach() is called on a zombie process. It * will call filt_proc() which will remove it from the list, and NULL * kn_knlist. * * KN_DISABLED will be stable while the knote is in flux, so the * unlocked read will not race with an update. */ if ((kn->kn_status & KN_DISABLED) == 0) event = kn->kn_fop->f_event(kn, 0); else event = 0; KQ_LOCK(kq); if (event) kn->kn_status |= KN_ACTIVE; if ((kn->kn_status & (KN_ACTIVE | KN_DISABLED | KN_QUEUED)) == KN_ACTIVE) knote_enqueue(kn); kn->kn_status &= ~KN_SCAN; kn_leave_flux(kn); kn_list_unlock(knl); KQ_UNLOCK_FLUX(kq); done: KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); if (filedesc_unlock) FILEDESC_XUNLOCK(td->td_proc->p_fd); if (fp != NULL) fdrop(fp, td); knote_free(tkn); if (fops != NULL) kqueue_fo_release(filt); return (error); } static int kqueue_acquire(struct file *fp, struct kqueue **kqp) { int error; struct kqueue *kq; error = 0; kq = fp->f_data; if (fp->f_type != DTYPE_KQUEUE || kq == NULL) return (EBADF); *kqp = kq; KQ_LOCK(kq); if ((kq->kq_state & KQ_CLOSING) == KQ_CLOSING) { KQ_UNLOCK(kq); return (EBADF); } kq->kq_refcnt++; KQ_UNLOCK(kq); return error; } static void kqueue_release(struct kqueue *kq, int locked) { if (locked) KQ_OWNED(kq); else KQ_LOCK(kq); kq->kq_refcnt--; if (kq->kq_refcnt == 1) wakeup(&kq->kq_refcnt); if (!locked) KQ_UNLOCK(kq); } static void ast_kqueue(struct thread *td, int tda __unused) { taskqueue_quiesce(taskqueue_kqueue_ctx); } static void kqueue_schedtask(struct kqueue *kq) { KQ_OWNED(kq); KASSERT(((kq->kq_state & KQ_TASKDRAIN) != KQ_TASKDRAIN), ("scheduling kqueue task while draining")); if ((kq->kq_state & KQ_TASKSCHED) != KQ_TASKSCHED) { taskqueue_enqueue(taskqueue_kqueue_ctx, &kq->kq_task); kq->kq_state |= KQ_TASKSCHED; ast_sched(curthread, TDA_KQUEUE); } } /* * Expand the kq to make sure we have storage for fops/ident pair. * * Return 0 on success (or no work necessary), return errno on failure. */ static int kqueue_expand(struct kqueue *kq, const struct filterops *fops, uintptr_t ident, int mflag) { struct klist *list, *tmp_knhash, *to_free; u_long tmp_knhashmask; int error, fd, size; KQ_NOTOWNED(kq); error = 0; to_free = NULL; if (fops->f_isfd) { fd = ident; if (kq->kq_knlistsize <= fd) { size = kq->kq_knlistsize; while (size <= fd) size += KQEXTENT; list = malloc(size * sizeof(*list), M_KQUEUE, mflag); if (list == NULL) return ENOMEM; KQ_LOCK(kq); if ((kq->kq_state & KQ_CLOSING) != 0) { to_free = list; error = EBADF; } else if (kq->kq_knlistsize > fd) { to_free = list; } else { if (kq->kq_knlist != NULL) { bcopy(kq->kq_knlist, list, kq->kq_knlistsize * sizeof(*list)); to_free = kq->kq_knlist; kq->kq_knlist = NULL; } bzero((caddr_t)list + kq->kq_knlistsize * sizeof(*list), (size - kq->kq_knlistsize) * sizeof(*list)); kq->kq_knlistsize = size; kq->kq_knlist = list; } KQ_UNLOCK(kq); } } else { if (kq->kq_knhashmask == 0) { tmp_knhash = hashinit_flags(KN_HASHSIZE, M_KQUEUE, &tmp_knhashmask, (mflag & M_WAITOK) != 0 ? 
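/*
 * kqueue_expand() grows one of two lookup structures: fd-backed filters
 * index kq_knlist directly by descriptor (grown in KQEXTENT steps), while
 * all other filters hash their ident into kq_knhash.  Lookup sketch,
 * mirroring kqueue_register() above:
 *
 *	if (fops->f_isfd)
 *		list = &kq->kq_knlist[kev->ident];
 *	else
 *		list = &kq->kq_knhash[KN_HASH((u_long)kev->ident,
 *		    kq->kq_knhashmask)];
 *	then walk the list for a knote with the matching filter (and,
 *	for the hash case, the matching ident)
 *
 * which is why the expand must complete before a new knote is attached.
 */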
HASH_WAITOK : HASH_NOWAIT); if (tmp_knhash == NULL) return (ENOMEM); KQ_LOCK(kq); if ((kq->kq_state & KQ_CLOSING) != 0) { to_free = tmp_knhash; error = EBADF; } else if (kq->kq_knhashmask == 0) { kq->kq_knhash = tmp_knhash; kq->kq_knhashmask = tmp_knhashmask; } else { to_free = tmp_knhash; } KQ_UNLOCK(kq); } } free(to_free, M_KQUEUE); KQ_NOTOWNED(kq); return (error); } static void kqueue_task(void *arg, int pending) { struct kqueue *kq; int haskqglobal; haskqglobal = 0; kq = arg; KQ_GLOBAL_LOCK(&kq_global, haskqglobal); KQ_LOCK(kq); KNOTE_LOCKED(&kq->kq_sel.si_note, 0); kq->kq_state &= ~KQ_TASKSCHED; if ((kq->kq_state & KQ_TASKDRAIN) == KQ_TASKDRAIN) { wakeup(&kq->kq_state); } KQ_UNLOCK(kq); KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); } /* * Scan, update kn_data (if not ONESHOT), and copyout triggered events. * We treat KN_MARKER knotes as if they are in flux. */ static int kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops, const struct timespec *tsp, struct kevent *keva, struct thread *td) { struct kevent *kevp; struct knote *kn, *marker; struct knlist *knl; sbintime_t asbt, rsbt; int count, error, haskqglobal, influx, nkev, touch; count = maxevents; nkev = 0; error = 0; haskqglobal = 0; if (maxevents == 0) goto done_nl; if (maxevents < 0) { error = EINVAL; goto done_nl; } rsbt = 0; if (tsp != NULL) { if (!timespecvalid_interval(tsp)) { error = EINVAL; goto done_nl; } if (timespecisset(tsp)) { if (tsp->tv_sec <= INT32_MAX) { rsbt = tstosbt(*tsp); if (TIMESEL(&asbt, rsbt)) asbt += tc_tick_sbt; if (asbt <= SBT_MAX - rsbt) asbt += rsbt; else asbt = 0; rsbt >>= tc_precexp; } else asbt = 0; } else asbt = -1; } else asbt = 0; marker = knote_alloc(M_WAITOK); marker->kn_status = KN_MARKER; KQ_LOCK(kq); retry: kevp = keva; if (kq->kq_count == 0) { if (asbt == -1) { error = EWOULDBLOCK; } else { kq->kq_state |= KQ_SLEEP; error = msleep_sbt(kq, &kq->kq_lock, PSOCK | PCATCH, "kqread", asbt, rsbt, C_ABSOLUTE); } if (error == 0) goto retry; /* don't restart after signals... */ if (error == ERESTART) error = EINTR; else if (error == EWOULDBLOCK) error = 0; goto done; } TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe); influx = 0; while (count) { KQ_OWNED(kq); kn = TAILQ_FIRST(&kq->kq_head); if ((kn->kn_status == KN_MARKER && kn != marker) || kn_in_flux(kn)) { if (influx) { influx = 0; KQ_FLUX_WAKEUP(kq); } kq->kq_state |= KQ_FLUXWAIT; error = msleep(kq, &kq->kq_lock, PSOCK, "kqflxwt", 0); continue; } TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); if ((kn->kn_status & KN_DISABLED) == KN_DISABLED) { kn->kn_status &= ~KN_QUEUED; kq->kq_count--; continue; } if (kn == marker) { KQ_FLUX_WAKEUP(kq); if (count == maxevents) goto retry; goto done; } KASSERT(!kn_in_flux(kn), ("knote %p is unexpectedly in flux", kn)); if ((kn->kn_flags & EV_DROP) == EV_DROP) { kn->kn_status &= ~KN_QUEUED; kn_enter_flux(kn); kq->kq_count--; KQ_UNLOCK(kq); /* * We don't need to lock the list since we've * marked it as in flux. */ knote_drop(kn, td); KQ_LOCK(kq); continue; } else if ((kn->kn_flags & EV_ONESHOT) == EV_ONESHOT) { kn->kn_status &= ~KN_QUEUED; kn_enter_flux(kn); kq->kq_count--; KQ_UNLOCK(kq); /* * We don't need to lock the list since we've * marked the knote as being in flux. 
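* Note that the triggered kevent is copied out below before knote_drop() runs, since dropping the knote ultimately frees it.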
*/ *kevp = kn->kn_kevent; knote_drop(kn, td); KQ_LOCK(kq); kn = NULL; } else { kn->kn_status |= KN_SCAN; kn_enter_flux(kn); KQ_UNLOCK(kq); if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE) KQ_GLOBAL_LOCK(&kq_global, haskqglobal); knl = kn_list_lock(kn); if (kn->kn_fop->f_event(kn, 0) == 0) { KQ_LOCK(kq); KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE | KN_SCAN); kn_leave_flux(kn); kq->kq_count--; kn_list_unlock(knl); influx = 1; continue; } touch = (!kn->kn_fop->f_isfd && kn->kn_fop->f_touch != NULL); if (touch) kn->kn_fop->f_touch(kn, kevp, EVENT_PROCESS); else *kevp = kn->kn_kevent; KQ_LOCK(kq); KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); if (kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) { /* * Manually clear knotes who weren't * 'touch'ed. */ if (touch == 0 && kn->kn_flags & EV_CLEAR) { kn->kn_data = 0; kn->kn_fflags = 0; } if (kn->kn_flags & EV_DISPATCH) kn->kn_status |= KN_DISABLED; kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE); kq->kq_count--; } else TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); kn->kn_status &= ~KN_SCAN; kn_leave_flux(kn); kn_list_unlock(knl); influx = 1; } /* we are returning a copy to the user */ kevp++; nkev++; count--; if (nkev == KQ_NEVENTS) { influx = 0; KQ_UNLOCK_FLUX(kq); error = k_ops->k_copyout(k_ops->arg, keva, nkev); nkev = 0; kevp = keva; KQ_LOCK(kq); if (error) break; } } TAILQ_REMOVE(&kq->kq_head, marker, kn_tqe); done: KQ_OWNED(kq); KQ_UNLOCK_FLUX(kq); knote_free(marker); done_nl: KQ_NOTOWNED(kq); if (nkev != 0) error = k_ops->k_copyout(k_ops->arg, keva, nkev); td->td_retval[0] = maxevents - count; return (error); } /*ARGSUSED*/ static int kqueue_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred, struct thread *td) { /* * Enabling sigio causes two major problems: * 1) infinite recursion: * Synopsys: kevent is being used to track signals and have FIOASYNC * set. On receipt of a signal this will cause a kqueue to recurse * into itself over and over. Sending the sigio causes the kqueue * to become ready, which in turn posts sigio again, forever. * Solution: this can be solved by setting a flag in the kqueue that * we have a SIGIO in progress. * 2) locking problems: * Synopsys: Kqueue is a leaf subsystem, but adding signalling puts * us above the proc and pgrp locks. * Solution: Post a signal using an async mechanism, being sure to * record a generation count in the delivery so that we do not deliver * a signal to the wrong process. * * Note, these two mechanisms are somewhat mutually exclusive! */ #if 0 struct kqueue *kq; kq = fp->f_data; switch (cmd) { case FIOASYNC: if (*(int *)data) { kq->kq_state |= KQ_ASYNC; } else { kq->kq_state &= ~KQ_ASYNC; } return (0); case FIOSETOWN: return (fsetown(*(int *)data, &kq->kq_sigio)); case FIOGETOWN: *(int *)data = fgetown(&kq->kq_sigio); return (0); } #endif return (ENOTTY); } /*ARGSUSED*/ static int kqueue_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) { struct kqueue *kq; int revents = 0; int error; if ((error = kqueue_acquire(fp, &kq))) return POLLERR; KQ_LOCK(kq); if (events & (POLLIN | POLLRDNORM)) { if (kq->kq_count) { revents |= events & (POLLIN | POLLRDNORM); } else { selrecord(td, &kq->kq_sel); if (SEL_WAITING(&kq->kq_sel)) kq->kq_state |= KQ_SEL; } } kqueue_release(kq, 1); KQ_UNLOCK(kq); return (revents); } /*ARGSUSED*/ static int kqueue_stat(struct file *fp, struct stat *st, struct ucred *active_cred) { bzero((void *)st, sizeof *st); /* * We no longer return kq_count because the unlocked value is useless. 
* If you spent all this time getting the count, why not spend your * syscall better by calling kevent? * * XXX - This is needed for libc_r. */ st->st_mode = S_IFIFO; return (0); } static void kqueue_drain(struct kqueue *kq, struct thread *td) { struct knote *kn; int i; KQ_LOCK(kq); KASSERT((kq->kq_state & KQ_CLOSING) != KQ_CLOSING, ("kqueue already closing")); kq->kq_state |= KQ_CLOSING; if (kq->kq_refcnt > 1) msleep(&kq->kq_refcnt, &kq->kq_lock, PSOCK, "kqclose", 0); KASSERT(kq->kq_refcnt == 1, ("other refs are out there!")); KASSERT(knlist_empty(&kq->kq_sel.si_note), ("kqueue's knlist not empty")); for (i = 0; i < kq->kq_knlistsize; i++) { while ((kn = SLIST_FIRST(&kq->kq_knlist[i])) != NULL) { if (kn_in_flux(kn)) { kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK, "kqclo1", 0); continue; } kn_enter_flux(kn); KQ_UNLOCK(kq); knote_drop(kn, td); KQ_LOCK(kq); } } if (kq->kq_knhashmask != 0) { for (i = 0; i <= kq->kq_knhashmask; i++) { while ((kn = SLIST_FIRST(&kq->kq_knhash[i])) != NULL) { if (kn_in_flux(kn)) { kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK, "kqclo2", 0); continue; } kn_enter_flux(kn); KQ_UNLOCK(kq); knote_drop(kn, td); KQ_LOCK(kq); } } } if ((kq->kq_state & KQ_TASKSCHED) == KQ_TASKSCHED) { kq->kq_state |= KQ_TASKDRAIN; msleep(&kq->kq_state, &kq->kq_lock, PSOCK, "kqtqdr", 0); } if ((kq->kq_state & KQ_SEL) == KQ_SEL) { selwakeuppri(&kq->kq_sel, PSOCK); if (!SEL_WAITING(&kq->kq_sel)) kq->kq_state &= ~KQ_SEL; } KQ_UNLOCK(kq); } static void kqueue_destroy(struct kqueue *kq) { KASSERT(kq->kq_fdp == NULL, ("kqueue still attached to a file descriptor")); seldrain(&kq->kq_sel); knlist_destroy(&kq->kq_sel.si_note); mtx_destroy(&kq->kq_lock); if (kq->kq_knhash != NULL) free(kq->kq_knhash, M_KQUEUE); if (kq->kq_knlist != NULL) free(kq->kq_knlist, M_KQUEUE); funsetown(&kq->kq_sigio); } /*ARGSUSED*/ static int kqueue_close(struct file *fp, struct thread *td) { struct kqueue *kq = fp->f_data; struct filedesc *fdp; int error; int filedesc_unlock; if ((error = kqueue_acquire(fp, &kq))) return error; kqueue_drain(kq, td); /* * We could be called due to the knote_drop() doing fdrop(), * called from kqueue_register(). In this case the global * lock is owned, and filedesc sx is locked before, to not * take the sleepable lock after non-sleepable. */ fdp = kq->kq_fdp; kq->kq_fdp = NULL; if (!sx_xlocked(FILEDESC_LOCK(fdp))) { FILEDESC_XLOCK(fdp); filedesc_unlock = 1; } else filedesc_unlock = 0; TAILQ_REMOVE(&fdp->fd_kqlist, kq, kq_list); if (filedesc_unlock) FILEDESC_XUNLOCK(fdp); kqueue_destroy(kq); chgkqcnt(kq->kq_cred->cr_ruidinfo, -1, 0); crfree(kq->kq_cred); free(kq, M_KQUEUE); fp->f_data = NULL; return (0); } static int kqueue_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) { struct kqueue *kq = fp->f_data; kif->kf_type = KF_TYPE_KQUEUE; kif->kf_un.kf_kqueue.kf_kqueue_addr = (uintptr_t)kq; kif->kf_un.kf_kqueue.kf_kqueue_count = kq->kq_count; kif->kf_un.kf_kqueue.kf_kqueue_state = kq->kq_state; return (0); } static void kqueue_wakeup(struct kqueue *kq) { KQ_OWNED(kq); if ((kq->kq_state & KQ_SLEEP) == KQ_SLEEP) { kq->kq_state &= ~KQ_SLEEP; wakeup(kq); } if ((kq->kq_state & KQ_SEL) == KQ_SEL) { selwakeuppri(&kq->kq_sel, PSOCK); if (!SEL_WAITING(&kq->kq_sel)) kq->kq_state &= ~KQ_SEL; } if (!knlist_empty(&kq->kq_sel.si_note)) kqueue_schedtask(kq); if ((kq->kq_state & KQ_ASYNC) == KQ_ASYNC) { pgsigio(&kq->kq_sigio, SIGIO, 0); } } /* * Walk down a list of knotes, activating them if their event has triggered. 
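* The hint argument is passed through unchanged to each filter's f_event() routine; lockflags tells knote() whether the caller already holds the knlist lock (KNF_LISTLOCKED) and whether f_event() may be invoked with the kq lock dropped (KNF_NOKQLOCK).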
* * There is a possibility to optimize in the case of one kq watching another. * Instead of scheduling a task to wake it up, you could pass enough state * down the chain to make up the parent kqueue. Make this code functional * first. */ void knote(struct knlist *list, long hint, int lockflags) { struct kqueue *kq; struct knote *kn, *tkn; int error; if (list == NULL) return; KNL_ASSERT_LOCK(list, lockflags & KNF_LISTLOCKED); if ((lockflags & KNF_LISTLOCKED) == 0) list->kl_lock(list->kl_lockarg); /* * If we unlock the list lock (and enter influx), we can * eliminate the kqueue scheduling, but this will introduce * four lock/unlock's for each knote to test. Also, marker * would be needed to keep iteration position, since filters * or other threads could remove events. */ SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, tkn) { kq = kn->kn_kq; KQ_LOCK(kq); if (kn_in_flux(kn) && (kn->kn_status & KN_SCAN) == 0) { /* * Do not process the influx notes, except for * the influx coming from the kq unlock in the * kqueue_scan(). In the later case, we do * not interfere with the scan, since the code * fragment in kqueue_scan() locks the knlist, * and cannot proceed until we finished. */ KQ_UNLOCK(kq); } else if ((lockflags & KNF_NOKQLOCK) != 0) { kn_enter_flux(kn); KQ_UNLOCK(kq); error = kn->kn_fop->f_event(kn, hint); KQ_LOCK(kq); kn_leave_flux(kn); if (error) KNOTE_ACTIVATE(kn, 1); KQ_UNLOCK_FLUX(kq); } else { if (kn->kn_fop->f_event(kn, hint)) KNOTE_ACTIVATE(kn, 1); KQ_UNLOCK(kq); } } if ((lockflags & KNF_LISTLOCKED) == 0) list->kl_unlock(list->kl_lockarg); } /* * add a knote to a knlist */ void knlist_add(struct knlist *knl, struct knote *kn, int islocked) { KNL_ASSERT_LOCK(knl, islocked); KQ_NOTOWNED(kn->kn_kq); KASSERT(kn_in_flux(kn), ("knote %p not in flux", kn)); KASSERT((kn->kn_status & KN_DETACHED) != 0, ("knote %p was not detached", kn)); if (!islocked) knl->kl_lock(knl->kl_lockarg); SLIST_INSERT_HEAD(&knl->kl_list, kn, kn_selnext); if (!islocked) knl->kl_unlock(knl->kl_lockarg); KQ_LOCK(kn->kn_kq); kn->kn_knlist = knl; kn->kn_status &= ~KN_DETACHED; KQ_UNLOCK(kn->kn_kq); } static void knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked, int kqislocked) { KASSERT(!kqislocked || knlislocked, ("kq locked w/o knl locked")); KNL_ASSERT_LOCK(knl, knlislocked); mtx_assert(&kn->kn_kq->kq_lock, kqislocked ? 
MA_OWNED : MA_NOTOWNED); KASSERT(kqislocked || kn_in_flux(kn), ("knote %p not in flux", kn)); KASSERT((kn->kn_status & KN_DETACHED) == 0, ("knote %p was already detached", kn)); if (!knlislocked) knl->kl_lock(knl->kl_lockarg); SLIST_REMOVE(&knl->kl_list, kn, knote, kn_selnext); kn->kn_knlist = NULL; if (!knlislocked) kn_list_unlock(knl); if (!kqislocked) KQ_LOCK(kn->kn_kq); kn->kn_status |= KN_DETACHED; if (!kqislocked) KQ_UNLOCK(kn->kn_kq); } /* * remove knote from the specified knlist */ void knlist_remove(struct knlist *knl, struct knote *kn, int islocked) { knlist_remove_kq(knl, kn, islocked, 0); } int knlist_empty(struct knlist *knl) { KNL_ASSERT_LOCKED(knl); return (SLIST_EMPTY(&knl->kl_list)); } static struct mtx knlist_lock; MTX_SYSINIT(knlist_lock, &knlist_lock, "knlist lock for lockless objects", MTX_DEF); static void knlist_mtx_lock(void *arg); static void knlist_mtx_unlock(void *arg); static void knlist_mtx_lock(void *arg) { mtx_lock((struct mtx *)arg); } static void knlist_mtx_unlock(void *arg) { mtx_unlock((struct mtx *)arg); } static void knlist_mtx_assert_lock(void *arg, int what) { if (what == LA_LOCKED) mtx_assert((struct mtx *)arg, MA_OWNED); else mtx_assert((struct mtx *)arg, MA_NOTOWNED); } void knlist_init(struct knlist *knl, void *lock, void (*kl_lock)(void *), void (*kl_unlock)(void *), void (*kl_assert_lock)(void *, int)) { if (lock == NULL) knl->kl_lockarg = &knlist_lock; else knl->kl_lockarg = lock; if (kl_lock == NULL) knl->kl_lock = knlist_mtx_lock; else knl->kl_lock = kl_lock; if (kl_unlock == NULL) knl->kl_unlock = knlist_mtx_unlock; else knl->kl_unlock = kl_unlock; if (kl_assert_lock == NULL) knl->kl_assert_lock = knlist_mtx_assert_lock; else knl->kl_assert_lock = kl_assert_lock; knl->kl_autodestroy = 0; SLIST_INIT(&knl->kl_list); } void knlist_init_mtx(struct knlist *knl, struct mtx *lock) { knlist_init(knl, lock, NULL, NULL, NULL); } struct knlist * knlist_alloc(struct mtx *lock) { struct knlist *knl; knl = malloc(sizeof(struct knlist), M_KQUEUE, M_WAITOK); knlist_init_mtx(knl, lock); return (knl); } void knlist_destroy(struct knlist *knl) { KASSERT(KNLIST_EMPTY(knl), ("destroying knlist %p with knotes on it", knl)); } void knlist_detach(struct knlist *knl) { KNL_ASSERT_LOCKED(knl); knl->kl_autodestroy = 1; if (knlist_empty(knl)) { knlist_destroy(knl); free(knl, M_KQUEUE); } } /* * Even if we are locked, we may need to drop the lock to allow any influx * knotes time to "settle". 
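* In that case the list lock is dropped, the thread sleeps under KQ_FLUXWAIT until the in-flux knote is released, and the scan is restarted from the beginning of the list.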
*/ void knlist_cleardel(struct knlist *knl, struct thread *td, int islocked, int killkn) { struct knote *kn, *kn2; struct kqueue *kq; KASSERT(!knl->kl_autodestroy, ("cleardel for autodestroy %p", knl)); if (islocked) KNL_ASSERT_LOCKED(knl); else { KNL_ASSERT_UNLOCKED(knl); again: /* need to reacquire lock since we have dropped it */ knl->kl_lock(knl->kl_lockarg); } SLIST_FOREACH_SAFE(kn, &knl->kl_list, kn_selnext, kn2) { kq = kn->kn_kq; KQ_LOCK(kq); if (kn_in_flux(kn)) { KQ_UNLOCK(kq); continue; } knlist_remove_kq(knl, kn, 1, 1); if (killkn) { kn_enter_flux(kn); KQ_UNLOCK(kq); knote_drop_detached(kn, td); } else { /* Make sure cleared knotes disappear soon */ kn->kn_flags |= EV_EOF | EV_ONESHOT; KQ_UNLOCK(kq); } kq = NULL; } if (!SLIST_EMPTY(&knl->kl_list)) { /* there are still in flux knotes remaining */ kn = SLIST_FIRST(&knl->kl_list); kq = kn->kn_kq; KQ_LOCK(kq); KASSERT(kn_in_flux(kn), ("knote removed w/o list lock")); knl->kl_unlock(knl->kl_lockarg); kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqkclr", 0); kq = NULL; goto again; } if (islocked) KNL_ASSERT_LOCKED(knl); else { knl->kl_unlock(knl->kl_lockarg); KNL_ASSERT_UNLOCKED(knl); } } /* * Remove all knotes referencing a specified fd must be called with FILEDESC * lock. This prevents a race where a new fd comes along and occupies the * entry and we attach a knote to the fd. */ void knote_fdclose(struct thread *td, int fd) { struct filedesc *fdp = td->td_proc->p_fd; struct kqueue *kq; struct knote *kn; int influx; FILEDESC_XLOCK_ASSERT(fdp); /* * We shouldn't have to worry about new kevents appearing on fd * since filedesc is locked. */ TAILQ_FOREACH(kq, &fdp->fd_kqlist, kq_list) { KQ_LOCK(kq); again: influx = 0; while (kq->kq_knlistsize > fd && (kn = SLIST_FIRST(&kq->kq_knlist[fd])) != NULL) { if (kn_in_flux(kn)) { /* someone else might be waiting on our knote */ if (influx) wakeup(kq); kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK, "kqflxwt", 0); goto again; } kn_enter_flux(kn); KQ_UNLOCK(kq); influx = 1; knote_drop(kn, td); KQ_LOCK(kq); } KQ_UNLOCK_FLUX(kq); } } static int knote_attach(struct knote *kn, struct kqueue *kq) { struct klist *list; KASSERT(kn_in_flux(kn), ("knote %p not marked influx", kn)); KQ_OWNED(kq); if ((kq->kq_state & KQ_CLOSING) != 0) return (EBADF); if (kn->kn_fop->f_isfd) { if (kn->kn_id >= kq->kq_knlistsize) return (ENOMEM); list = &kq->kq_knlist[kn->kn_id]; } else { if (kq->kq_knhash == NULL) return (ENOMEM); list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; } SLIST_INSERT_HEAD(list, kn, kn_link); return (0); } static void knote_drop(struct knote *kn, struct thread *td) { if ((kn->kn_status & KN_DETACHED) == 0) kn->kn_fop->f_detach(kn); knote_drop_detached(kn, td); } static void knote_drop_detached(struct knote *kn, struct thread *td) { struct kqueue *kq; struct klist *list; kq = kn->kn_kq; KASSERT((kn->kn_status & KN_DETACHED) != 0, ("knote %p still attached", kn)); KQ_NOTOWNED(kq); KQ_LOCK(kq); for (;;) { KASSERT(kn->kn_influx >= 1, ("knote_drop called on %p with influx %d", kn, kn->kn_influx)); if (kn->kn_influx == 1) break; kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK, "kqflxwt", 0); } if (kn->kn_fop->f_isfd) list = &kq->kq_knlist[kn->kn_id]; else list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; if (!SLIST_EMPTY(list)) SLIST_REMOVE(list, kn, knote, kn_link); if (kn->kn_status & KN_QUEUED) knote_dequeue(kn); KQ_UNLOCK_FLUX(kq); if (kn->kn_fop->f_isfd) { fdrop(kn->kn_fp, td); kn->kn_fp = NULL; } 
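/* Drop the reference on the filter ops acquired at registration time and return the knote to its UMA zone. */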
kqueue_fo_release(kn->kn_kevent.filter); kn->kn_fop = NULL; knote_free(kn); } static void knote_enqueue(struct knote *kn) { struct kqueue *kq = kn->kn_kq; KQ_OWNED(kn->kn_kq); KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued")); TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); kn->kn_status |= KN_QUEUED; kq->kq_count++; kqueue_wakeup(kq); } static void knote_dequeue(struct knote *kn) { struct kqueue *kq = kn->kn_kq; KQ_OWNED(kn->kn_kq); KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued")); TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); kn->kn_status &= ~KN_QUEUED; kq->kq_count--; } static void knote_init(void) { knote_zone = uma_zcreate("KNOTE", sizeof(struct knote), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); ast_register(TDA_KQUEUE, ASTR_ASTF_REQUIRED, 0, ast_kqueue); } SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL); static struct knote * knote_alloc(int mflag) { return (uma_zalloc(knote_zone, mflag | M_ZERO)); } static void knote_free(struct knote *kn) { uma_zfree(knote_zone, kn); } /* * Register the kev w/ the kq specified by fd. */ int kqfd_register(int fd, struct kevent *kev, struct thread *td, int mflag) { struct kqueue *kq; struct file *fp; cap_rights_t rights; int error; error = fget(td, fd, cap_rights_init_one(&rights, CAP_KQUEUE_CHANGE), &fp); if (error != 0) return (error); if ((error = kqueue_acquire(fp, &kq)) != 0) goto noacquire; error = kqueue_register(kq, kev, td, mflag); kqueue_release(kq, 0); noacquire: fdrop(fp, td); return (error); } struct knote_status_export_bit { int kn_status_bit; int knt_status_bit; }; #define ST(name) \ { .kn_status_bit = KN_##name, .knt_status_bit = KNOTE_STATUS_##name } static const struct knote_status_export_bit knote_status_export_bits[] = { ST(ACTIVE), ST(QUEUED), ST(DISABLED), ST(DETACHED), ST(KQUEUE), }; #undef ST static int knote_status_export(int kn_status) { const struct knote_status_export_bit *b; unsigned i; int res; res = 0; for (i = 0; i < nitems(knote_status_export_bits); i++) { b = &knote_status_export_bits[i]; if ((kn_status & b->kn_status_bit) != 0) res |= b->knt_status_bit; } return (res); } static int kern_proc_kqueue_report_one(struct sbuf *s, struct proc *p, int kq_fd, struct kqueue *kq, struct knote *kn, bool compat32 __unused) { struct kinfo_knote kin; #ifdef COMPAT_FREEBSD32 struct kinfo_knote32 kin32; #endif int error; if (kn->kn_status == KN_MARKER) return (0); memset(&kin, 0, sizeof(kin)); kin.knt_kq_fd = kq_fd; memcpy(&kin.knt_event, &kn->kn_kevent, sizeof(struct kevent)); kin.knt_status = knote_status_export(kn->kn_status); kn_enter_flux(kn); KQ_UNLOCK_FLUX(kq); if (kn->kn_fop->f_userdump != NULL) (void)kn->kn_fop->f_userdump(p, kn, &kin); #ifdef COMPAT_FREEBSD32 if (compat32) { freebsd32_kinfo_knote_to_32(&kin, &kin32); error = sbuf_bcat(s, &kin32, sizeof(kin32)); } else #endif error = sbuf_bcat(s, &kin, sizeof(kin)); KQ_LOCK(kq); kn_leave_flux(kn); return (error); } static int kern_proc_kqueue_report(struct sbuf *s, struct proc *p, int kq_fd, struct kqueue *kq, bool compat32) { struct knote *kn; int error, i; error = 0; KQ_LOCK(kq); for (i = 0; i < kq->kq_knlistsize; i++) { SLIST_FOREACH(kn, &kq->kq_knlist[i], kn_link) { error = kern_proc_kqueue_report_one(s, p, kq_fd, kq, kn, compat32); if (error != 0) goto out; } } if (kq->kq_knhashmask == 0) goto out; for (i = 0; i <= kq->kq_knhashmask; i++) { SLIST_FOREACH(kn, &kq->kq_knhash[i], kn_link) { error = kern_proc_kqueue_report_one(s, p, kq_fd, kq, kn, compat32); if (error != 0) goto out; } } out: KQ_UNLOCK_FLUX(kq); return (error); } +struct 
kern_proc_kqueues_out1_cb_args { + struct sbuf *s; + bool compat32; +}; + +static int +kern_proc_kqueues_out1_cb(struct proc *p, int fd, struct file *fp, void *arg) +{ + struct kqueue *kq; + struct kern_proc_kqueues_out1_cb_args *a; + + if (fp->f_type != DTYPE_KQUEUE) + return (0); + a = arg; + kq = fp->f_data; + return (kern_proc_kqueue_report(a->s, p, fd, kq, a->compat32)); +} + +static int +kern_proc_kqueues_out1(struct thread *td, struct proc *p, struct sbuf *s, + bool compat32) +{ + struct kern_proc_kqueues_out1_cb_args a; + + MPASS(p == curproc); + + a.s = s; + a.compat32 = compat32; + return (fget_remote_foreach(td, p, kern_proc_kqueues_out1_cb, &a)); +} + +int +kern_proc_kqueues_out(struct proc *p, struct sbuf *sb, size_t maxlen, + bool compat32) +{ + struct sbuf *s, sm; + int error; + + s = sbuf_new(&sm, NULL, maxlen, SBUF_FIXEDLEN); + error = kern_proc_kqueues_out1(curthread, p, s, compat32); + sbuf_finish(s); + if (error == 0) + sbuf_bcat(sb, sbuf_data(s), MIN(sbuf_len(s), maxlen)); + sbuf_delete(s); + return (error); +} + static int sysctl_kern_proc_kqueue(SYSCTL_HANDLER_ARGS) { struct thread *td; struct proc *p; struct file *fp; struct kqueue *kq; struct sbuf *s, sm; int error, error1, kq_fd, *name; bool compat32; name = (int *)arg1; if ((u_int)arg2 != 2) return (EINVAL); error = pget((pid_t)name[0], PGET_HOLD | PGET_CANDEBUG, &p); if (error != 0) return (error); td = curthread; kq_fd = name[1]; error = fget_remote(td, p, kq_fd, &fp); if (error != 0) goto out1; if (fp->f_type != DTYPE_KQUEUE) { error = EINVAL; goto out2; } s = sbuf_new_for_sysctl(&sm, NULL, 0, req); if (s == NULL) { error = ENOMEM; goto out2; } sbuf_clear_flags(s, SBUF_INCLUDENUL); #ifdef COMPAT_FREEBSD32 compat32 = SV_CURPROC_FLAG(SV_ILP32); #else compat32 = false; #endif kq = fp->f_data; error = kern_proc_kqueue_report(s, p, kq_fd, kq, compat32); error1 = sbuf_finish(s); if (error == 0) error = error1; sbuf_delete(s); out2: fdrop(fp, td); out1: PRELE(p); return (error); } static SYSCTL_NODE(_kern_proc, KERN_PROC_KQUEUE, kq, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_kqueue, "KQueue events"); diff --git a/sys/sys/elf_common.h b/sys/sys/elf_common.h index 766ff6ede51b..488aaaf25ab1 100644 --- a/sys/sys/elf_common.h +++ b/sys/sys/elf_common.h @@ -1,1686 +1,1687 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2017, 2018 Dell EMC * Copyright (c) 2000, 2001, 2008, 2011, David E. O'Brien * Copyright (c) 1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _SYS_ELF_COMMON_H_ #define _SYS_ELF_COMMON_H_ 1 /* * ELF definitions that are independent of architecture or word size. */ /* * Note header. The ".note" section contains an array of notes. Each * begins with this header, aligned to a word boundary. Immediately * following the note header is n_namesz bytes of name, padded to the * next word boundary. Then comes n_descsz bytes of descriptor, again * padded to a word boundary. The values of n_namesz and n_descsz do * not include the padding. */ #if !defined(LOCORE) && !defined(__ASSEMBLER__) typedef struct { u_int32_t n_namesz; /* Length of name. */ u_int32_t n_descsz; /* Length of descriptor. */ u_int32_t n_type; /* Type of this note. */ } Elf_Note; typedef Elf_Note Elf_Nhdr; #endif /* * Option kinds. */ #define ODK_NULL 0 /* undefined */ #define ODK_REGINFO 1 /* register usage info */ #define ODK_EXCEPTIONS 2 /* exception processing info */ #define ODK_PAD 3 /* section padding */ #define ODK_HWPATCH 4 /* hardware patch applied */ #define ODK_FILL 5 /* fill value used by the linker */ #define ODK_TAGS 6 /* reserved space for tools */ #define ODK_HWAND 7 /* hardware AND patch applied */ #define ODK_HWOR 8 /* hardware OR patch applied */ #define ODK_GP_GROUP 9 /* GP group for text/data sections */ #define ODK_IDENT 10 /* ID information */ #define ODK_PAGESIZE 11 /* page size information */ /* * ODK_EXCEPTIONS info field masks. */ #define OEX_FPU_MIN 0x0000001f /* min FPU exception required */ #define OEX_FPU_MAX 0x00001f00 /* max FPU exception allowed */ #define OEX_PAGE0 0x00010000 /* page zero must be mapped */ #define OEX_SMM 0x00020000 /* run in sequential memory mode */ #define OEX_PRECISEFP 0x00040000 /* run in precise FP exception mode */ #define OEX_DISMISS 0x00080000 /* dismiss invalid address traps */ /* * ODK_PAD info field masks. */ #define OPAD_PREFIX 0x0001 #define OPAD_POSTFIX 0x0002 #define OPAD_SYMBOL 0x0004 /* * ODK_HWPATCH info field masks. */ #define OHW_R4KEOP 0x00000001 /* patch for R4000 branch at end-of-page bug */ #define OHW_R8KPFETCH 0x00000002 /* R8000 prefetch bug may occur */ #define OHW_R5KEOP 0x00000004 /* patch for R5000 branch at end-of-page bug */ #define OHW_R5KCVTL 0x00000008 /* R5000 cvt.[ds].l bug: clean == 1 */ #define OHW_R10KLDL 0x00000010UL /* need patch for R10000 misaligned load */ /* * ODK_HWAND/ODK_HWOR info field and hwp_flags[12] masks. */ #define OHWA0_R4KEOP_CHECKED 0x00000001 /* object checked for R4000 end-of-page bug */ #define OHWA0_R4KEOP_CLEAN 0x00000002 /* object verified clean for R4000 end-of-page bug */ #define OHWO0_FIXADE 0x00000001 /* object requires call to fixade */ /* * ODK_IDENT/ODK_GP_GROUP info field masks. */ #define OGP_GROUP 0x0000ffff /* GP group number */ #define OGP_SELF 0x00010000 /* GP group is self-contained */ /* * The header for GNU-style hash sections. */ #if !defined(LOCORE) && !defined(__ASSEMBLER__) typedef struct { u_int32_t gh_nbuckets; /* Number of hash buckets. */ u_int32_t gh_symndx; /* First visible symbol in .dynsym. 
*/ u_int32_t gh_maskwords; /* #maskwords used in bloom filter. */ u_int32_t gh_shift2; /* Bloom filter shift count. */ } Elf_GNU_Hash_Header; #endif /* Indexes into the e_ident array. Keep synced with http://www.sco.com/developers/gabi/latest/ch4.eheader.html */ #define EI_MAG0 0 /* Magic number, byte 0. */ #define EI_MAG1 1 /* Magic number, byte 1. */ #define EI_MAG2 2 /* Magic number, byte 2. */ #define EI_MAG3 3 /* Magic number, byte 3. */ #define EI_CLASS 4 /* Class of machine. */ #define EI_DATA 5 /* Data format. */ #define EI_VERSION 6 /* ELF format version. */ #define EI_OSABI 7 /* Operating system / ABI identification */ #define EI_ABIVERSION 8 /* ABI version */ #define OLD_EI_BRAND 8 /* Start of architecture identification. */ #define EI_PAD 9 /* Start of padding (per SVR4 ABI). */ #define EI_NIDENT 16 /* Size of e_ident array. */ /* Values for the magic number bytes. */ #define ELFMAG0 0x7f #define ELFMAG1 'E' #define ELFMAG2 'L' #define ELFMAG3 'F' #define ELFMAG "\177ELF" /* magic string */ #define SELFMAG 4 /* magic string size */ /* Values for e_ident[EI_VERSION] and e_version. */ #define EV_NONE 0 #define EV_CURRENT 1 /* Values for e_ident[EI_CLASS]. */ #define ELFCLASSNONE 0 /* Unknown class. */ #define ELFCLASS32 1 /* 32-bit architecture. */ #define ELFCLASS64 2 /* 64-bit architecture. */ /* Values for e_ident[EI_DATA]. */ #define ELFDATANONE 0 /* Unknown data format. */ #define ELFDATA2LSB 1 /* 2's complement little-endian. */ #define ELFDATA2MSB 2 /* 2's complement big-endian. */ /* Values for e_ident[EI_OSABI]. */ #define ELFOSABI_NONE 0 /* UNIX System V ABI */ #define ELFOSABI_HPUX 1 /* HP-UX operating system */ #define ELFOSABI_NETBSD 2 /* NetBSD */ #define ELFOSABI_LINUX 3 /* GNU/Linux */ #define ELFOSABI_HURD 4 /* GNU/Hurd */ #define ELFOSABI_86OPEN 5 /* 86Open common IA32 ABI */ #define ELFOSABI_SOLARIS 6 /* Solaris */ #define ELFOSABI_AIX 7 /* AIX */ #define ELFOSABI_IRIX 8 /* IRIX */ #define ELFOSABI_FREEBSD 9 /* FreeBSD */ #define ELFOSABI_TRU64 10 /* TRU64 UNIX */ #define ELFOSABI_MODESTO 11 /* Novell Modesto */ #define ELFOSABI_OPENBSD 12 /* OpenBSD */ #define ELFOSABI_OPENVMS 13 /* Open VMS */ #define ELFOSABI_NSK 14 /* HP Non-Stop Kernel */ #define ELFOSABI_AROS 15 /* Amiga Research OS */ #define ELFOSABI_FENIXOS 16 /* FenixOS */ #define ELFOSABI_CLOUDABI 17 /* Nuxi CloudABI */ #define ELFOSABI_OPENVOS 18 /* Stratus Technologies OpenVOS */ #define ELFOSABI_ARM_AEABI 64 /* ARM EABI */ #define ELFOSABI_ARM 97 /* ARM */ #define ELFOSABI_STANDALONE 255 /* Standalone (embedded) application */ #define ELFOSABI_SYSV ELFOSABI_NONE /* symbol used in old spec */ #define ELFOSABI_MONTEREY ELFOSABI_AIX /* Monterey */ #define ELFOSABI_GNU ELFOSABI_LINUX /* e_ident */ #define IS_ELF(ehdr) ((ehdr).e_ident[EI_MAG0] == ELFMAG0 && \ (ehdr).e_ident[EI_MAG1] == ELFMAG1 && \ (ehdr).e_ident[EI_MAG2] == ELFMAG2 && \ (ehdr).e_ident[EI_MAG3] == ELFMAG3) /* Values for e_type. */ #define ET_NONE 0 /* Unknown type. */ #define ET_REL 1 /* Relocatable. */ #define ET_EXEC 2 /* Executable. */ #define ET_DYN 3 /* Shared object. */ #define ET_CORE 4 /* Core file. */ #define ET_LOOS 0xfe00 /* First operating system specific. */ #define ET_HIOS 0xfeff /* Last operating system-specific. */ #define ET_LOPROC 0xff00 /* First processor-specific. */ #define ET_HIPROC 0xffff /* Last processor-specific. */ /* Values for e_machine. */ #define EM_NONE 0 /* Unknown machine. */ #define EM_M32 1 /* AT&T WE32100. */ #define EM_SPARC 2 /* Sun SPARC. */ #define EM_386 3 /* Intel i386. 
*/ #define EM_68K 4 /* Motorola 68000. */ #define EM_88K 5 /* Motorola 88000. */ #define EM_IAMCU 6 /* Intel MCU. */ #define EM_860 7 /* Intel i860. */ #define EM_MIPS 8 /* MIPS R3000 Big-Endian only. */ #define EM_S370 9 /* IBM System/370. */ #define EM_MIPS_RS3_LE 10 /* MIPS R3000 Little-Endian. */ #define EM_PARISC 15 /* HP PA-RISC. */ #define EM_VPP500 17 /* Fujitsu VPP500. */ #define EM_SPARC32PLUS 18 /* SPARC v8plus. */ #define EM_960 19 /* Intel 80960. */ #define EM_PPC 20 /* PowerPC 32-bit. */ #define EM_PPC64 21 /* PowerPC 64-bit. */ #define EM_S390 22 /* IBM System/390. */ #define EM_V800 36 /* NEC V800. */ #define EM_FR20 37 /* Fujitsu FR20. */ #define EM_RH32 38 /* TRW RH-32. */ #define EM_RCE 39 /* Motorola RCE. */ #define EM_ARM 40 /* ARM. */ #define EM_SH 42 /* Hitachi SH. */ #define EM_SPARCV9 43 /* SPARC v9 64-bit. */ #define EM_TRICORE 44 /* Siemens TriCore embedded processor. */ #define EM_ARC 45 /* Argonaut RISC Core. */ #define EM_H8_300 46 /* Hitachi H8/300. */ #define EM_H8_300H 47 /* Hitachi H8/300H. */ #define EM_H8S 48 /* Hitachi H8S. */ #define EM_H8_500 49 /* Hitachi H8/500. */ #define EM_IA_64 50 /* Intel IA-64 Processor. */ #define EM_MIPS_X 51 /* Stanford MIPS-X. */ #define EM_COLDFIRE 52 /* Motorola ColdFire. */ #define EM_68HC12 53 /* Motorola M68HC12. */ #define EM_MMA 54 /* Fujitsu MMA. */ #define EM_PCP 55 /* Siemens PCP. */ #define EM_NCPU 56 /* Sony nCPU. */ #define EM_NDR1 57 /* Denso NDR1 microprocessor. */ #define EM_STARCORE 58 /* Motorola Star*Core processor. */ #define EM_ME16 59 /* Toyota ME16 processor. */ #define EM_ST100 60 /* STMicroelectronics ST100 processor. */ #define EM_TINYJ 61 /* Advanced Logic Corp. TinyJ processor. */ #define EM_X86_64 62 /* Advanced Micro Devices x86-64 */ #define EM_AMD64 EM_X86_64 /* Advanced Micro Devices x86-64 (compat) */ #define EM_PDSP 63 /* Sony DSP Processor. */ #define EM_FX66 66 /* Siemens FX66 microcontroller. */ #define EM_ST9PLUS 67 /* STMicroelectronics ST9+ 8/16 microcontroller. */ #define EM_ST7 68 /* STmicroelectronics ST7 8-bit microcontroller. */ #define EM_68HC16 69 /* Motorola MC68HC16 microcontroller. */ #define EM_68HC11 70 /* Motorola MC68HC11 microcontroller. */ #define EM_68HC08 71 /* Motorola MC68HC08 microcontroller. */ #define EM_68HC05 72 /* Motorola MC68HC05 microcontroller. */ #define EM_SVX 73 /* Silicon Graphics SVx. */ #define EM_ST19 74 /* STMicroelectronics ST19 8-bit mc. */ #define EM_VAX 75 /* Digital VAX. */ #define EM_CRIS 76 /* Axis Communications 32-bit embedded processor. */ #define EM_JAVELIN 77 /* Infineon Technologies 32-bit embedded processor. */ #define EM_FIREPATH 78 /* Element 14 64-bit DSP Processor. */ #define EM_ZSP 79 /* LSI Logic 16-bit DSP Processor. */ #define EM_MMIX 80 /* Donald Knuth's educational 64-bit proc. */ #define EM_HUANY 81 /* Harvard University machine-independent object files. */ #define EM_PRISM 82 /* SiTera Prism. */ #define EM_AVR 83 /* Atmel AVR 8-bit microcontroller. */ #define EM_FR30 84 /* Fujitsu FR30. */ #define EM_D10V 85 /* Mitsubishi D10V. */ #define EM_D30V 86 /* Mitsubishi D30V. */ #define EM_V850 87 /* NEC v850. */ #define EM_M32R 88 /* Mitsubishi M32R. */ #define EM_MN10300 89 /* Matsushita MN10300. */ #define EM_MN10200 90 /* Matsushita MN10200. */ #define EM_PJ 91 /* picoJava. */ #define EM_OPENRISC 92 /* OpenRISC 32-bit embedded processor. */ #define EM_ARC_A5 93 /* ARC Cores Tangent-A5. */ #define EM_XTENSA 94 /* Tensilica Xtensa Architecture. */ #define EM_VIDEOCORE 95 /* Alphamosaic VideoCore processor. 
*/ #define EM_TMM_GPP 96 /* Thompson Multimedia General Purpose Processor. */ #define EM_NS32K 97 /* National Semiconductor 32000 series. */ #define EM_TPC 98 /* Tenor Network TPC processor. */ #define EM_SNP1K 99 /* Trebia SNP 1000 processor. */ #define EM_ST200 100 /* STMicroelectronics ST200 microcontroller. */ #define EM_IP2K 101 /* Ubicom IP2xxx microcontroller family. */ #define EM_MAX 102 /* MAX Processor. */ #define EM_CR 103 /* National Semiconductor CompactRISC microprocessor. */ #define EM_F2MC16 104 /* Fujitsu F2MC16. */ #define EM_MSP430 105 /* Texas Instruments embedded microcontroller msp430. */ #define EM_BLACKFIN 106 /* Analog Devices Blackfin (DSP) processor. */ #define EM_SE_C33 107 /* S1C33 Family of Seiko Epson processors. */ #define EM_SEP 108 /* Sharp embedded microprocessor. */ #define EM_ARCA 109 /* Arca RISC Microprocessor. */ #define EM_UNICORE 110 /* Microprocessor series from PKU-Unity Ltd. and MPRC of Peking University */ #define EM_AARCH64 183 /* AArch64 (64-bit ARM) */ #define EM_RISCV 243 /* RISC-V */ #define EM_LOONGARCH 258 /* Loongson LoongArch */ /* Non-standard or deprecated. */ #define EM_486 6 /* Intel i486. */ #define EM_MIPS_RS4_BE 10 /* MIPS R4000 Big-Endian */ #define EM_ALPHA_STD 41 /* Digital Alpha (standard value). */ #define EM_ALPHA 0x9026 /* Alpha (written in the absence of an ABI) */ /** * e_flags */ #define EF_ARM_RELEXEC 0x1 #define EF_ARM_HASENTRY 0x2 #define EF_ARM_SYMSARESORTED 0x4 #define EF_ARM_DYNSYMSUSESEGIDX 0x8 #define EF_ARM_MAPSYMSFIRST 0x10 #define EF_ARM_LE8 0x00400000 #define EF_ARM_BE8 0x00800000 #define EF_ARM_EABIMASK 0xFF000000 #define EF_ARM_EABI_UNKNOWN 0x00000000 #define EF_ARM_EABI_VER1 0x01000000 #define EF_ARM_EABI_VER2 0x02000000 #define EF_ARM_EABI_VER3 0x03000000 #define EF_ARM_EABI_VER4 0x04000000 #define EF_ARM_EABI_VER5 0x05000000 #define EF_ARM_EABI_VERSION(x) ((x) & EF_ARM_EABIMASK) #define EF_ARM_INTERWORK 0x00000004 #define EF_ARM_APCS_26 0x00000008 #define EF_ARM_APCS_FLOAT 0x00000010 #define EF_ARM_PIC 0x00000020 #define EF_ARM_ALIGN8 0x00000040 #define EF_ARM_NEW_ABI 0x00000080 #define EF_ARM_OLD_ABI 0x00000100 #define EF_ARM_ABI_FLOAT_SOFT 0x00000200 #define EF_ARM_SOFT_FLOAT EF_ARM_ABI_FLOAT_SOFT /* Pre-V5 ABI name */ #define EF_ARM_ABI_FLOAT_HARD 0x00000400 #define EF_ARM_VFP_FLOAT EF_ARM_ABI_FLOAT_HARD /* Pre-V5 ABI name */ #define EF_ARM_MAVERICK_FLOAT 0x00000800 #define EF_MIPS_NOREORDER 0x00000001 #define EF_MIPS_PIC 0x00000002 /* Contains PIC code */ #define EF_MIPS_CPIC 0x00000004 /* STD PIC calling sequence */ #define EF_MIPS_UCODE 0x00000010 #define EF_MIPS_ABI2 0x00000020 /* N32 */ #define EF_MIPS_OPTIONS_FIRST 0x00000080 #define EF_MIPS_ABI 0x0000F000 #define EF_MIPS_ABI_O32 0x00001000 #define EF_MIPS_ABI_O64 0x00002000 #define EF_MIPS_ABI_EABI32 0x00003000 #define EF_MIPS_ABI_EABI64 0x00004000 #define EF_MIPS_ARCH_ASE 0x0F000000 /* Architectural extensions */ #define EF_MIPS_ARCH_ASE_MDMX 0x08000000 /* MDMX multimedia extension */ #define EF_MIPS_ARCH_ASE_M16 0x04000000 /* MIPS-16 ISA extensions */ #define EF_MIPS_ARCH 0xF0000000 /* Architecture field */ #define EF_MIPS_ARCH_1 0x00000000 /* -mips1 code */ #define EF_MIPS_ARCH_2 0x10000000 /* -mips2 code */ #define EF_MIPS_ARCH_3 0x20000000 /* -mips3 code */ #define EF_MIPS_ARCH_4 0x30000000 /* -mips4 code */ #define EF_MIPS_ARCH_5 0x40000000 /* -mips5 code */ #define EF_MIPS_ARCH_32 0x50000000 /* -mips32 code */ #define EF_MIPS_ARCH_64 0x60000000 /* -mips64 code */ #define EF_MIPS_ARCH_32R2 0x70000000 /* -mips32r2 code */ #define 
EF_MIPS_ARCH_64R2 0x80000000 /* -mips64r2 code */ #define EF_PPC_EMB 0x80000000 #define EF_PPC_RELOCATABLE 0x00010000 #define EF_PPC_RELOCATABLE_LIB 0x00008000 #define EF_RISCV_RVC 0x00000001 #define EF_RISCV_FLOAT_ABI_MASK 0x00000006 #define EF_RISCV_FLOAT_ABI_SOFT 0x00000000 #define EF_RISCV_FLOAT_ABI_SINGLE 0x000002 #define EF_RISCV_FLOAT_ABI_DOUBLE 0x000004 #define EF_RISCV_FLOAT_ABI_QUAD 0x00000006 #define EF_RISCV_RVE 0x00000008 #define EF_RISCV_TSO 0x00000010 /* * Loongson LoongArch Specific e_flags * * Definitions from LoongArch ELF psABI v2.01. * Reference: https://github.com/loongson/LoongArch-Documentation * (commit hash 296de4def055c871809068e0816325a4ac04eb12) */ /* LoongArch Base ABI Modifiers */ #define EF_LOONGARCH_ABI_SOFT_FLOAT 0x00000001 #define EF_LOONGARCH_ABI_SINGLE_FLOAT 0x00000002 #define EF_LOONGARCH_ABI_DOUBLE_FLOAT 0x00000003 #define EF_LOONGARCH_ABI_MODIFIER_MASK 0x00000007 /* LoongArch Object file ABI versions */ #define EF_LOONGARCH_OBJABI_V0 0x00000000 #define EF_LOONGARCH_OBJABI_V1 0x00000040 #define EF_LOONGARCH_OBJABI_MASK 0x000000C0 #define EF_SPARC_EXT_MASK 0x00ffff00 #define EF_SPARC_32PLUS 0x00000100 #define EF_SPARC_SUN_US1 0x00000200 #define EF_SPARC_HAL_R1 0x00000200 #define EF_SPARC_SUN_US3 0x00000800 #define EF_SPARCV9_MM 0x00000003 #define EF_SPARCV9_TSO 0x00000000 #define EF_SPARCV9_PSO 0x00000001 #define EF_SPARCV9_RMO 0x00000002 /* Special section indexes. */ #define SHN_UNDEF 0 /* Undefined, missing, irrelevant. */ #define SHN_LORESERVE 0xff00 /* First of reserved range. */ #define SHN_LOPROC 0xff00 /* First processor-specific. */ #define SHN_HIPROC 0xff1f /* Last processor-specific. */ #define SHN_LOOS 0xff20 /* First operating system-specific. */ #define SHN_FBSD_CACHED SHN_LOOS /* Transient, for sys/kern/link_elf_obj linker only: Cached global in local symtab. */ #define SHN_HIOS 0xff3f /* Last operating system-specific. */ #define SHN_ABS 0xfff1 /* Absolute values. */ #define SHN_COMMON 0xfff2 /* Common data. */ #define SHN_XINDEX 0xffff /* Escape -- index stored elsewhere. */ #define SHN_HIRESERVE 0xffff /* Last of reserved range. */ /* sh_type */ #define SHT_NULL 0 /* inactive */ #define SHT_PROGBITS 1 /* program defined information */ #define SHT_SYMTAB 2 /* symbol table section */ #define SHT_STRTAB 3 /* string table section */ #define SHT_RELA 4 /* relocation section with addends */ #define SHT_HASH 5 /* symbol hash table section */ #define SHT_DYNAMIC 6 /* dynamic section */ #define SHT_NOTE 7 /* note section */ #define SHT_NOBITS 8 /* no space section */ #define SHT_REL 9 /* relocation section - no addends */ #define SHT_SHLIB 10 /* reserved - purpose unknown */ #define SHT_DYNSYM 11 /* dynamic symbol table section */ #define SHT_INIT_ARRAY 14 /* Initialization function pointers. */ #define SHT_FINI_ARRAY 15 /* Termination function pointers. */ #define SHT_PREINIT_ARRAY 16 /* Pre-initialization function ptrs. */ #define SHT_GROUP 17 /* Section group. */ #define SHT_SYMTAB_SHNDX 18 /* Section indexes (see SHN_XINDEX). 
*/ #define SHT_LOOS 0x60000000 /* First of OS specific semantics */ #define SHT_LOSUNW 0x6ffffff4 #define SHT_SUNW_dof 0x6ffffff4 #define SHT_SUNW_cap 0x6ffffff5 #define SHT_GNU_ATTRIBUTES 0x6ffffff5 #define SHT_SUNW_SIGNATURE 0x6ffffff6 #define SHT_GNU_HASH 0x6ffffff6 #define SHT_GNU_LIBLIST 0x6ffffff7 #define SHT_SUNW_ANNOTATE 0x6ffffff7 #define SHT_SUNW_DEBUGSTR 0x6ffffff8 #define SHT_SUNW_DEBUG 0x6ffffff9 #define SHT_SUNW_move 0x6ffffffa #define SHT_SUNW_COMDAT 0x6ffffffb #define SHT_SUNW_syminfo 0x6ffffffc #define SHT_SUNW_verdef 0x6ffffffd #define SHT_GNU_verdef 0x6ffffffd /* Symbol versions provided */ #define SHT_SUNW_verneed 0x6ffffffe #define SHT_GNU_verneed 0x6ffffffe /* Symbol versions required */ #define SHT_SUNW_versym 0x6fffffff #define SHT_GNU_versym 0x6fffffff /* Symbol version table */ #define SHT_HISUNW 0x6fffffff #define SHT_HIOS 0x6fffffff /* Last of OS specific semantics */ #define SHT_LOPROC 0x70000000 /* reserved range for processor */ #define SHT_X86_64_UNWIND 0x70000001 /* unwind information */ #define SHT_AMD64_UNWIND SHT_X86_64_UNWIND #define SHT_ARM_EXIDX 0x70000001 /* Exception index table. */ #define SHT_ARM_PREEMPTMAP 0x70000002 /* BPABI DLL dynamic linking pre-emption map. */ #define SHT_ARM_ATTRIBUTES 0x70000003 /* Object file compatibility attributes. */ #define SHT_ARM_DEBUGOVERLAY 0x70000004 /* See DBGOVL for details. */ #define SHT_ARM_OVERLAYSECTION 0x70000005 /* See DBGOVL for details. */ #define SHT_MIPS_LIBLIST 0x70000000 #define SHT_MIPS_MSYM 0x70000001 #define SHT_MIPS_CONFLICT 0x70000002 #define SHT_MIPS_GPTAB 0x70000003 #define SHT_MIPS_UCODE 0x70000004 #define SHT_MIPS_DEBUG 0x70000005 #define SHT_MIPS_REGINFO 0x70000006 #define SHT_MIPS_PACKAGE 0x70000007 #define SHT_MIPS_PACKSYM 0x70000008 #define SHT_MIPS_RELD 0x70000009 #define SHT_MIPS_IFACE 0x7000000b #define SHT_MIPS_CONTENT 0x7000000c #define SHT_MIPS_OPTIONS 0x7000000d #define SHT_MIPS_DELTASYM 0x7000001b #define SHT_MIPS_DELTAINST 0x7000001c #define SHT_MIPS_DELTACLASS 0x7000001d #define SHT_MIPS_DWARF 0x7000001e /* MIPS gcc uses MIPS_DWARF */ #define SHT_MIPS_DELTADECL 0x7000001f #define SHT_MIPS_SYMBOL_LIB 0x70000020 #define SHT_MIPS_EVENTS 0x70000021 #define SHT_MIPS_TRANSLATE 0x70000022 #define SHT_MIPS_PIXIE 0x70000023 #define SHT_MIPS_XLATE 0x70000024 #define SHT_MIPS_XLATE_DEBUG 0x70000025 #define SHT_MIPS_WHIRL 0x70000026 #define SHT_MIPS_EH_REGION 0x70000027 #define SHT_MIPS_XLATE_OLD 0x70000028 #define SHT_MIPS_PDR_EXCEPTION 0x70000029 #define SHT_MIPS_ABIFLAGS 0x7000002a #define SHT_SPARC_GOTDATA 0x70000000 #define SHTORDERED #define SHT_HIPROC 0x7fffffff /* specific section header types */ #define SHT_LOUSER 0x80000000 /* reserved range for application */ #define SHT_HIUSER 0xffffffff /* specific indexes */ /* Flags for sh_flags. */ #define SHF_WRITE 0x1 /* Section contains writable data. */ #define SHF_ALLOC 0x2 /* Section occupies memory. */ #define SHF_EXECINSTR 0x4 /* Section contains instructions. */ #define SHF_MERGE 0x10 /* Section may be merged. */ #define SHF_STRINGS 0x20 /* Section contains strings. */ #define SHF_INFO_LINK 0x40 /* sh_info holds section index. */ #define SHF_LINK_ORDER 0x80 /* Special ordering requirements. */ #define SHF_OS_NONCONFORMING 0x100 /* OS-specific processing required. */ #define SHF_GROUP 0x200 /* Member of section group. */ #define SHF_TLS 0x400 /* Section contains TLS data. */ #define SHF_COMPRESSED 0x800 /* Section contains compressed data. */ #define SHF_MASKOS 0x0ff00000 /* OS-specific semantics. 
*/ #define SHF_MASKPROC 0xf0000000 /* Processor-specific semantics. */ /* Flags for section groups. */ #define GRP_COMDAT 0x1 /* COMDAT semantics. */ /* * Flags / mask for .gnu.versym sections. */ #define VERSYM_VERSION 0x7fff #define VERSYM_HIDDEN 0x8000 /* Values for p_type. */ #define PT_NULL 0 /* Unused entry. */ #define PT_LOAD 1 /* Loadable segment. */ #define PT_DYNAMIC 2 /* Dynamic linking information segment. */ #define PT_INTERP 3 /* Pathname of interpreter. */ #define PT_NOTE 4 /* Auxiliary information. */ #define PT_SHLIB 5 /* Reserved (not used). */ #define PT_PHDR 6 /* Location of program header itself. */ #define PT_TLS 7 /* Thread local storage segment */ #define PT_LOOS 0x60000000 /* First OS-specific. */ #define PT_SUNW_UNWIND 0x6464e550 /* amd64 UNWIND program header */ #define PT_GNU_EH_FRAME 0x6474e550 #define PT_GNU_STACK 0x6474e551 #define PT_GNU_RELRO 0x6474e552 #define PT_DUMP_DELTA 0x6fb5d000 /* va->pa map for kernel dumps (currently arm). */ #define PT_LOSUNW 0x6ffffffa #define PT_SUNWBSS 0x6ffffffa /* Sun Specific segment */ #define PT_SUNWSTACK 0x6ffffffb /* describes the stack segment */ #define PT_SUNWDTRACE 0x6ffffffc /* private */ #define PT_SUNWCAP 0x6ffffffd /* hard/soft capabilities segment */ #define PT_HISUNW 0x6fffffff #define PT_HIOS 0x6fffffff /* Last OS-specific. */ #define PT_LOPROC 0x70000000 /* First processor-specific type. */ #define PT_ARM_ARCHEXT 0x70000000 /* ARM arch compat information. */ #define PT_ARM_EXIDX 0x70000001 /* ARM exception unwind tables. */ #define PT_MIPS_REGINFO 0x70000000 /* MIPS register usage info */ #define PT_MIPS_RTPROC 0x70000001 /* MIPS runtime procedure tbl */ #define PT_MIPS_OPTIONS 0x70000002 /* MIPS e_flags value*/ #define PT_MIPS_ABIFLAGS 0x70000003 /* MIPS fp mode */ #define PT_HIPROC 0x7fffffff /* Last processor-specific type. */ #define PT_OPENBSD_RANDOMIZE 0x65A3DBE6 /* OpenBSD random data segment */ #define PT_OPENBSD_WXNEEDED 0x65A3DBE7 /* OpenBSD EXEC/WRITE pages needed */ #define PT_OPENBSD_BOOTDATA 0x65A41BE6 /* OpenBSD section for boot args */ /* Values for p_flags. */ #define PF_X 0x1 /* Executable. */ #define PF_W 0x2 /* Writable. */ #define PF_R 0x4 /* Readable. */ #define PF_MASKOS 0x0ff00000 /* Operating system-specific. */ #define PF_MASKPROC 0xf0000000 /* Processor-specific. */ /* Extended program header index. */ #define PN_XNUM 0xffff /* Values for d_tag. */ #define DT_NULL 0 /* Terminating entry. */ #define DT_NEEDED 1 /* String table offset of a needed shared library. */ #define DT_PLTRELSZ 2 /* Total size in bytes of PLT relocations. */ #define DT_PLTGOT 3 /* Processor-dependent address. */ #define DT_HASH 4 /* Address of symbol hash table. */ #define DT_STRTAB 5 /* Address of string table. */ #define DT_SYMTAB 6 /* Address of symbol table. */ #define DT_RELA 7 /* Address of ElfNN_Rela relocations. */ #define DT_RELASZ 8 /* Total size of ElfNN_Rela relocations. */ #define DT_RELAENT 9 /* Size of each ElfNN_Rela relocation entry. */ #define DT_STRSZ 10 /* Size of string table. */ #define DT_SYMENT 11 /* Size of each symbol table entry. */ #define DT_INIT 12 /* Address of initialization function. */ #define DT_FINI 13 /* Address of finalization function. */ #define DT_SONAME 14 /* String table offset of shared object name. */ #define DT_RPATH 15 /* String table offset of library path. [sup] */ #define DT_SYMBOLIC 16 /* Indicates "symbolic" linking. [sup] */ #define DT_REL 17 /* Address of ElfNN_Rel relocations. */ #define DT_RELSZ 18 /* Total size of ElfNN_Rel relocations. 
*/ #define DT_RELENT 19 /* Size of each ElfNN_Rel relocation. */ #define DT_PLTREL 20 /* Type of relocation used for PLT. */ #define DT_DEBUG 21 /* Reserved (not used). */ #define DT_TEXTREL 22 /* Indicates there may be relocations in non-writable segments. [sup] */ #define DT_JMPREL 23 /* Address of PLT relocations. */ #define DT_BIND_NOW 24 /* [sup] */ #define DT_INIT_ARRAY 25 /* Address of the array of pointers to initialization functions */ #define DT_FINI_ARRAY 26 /* Address of the array of pointers to termination functions */ #define DT_INIT_ARRAYSZ 27 /* Size in bytes of the array of initialization functions. */ #define DT_FINI_ARRAYSZ 28 /* Size in bytes of the array of termination functions. */ #define DT_RUNPATH 29 /* String table offset of a null-terminated library search path string. */ #define DT_FLAGS 30 /* Object specific flag values. */ #define DT_ENCODING 32 /* Values greater than or equal to DT_ENCODING and less than DT_LOOS follow the rules for the interpretation of the d_un union as follows: even == 'd_ptr', odd == 'd_val' or none */ #define DT_PREINIT_ARRAY 32 /* Address of the array of pointers to pre-initialization functions. */ #define DT_PREINIT_ARRAYSZ 33 /* Size in bytes of the array of pre-initialization functions. */ #define DT_MAXPOSTAGS 34 /* number of positive tags */ #define DT_RELRSZ 35 /* Total size of ElfNN_Relr relocations. */ #define DT_RELR 36 /* Address of ElfNN_Relr relocations. */ #define DT_RELRENT 37 /* Size of each ElfNN_Relr relocation. */ #define DT_LOOS 0x6000000d /* First OS-specific */ #define DT_SUNW_AUXILIARY 0x6000000d /* symbol auxiliary name */ #define DT_SUNW_RTLDINF 0x6000000e /* ld.so.1 info (private) */ #define DT_SUNW_FILTER 0x6000000f /* symbol filter name */ #define DT_SUNW_CAP 0x60000010 /* hardware/software */ #define DT_SUNW_ASLR 0x60000023 /* ASLR control */ #define DT_HIOS 0x6ffff000 /* Last OS-specific */ /* * DT_* entries which fall between DT_VALRNGHI & DT_VALRNGLO use the * Dyn.d_un.d_val field of the Elf*_Dyn structure. */ #define DT_VALRNGLO 0x6ffffd00 #define DT_GNU_PRELINKED 0x6ffffdf5 /* prelinking timestamp */ #define DT_GNU_CONFLICTSZ 0x6ffffdf6 /* size of conflict section */ #define DT_GNU_LIBLISTSZ 0x6ffffdf7 /* size of library list */ #define DT_CHECKSUM 0x6ffffdf8 /* elf checksum */ #define DT_PLTPADSZ 0x6ffffdf9 /* pltpadding size */ #define DT_MOVEENT 0x6ffffdfa /* move table entry size */ #define DT_MOVESZ 0x6ffffdfb /* move table size */ #define DT_FEATURE 0x6ffffdfc /* feature holder */ #define DT_FEATURE_1 DT_FEATURE #define DT_POSFLAG_1 0x6ffffdfd /* flags for DT_* entries, effecting */ /* the following DT_* entry. */ /* See DF_P1_* definitions */ #define DT_SYMINSZ 0x6ffffdfe /* syminfo table size (in bytes) */ #define DT_SYMINENT 0x6ffffdff /* syminfo entry size (in bytes) */ #define DT_VALRNGHI 0x6ffffdff /* * DT_* entries which fall between DT_ADDRRNGHI & DT_ADDRRNGLO use the * Dyn.d_un.d_ptr field of the Elf*_Dyn structure. * * If any adjustment is made to the ELF object after it has been * built, these entries will need to be adjusted. */ #define DT_ADDRRNGLO 0x6ffffe00 #define DT_GNU_HASH 0x6ffffef5 /* GNU-style hash table */ #define DT_TLSDESC_PLT 0x6ffffef6 /* loc. of PLT for tlsdesc resolver */ #define DT_TLSDESC_GOT 0x6ffffef7 /* loc. 
of GOT for tlsdesc resolver */ #define DT_GNU_CONFLICT 0x6ffffef8 /* address of conflict section */ #define DT_GNU_LIBLIST 0x6ffffef9 /* address of library list */ #define DT_CONFIG 0x6ffffefa /* configuration information */ #define DT_DEPAUDIT 0x6ffffefb /* dependency auditing */ #define DT_AUDIT 0x6ffffefc /* object auditing */ #define DT_PLTPAD 0x6ffffefd /* pltpadding (sparcv9) */ #define DT_MOVETAB 0x6ffffefe /* move table */ #define DT_SYMINFO 0x6ffffeff /* syminfo table */ #define DT_ADDRRNGHI 0x6ffffeff #define DT_VERSYM 0x6ffffff0 /* Address of versym section. */ #define DT_RELACOUNT 0x6ffffff9 /* number of RELATIVE relocations */ #define DT_RELCOUNT 0x6ffffffa /* number of RELATIVE relocations */ #define DT_FLAGS_1 0x6ffffffb /* state flags - see DF_1_* defs */ #define DT_VERDEF 0x6ffffffc /* Address of verdef section. */ #define DT_VERDEFNUM 0x6ffffffd /* Number of elems in verdef section */ #define DT_VERNEED 0x6ffffffe /* Address of verneed section. */ #define DT_VERNEEDNUM 0x6fffffff /* Number of elems in verneed section */ #define DT_LOPROC 0x70000000 /* First processor-specific type. */ #define DT_AARCH64_BTI_PLT 0x70000001 #define DT_AARCH64_PAC_PLT 0x70000003 #define DT_AARCH64_VARIANT_PCS 0x70000005 #define DT_ARM_SYMTABSZ 0x70000001 #define DT_ARM_PREEMPTMAP 0x70000002 #define DT_SPARC_REGISTER 0x70000001 #define DT_DEPRECATED_SPARC_REGISTER 0x7000001 #define DT_MIPS_RLD_VERSION 0x70000001 #define DT_MIPS_TIME_STAMP 0x70000002 #define DT_MIPS_ICHECKSUM 0x70000003 #define DT_MIPS_IVERSION 0x70000004 #define DT_MIPS_FLAGS 0x70000005 #define DT_MIPS_BASE_ADDRESS 0x70000006 #define DT_MIPS_CONFLICT 0x70000008 #define DT_MIPS_LIBLIST 0x70000009 #define DT_MIPS_LOCAL_GOTNO 0x7000000a #define DT_MIPS_CONFLICTNO 0x7000000b #define DT_MIPS_LIBLISTNO 0x70000010 #define DT_MIPS_SYMTABNO 0x70000011 #define DT_MIPS_UNREFEXTNO 0x70000012 #define DT_MIPS_GOTSYM 0x70000013 #define DT_MIPS_HIPAGENO 0x70000014 #define DT_MIPS_RLD_MAP 0x70000016 #define DT_MIPS_DELTA_CLASS 0x70000017 #define DT_MIPS_DELTA_CLASS_NO 0x70000018 #define DT_MIPS_DELTA_INSTANCE 0x70000019 #define DT_MIPS_DELTA_INSTANCE_NO 0x7000001A #define DT_MIPS_DELTA_RELOC 0x7000001B #define DT_MIPS_DELTA_RELOC_NO 0x7000001C #define DT_MIPS_DELTA_SYM 0x7000001D #define DT_MIPS_DELTA_SYM_NO 0x7000001E #define DT_MIPS_DELTA_CLASSSYM 0x70000020 #define DT_MIPS_DELTA_CLASSSYM_NO 0x70000021 #define DT_MIPS_CXX_FLAGS 0x70000022 #define DT_MIPS_PIXIE_INIT 0x70000023 #define DT_MIPS_SYMBOL_LIB 0x70000024 #define DT_MIPS_LOCALPAGE_GOTIDX 0x70000025 #define DT_MIPS_LOCAL_GOTIDX 0x70000026 #define DT_MIPS_HIDDEN_GOTIDX 0x70000027 #define DT_MIPS_PROTECTED_GOTIDX 0x70000028 #define DT_MIPS_OPTIONS 0x70000029 #define DT_MIPS_INTERFACE 0x7000002A #define DT_MIPS_DYNSTR_ALIGN 0x7000002B #define DT_MIPS_INTERFACE_SIZE 0x7000002C #define DT_MIPS_RLD_TEXT_RESOLVE_ADDR 0x7000002D #define DT_MIPS_PERF_SUFFIX 0x7000002E #define DT_MIPS_COMPACT_SIZE 0x7000002F #define DT_MIPS_GP_VALUE 0x70000030 #define DT_MIPS_AUX_DYNAMIC 0x70000031 #define DT_MIPS_PLTGOT 0x70000032 #define DT_MIPS_RLD_OBJ_UPDATE 0x70000033 #define DT_MIPS_RWPLT 0x70000034 #define DT_MIPS_RLD_MAP_REL 0x70000035 #define DT_PPC_GOT 0x70000000 #define DT_PPC_TLSOPT 0x70000001 #define DT_PPC64_GLINK 0x70000000 #define DT_PPC64_OPD 0x70000001 #define DT_PPC64_OPDSZ 0x70000002 #define DT_PPC64_TLSOPT 0x70000003 #define DT_AUXILIARY 0x7ffffffd /* shared library auxiliary name */ #define DT_USED 0x7ffffffe /* ignored - same as needed */ #define DT_FILTER 0x7fffffff /* shared library 
filter name */ #define DT_HIPROC 0x7fffffff /* Last processor-specific type. */ /* Values for DT_FLAGS */ #define DF_ORIGIN 0x0001 /* Indicates that the object being loaded may make reference to the $ORIGIN substitution string */ #define DF_SYMBOLIC 0x0002 /* Indicates "symbolic" linking. */ #define DF_TEXTREL 0x0004 /* Indicates there may be relocations in non-writable segments. */ #define DF_BIND_NOW 0x0008 /* Indicates that the dynamic linker should process all relocations for the object containing this entry before transferring control to the program. */ #define DF_STATIC_TLS 0x0010 /* Indicates that the shared object or executable contains code using a static thread-local storage scheme. */ /* Values for DT_FLAGS_1 */ #define DF_1_BIND_NOW 0x00000001 /* Same as DF_BIND_NOW */ #define DF_1_GLOBAL 0x00000002 /* Set the RTLD_GLOBAL for object */ #define DF_1_NODELETE 0x00000008 /* Set the RTLD_NODELETE for object */ #define DF_1_LOADFLTR 0x00000010 /* Immediate loading of filtees */ #define DF_1_NOOPEN 0x00000040 /* Do not allow loading on dlopen() */ #define DF_1_ORIGIN 0x00000080 /* Process $ORIGIN */ #define DF_1_INTERPOSE 0x00000400 /* Interpose all objects but main */ #define DF_1_NODEFLIB 0x00000800 /* Do not search default paths */ #define DF_1_PIE 0x08000000 /* Is position-independent executable */ /* Values for l_flags. */ #define LL_NONE 0x0 /* no flags */ #define LL_EXACT_MATCH 0x1 /* require an exact match */ #define LL_IGNORE_INT_VER 0x2 /* ignore version incompatibilities */ #define LL_REQUIRE_MINOR 0x4 #define LL_EXPORTS 0x8 #define LL_DELAY_LOAD 0x10 #define LL_DELTA 0x20 /* Note section names */ #define ELF_NOTE_FREEBSD "FreeBSD" #define ELF_NOTE_NETBSD "NetBSD" #define ELF_NOTE_SOLARIS "SUNW Solaris" #define ELF_NOTE_GNU "GNU" /* Values for n_type used in executables. */ #define NT_FREEBSD_ABI_TAG 1 #define NT_FREEBSD_NOINIT_TAG 2 #define NT_FREEBSD_ARCH_TAG 3 #define NT_FREEBSD_FEATURE_CTL 4 /* NT_FREEBSD_FEATURE_CTL desc[0] bits */ #define NT_FREEBSD_FCTL_ASLR_DISABLE 0x00000001 #define NT_FREEBSD_FCTL_PROTMAX_DISABLE 0x00000002 #define NT_FREEBSD_FCTL_STKGAP_DISABLE 0x00000004 #define NT_FREEBSD_FCTL_WXNEEDED 0x00000008 #define NT_FREEBSD_FCTL_LA48 0x00000010 /* was ASG_DISABLE, do not reuse 0x00000020 */ /* Values for n_type. Used in core files. */ #define NT_PRSTATUS 1 /* Process status. */ #define NT_FPREGSET 2 /* Floating point registers. */ #define NT_PRPSINFO 3 /* Process state info. */ #define NT_THRMISC 7 /* Thread miscellaneous info. */ #define NT_PROCSTAT_PROC 8 /* Procstat proc data. */ #define NT_PROCSTAT_FILES 9 /* Procstat files data. */ #define NT_PROCSTAT_VMMAP 10 /* Procstat vmmap data. */ #define NT_PROCSTAT_GROUPS 11 /* Procstat groups data. */ #define NT_PROCSTAT_UMASK 12 /* Procstat umask data. */ #define NT_PROCSTAT_RLIMIT 13 /* Procstat rlimit data. */ #define NT_PROCSTAT_OSREL 14 /* Procstat osreldate data. */ #define NT_PROCSTAT_PSSTRINGS 15 /* Procstat ps_strings data. */ #define NT_PROCSTAT_AUXV 16 /* Procstat auxv data. */ #define NT_PTLWPINFO 17 /* Thread ptrace miscellaneous info. */ +#define NT_PROCSTAT_KQUEUES 18 /* Procstat kqueues events. */ #define NT_PPC_VMX 0x100 /* PowerPC Altivec/VMX registers */ #define NT_PPC_VSX 0x102 /* PowerPC VSX registers */ #define NT_X86_SEGBASES 0x200 /* x86 FS/GS base addresses. */ #define NT_X86_XSTATE 0x202 /* x86 XSAVE extended state. 
*/ #define NT_ARM_VFP 0x400 /* ARM VFP registers */ #define NT_ARM_TLS 0x401 /* ARM TLS register */ #define NT_ARM_SVE 0x405 /* ARM SVE registers */ #define NT_ARM_ADDR_MASK 0x406 /* arm64 address mask (e.g. for TBI) */ /* GNU note types. */ #define NT_GNU_ABI_TAG 1 #define NT_GNU_HWCAP 2 #define NT_GNU_BUILD_ID 3 #define NT_GNU_GOLD_VERSION 4 #define NT_GNU_PROPERTY_TYPE_0 5 #define GNU_PROPERTY_LOPROC 0xc0000000 #define GNU_PROPERTY_HIPROC 0xdfffffff #define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 #define GNU_PROPERTY_AARCH64_FEATURE_1_BTI 0x00000001 #define GNU_PROPERTY_AARCH64_FEATURE_1_PAC 0x00000002 #define GNU_PROPERTY_X86_FEATURE_1_AND 0xc0000002 #define GNU_PROPERTY_X86_FEATURE_1_IBT 0x00000001 #define GNU_PROPERTY_X86_FEATURE_1_SHSTK 0x00000002 /* Symbol Binding - ELFNN_ST_BIND - st_info */ #define STB_LOCAL 0 /* Local symbol */ #define STB_GLOBAL 1 /* Global symbol */ #define STB_WEAK 2 /* like global - lower precedence */ #define STB_LOOS 10 /* Start of operating system reserved range. */ #define STB_GNU_UNIQUE 10 /* Unique symbol (GNU) */ #define STB_HIOS 12 /* End of operating system reserved range. */ #define STB_LOPROC 13 /* reserved range for processor */ #define STB_HIPROC 15 /* specific semantics. */ /* Symbol type - ELFNN_ST_TYPE - st_info */ #define STT_NOTYPE 0 /* Unspecified type. */ #define STT_OBJECT 1 /* Data object. */ #define STT_FUNC 2 /* Function. */ #define STT_SECTION 3 /* Section. */ #define STT_FILE 4 /* Source file. */ #define STT_COMMON 5 /* Uninitialized common block. */ #define STT_TLS 6 /* TLS object. */ #define STT_NUM 7 #define STT_LOOS 10 /* Reserved range for operating system */ #define STT_GNU_IFUNC 10 #define STT_HIOS 12 /* specific semantics. */ #define STT_LOPROC 13 /* Start of processor reserved range. */ #define STT_SPARC_REGISTER 13 /* SPARC register information. */ #define STT_HIPROC 15 /* End of processor reserved range. */ /* Symbol visibility - ELFNN_ST_VISIBILITY - st_other */ #define STV_DEFAULT 0x0 /* Default visibility (see binding). */ #define STV_INTERNAL 0x1 /* Special meaning in relocatable objects. */ #define STV_HIDDEN 0x2 /* Not visible. */ #define STV_PROTECTED 0x3 /* Visible but not preemptible. */ #define STV_EXPORTED 0x4 #define STV_SINGLETON 0x5 #define STV_ELIMINATE 0x6 /* Architecture specific data - st_other */ #define STO_AARCH64_VARIANT_PCS 0x80 /* Special symbol table indexes. */ #define STN_UNDEF 0 /* Undefined symbol index. */ /* Symbol versioning flags. */ #define VER_DEF_CURRENT 1 #define VER_DEF_IDX(x) VER_NDX(x) #define VER_FLG_BASE 0x01 #define VER_FLG_WEAK 0x02 #define VER_NEED_CURRENT 1 #define VER_NEED_WEAK (1u << 15) #define VER_NEED_HIDDEN VER_NDX_HIDDEN #define VER_NEED_IDX(x) VER_NDX(x) #define VER_NDX_LOCAL 0 #define VER_NDX_GLOBAL 1 #define VER_NDX_GIVEN 2 #define VER_NDX_HIDDEN (1u << 15) #define VER_NDX(x) ((x) & ~(1u << 15)) #define CA_SUNW_NULL 0 #define CA_SUNW_HW_1 1 /* first hardware capabilities entry */ #define CA_SUNW_SF_1 2 /* first software capabilities entry */ /* * Syminfo flag values */ #define SYMINFO_FLG_DIRECT 0x0001 /* symbol ref has direct association */ /* to object containing defn. */ #define SYMINFO_FLG_PASSTHRU 0x0002 /* ignored - see SYMINFO_FLG_FILTER */ #define SYMINFO_FLG_COPY 0x0004 /* symbol is a copy-reloc */ #define SYMINFO_FLG_LAZYLOAD 0x0008 /* object containing defn should be */ /* lazily-loaded */ #define SYMINFO_FLG_DIRECTBIND 0x0010 /* ref should be bound directly to */ /* object containing defn. 
*/ #define SYMINFO_FLG_NOEXTDIRECT 0x0020 /* don't let an external reference */ /* directly bind to this symbol */ #define SYMINFO_FLG_FILTER 0x0002 /* symbol ref is associated to a */ #define SYMINFO_FLG_AUXILIARY 0x0040 /* standard or auxiliary filter */ /* * Syminfo.si_boundto values. */ #define SYMINFO_BT_SELF 0xffff /* symbol bound to self */ #define SYMINFO_BT_PARENT 0xfffe /* symbol bound to parent */ #define SYMINFO_BT_NONE 0xfffd /* no special symbol binding */ #define SYMINFO_BT_EXTERN 0xfffc /* symbol defined as external */ #define SYMINFO_BT_LOWRESERVE 0xff00 /* beginning of reserved entries */ /* * Syminfo version values. */ #define SYMINFO_NONE 0 /* Syminfo version */ #define SYMINFO_CURRENT 1 #define SYMINFO_NUM 2 /* Values for ch_type (compressed section headers). */ #define ELFCOMPRESS_ZLIB 1 /* ZLIB/DEFLATE */ #define ELFCOMPRESS_ZSTD 2 /* Zstandard */ #define ELFCOMPRESS_LOOS 0x60000000 /* OS-specific */ #define ELFCOMPRESS_HIOS 0x6fffffff #define ELFCOMPRESS_LOPROC 0x70000000 /* Processor-specific */ #define ELFCOMPRESS_HIPROC 0x7fffffff /* Values for a_type. */ #define AT_NULL 0 /* Terminates the vector. */ #define AT_IGNORE 1 /* Ignored entry. */ #define AT_EXECFD 2 /* File descriptor of program to load. */ #define AT_PHDR 3 /* Program header of program already loaded. */ #define AT_PHENT 4 /* Size of each program header entry. */ #define AT_PHNUM 5 /* Number of program header entries. */ #define AT_PAGESZ 6 /* Page size in bytes. */ #define AT_BASE 7 /* Interpreter's base address. */ #define AT_FLAGS 8 /* Flags. */ #define AT_ENTRY 9 /* Where interpreter should transfer control. */ #define AT_NOTELF 10 /* Program is not ELF ?? */ #define AT_UID 11 /* Real uid. */ #define AT_EUID 12 /* Effective uid. */ #define AT_GID 13 /* Real gid. */ #define AT_EGID 14 /* Effective gid. */ #define AT_EXECPATH 15 /* Path to the executable. */ #define AT_CANARY 16 /* Canary for SSP. */ #define AT_CANARYLEN 17 /* Length of the canary. */ #define AT_OSRELDATE 18 /* OSRELDATE. */ #define AT_NCPUS 19 /* Number of CPUs. */ #define AT_PAGESIZES 20 /* Pagesizes. */ #define AT_PAGESIZESLEN 21 /* Number of pagesizes. */ #define AT_TIMEKEEP 22 /* Pointer to timehands. */ #define AT_STACKPROT 23 /* Initial stack protection. */ #define AT_EHDRFLAGS 24 /* e_flags field from elf hdr */ #define AT_HWCAP 25 /* CPU feature flags. */ #define AT_HWCAP2 26 /* CPU feature flags 2. */ #define AT_BSDFLAGS 27 /* ELF BSD Flags. */ #define AT_ARGC 28 /* Argument count */ #define AT_ARGV 29 /* Argument vector */ #define AT_ENVC 30 /* Environment count */ #define AT_ENVV 31 /* Environment vector */ #define AT_PS_STRINGS 32 /* struct ps_strings */ #define AT_FXRNG 33 /* Pointer to root RNG seed version. */ #define AT_KPRELOAD 34 /* Base of vdso, preloaded by rtld */ #define AT_USRSTACKBASE 35 /* Top of user stack */ #define AT_USRSTACKLIM 36 /* Grow limit of user stack */ #define AT_CHERI_STATS 37 /* Reserved */ #define AT_COUNT 38 /* Count of defined aux entry types. */ /* * Relocation types. * * All machine architectures are defined here to allow tools on one to * handle others. */ #define R_386_NONE 0 /* No relocation. */ #define R_386_32 1 /* Add symbol value. */ #define R_386_PC32 2 /* Add PC-relative symbol value. */ #define R_386_GOT32 3 /* Add PC-relative GOT offset. */ #define R_386_PLT32 4 /* Add PC-relative PLT offset. */ #define R_386_COPY 5 /* Copy data from shared object. */ #define R_386_GLOB_DAT 6 /* Set GOT entry to data address. */ #define R_386_JMP_SLOT 7 /* Set GOT entry to code address. 
*/ #define R_386_RELATIVE 8 /* Add load address of shared object. */ #define R_386_GOTOFF 9 /* Add GOT-relative symbol address. */ #define R_386_GOTPC 10 /* Add PC-relative GOT table address. */ #define R_386_32PLT 11 #define R_386_TLS_TPOFF 14 /* Negative offset in static TLS block */ #define R_386_TLS_IE 15 /* Absolute address of GOT for -ve static TLS */ #define R_386_TLS_GOTIE 16 /* GOT entry for negative static TLS block */ #define R_386_TLS_LE 17 /* Negative offset relative to static TLS */ #define R_386_TLS_GD 18 /* 32 bit offset to GOT (index,off) pair */ #define R_386_TLS_LDM 19 /* 32 bit offset to GOT (index,zero) pair */ #define R_386_16 20 #define R_386_PC16 21 #define R_386_8 22 #define R_386_PC8 23 #define R_386_TLS_GD_32 24 /* 32 bit offset to GOT (index,off) pair */ #define R_386_TLS_GD_PUSH 25 /* pushl instruction for Sun ABI GD sequence */ #define R_386_TLS_GD_CALL 26 /* call instruction for Sun ABI GD sequence */ #define R_386_TLS_GD_POP 27 /* popl instruction for Sun ABI GD sequence */ #define R_386_TLS_LDM_32 28 /* 32 bit offset to GOT (index,zero) pair */ #define R_386_TLS_LDM_PUSH 29 /* pushl instruction for Sun ABI LD sequence */ #define R_386_TLS_LDM_CALL 30 /* call instruction for Sun ABI LD sequence */ #define R_386_TLS_LDM_POP 31 /* popl instruction for Sun ABI LD sequence */ #define R_386_TLS_LDO_32 32 /* 32 bit offset from start of TLS block */ #define R_386_TLS_IE_32 33 /* 32 bit offset to GOT static TLS offset entry */ #define R_386_TLS_LE_32 34 /* 32 bit offset within static TLS block */ #define R_386_TLS_DTPMOD32 35 /* GOT entry containing TLS index */ #define R_386_TLS_DTPOFF32 36 /* GOT entry containing TLS offset */ #define R_386_TLS_TPOFF32 37 /* GOT entry of -ve static TLS offset */ #define R_386_SIZE32 38 #define R_386_TLS_GOTDESC 39 #define R_386_TLS_DESC_CALL 40 #define R_386_TLS_DESC 41 #define R_386_IRELATIVE 42 /* PLT entry resolved indirectly at runtime */ #define R_386_GOT32X 43 #define R_AARCH64_NONE 0 /* No relocation */ #define R_AARCH64_ABS64 257 /* Absolute offset */ #define R_AARCH64_ABS32 258 /* Absolute, 32-bit overflow check */ #define R_AARCH64_ABS16 259 /* Absolute, 16-bit overflow check */ #define R_AARCH64_PREL64 260 /* PC relative */ #define R_AARCH64_PREL32 261 /* PC relative, 32-bit overflow check */ #define R_AARCH64_PREL16 262 /* PC relative, 16-bit overflow check */ #define R_AARCH64_TSTBR14 279 /* TBZ/TBNZ immediate */ #define R_AARCH64_CONDBR19 280 /* Conditional branch immediate */ #define R_AARCH64_JUMP26 282 /* Branch immediate */ #define R_AARCH64_CALL26 283 /* Call immediate */ #define R_AARCH64_COPY 1024 /* Copy data from shared object */ #define R_AARCH64_GLOB_DAT 1025 /* Set GOT entry to data address */ #define R_AARCH64_JUMP_SLOT 1026 /* Set GOT entry to code address */ #define R_AARCH64_RELATIVE 1027 /* Add load address of shared object */ #define R_AARCH64_TLS_DTPREL64 1028 #define R_AARCH64_TLS_DTPMOD64 1029 #define R_AARCH64_TLS_TPREL64 1030 #define R_AARCH64_TLSDESC 1031 /* Identify the TLS descriptor */ #define R_AARCH64_IRELATIVE 1032 #define R_ARM_NONE 0 /* No relocation. 
*/ #define R_ARM_PC24 1 #define R_ARM_ABS32 2 #define R_ARM_REL32 3 #define R_ARM_PC13 4 #define R_ARM_ABS16 5 #define R_ARM_ABS12 6 #define R_ARM_THM_ABS5 7 #define R_ARM_ABS8 8 #define R_ARM_SBREL32 9 #define R_ARM_THM_PC22 10 #define R_ARM_THM_PC8 11 #define R_ARM_AMP_VCALL9 12 #define R_ARM_SWI24 13 #define R_ARM_THM_SWI8 14 #define R_ARM_XPC25 15 #define R_ARM_THM_XPC22 16 /* TLS relocations */ #define R_ARM_TLS_DTPMOD32 17 /* ID of module containing symbol */ #define R_ARM_TLS_DTPOFF32 18 /* Offset in TLS block */ #define R_ARM_TLS_TPOFF32 19 /* Offset in static TLS block */ #define R_ARM_COPY 20 /* Copy data from shared object. */ #define R_ARM_GLOB_DAT 21 /* Set GOT entry to data address. */ #define R_ARM_JUMP_SLOT 22 /* Set GOT entry to code address. */ #define R_ARM_RELATIVE 23 /* Add load address of shared object. */ #define R_ARM_GOTOFF 24 /* Add GOT-relative symbol address. */ #define R_ARM_GOTPC 25 /* Add PC-relative GOT table address. */ #define R_ARM_GOT32 26 /* Add PC-relative GOT offset. */ #define R_ARM_PLT32 27 /* Add PC-relative PLT offset. */ #define R_ARM_GNU_VTENTRY 100 #define R_ARM_GNU_VTINHERIT 101 #define R_ARM_RSBREL32 250 #define R_ARM_THM_RPC22 251 #define R_ARM_RREL32 252 #define R_ARM_RABS32 253 #define R_ARM_RPC24 254 #define R_ARM_RBASE 255 /* Name Value Field Calculation */ #define R_IA_64_NONE 0 /* None */ #define R_IA_64_IMM14 0x21 /* immediate14 S + A */ #define R_IA_64_IMM22 0x22 /* immediate22 S + A */ #define R_IA_64_IMM64 0x23 /* immediate64 S + A */ #define R_IA_64_DIR32MSB 0x24 /* word32 MSB S + A */ #define R_IA_64_DIR32LSB 0x25 /* word32 LSB S + A */ #define R_IA_64_DIR64MSB 0x26 /* word64 MSB S + A */ #define R_IA_64_DIR64LSB 0x27 /* word64 LSB S + A */ #define R_IA_64_GPREL22 0x2a /* immediate22 @gprel(S + A) */ #define R_IA_64_GPREL64I 0x2b /* immediate64 @gprel(S + A) */ #define R_IA_64_GPREL32MSB 0x2c /* word32 MSB @gprel(S + A) */ #define R_IA_64_GPREL32LSB 0x2d /* word32 LSB @gprel(S + A) */ #define R_IA_64_GPREL64MSB 0x2e /* word64 MSB @gprel(S + A) */ #define R_IA_64_GPREL64LSB 0x2f /* word64 LSB @gprel(S + A) */ #define R_IA_64_LTOFF22 0x32 /* immediate22 @ltoff(S + A) */ #define R_IA_64_LTOFF64I 0x33 /* immediate64 @ltoff(S + A) */ #define R_IA_64_PLTOFF22 0x3a /* immediate22 @pltoff(S + A) */ #define R_IA_64_PLTOFF64I 0x3b /* immediate64 @pltoff(S + A) */ #define R_IA_64_PLTOFF64MSB 0x3e /* word64 MSB @pltoff(S + A) */ #define R_IA_64_PLTOFF64LSB 0x3f /* word64 LSB @pltoff(S + A) */ #define R_IA_64_FPTR64I 0x43 /* immediate64 @fptr(S + A) */ #define R_IA_64_FPTR32MSB 0x44 /* word32 MSB @fptr(S + A) */ #define R_IA_64_FPTR32LSB 0x45 /* word32 LSB @fptr(S + A) */ #define R_IA_64_FPTR64MSB 0x46 /* word64 MSB @fptr(S + A) */ #define R_IA_64_FPTR64LSB 0x47 /* word64 LSB @fptr(S + A) */ #define R_IA_64_PCREL60B 0x48 /* immediate60 form1 S + A - P */ #define R_IA_64_PCREL21B 0x49 /* immediate21 form1 S + A - P */ #define R_IA_64_PCREL21M 0x4a /* immediate21 form2 S + A - P */ #define R_IA_64_PCREL21F 0x4b /* immediate21 form3 S + A - P */ #define R_IA_64_PCREL32MSB 0x4c /* word32 MSB S + A - P */ #define R_IA_64_PCREL32LSB 0x4d /* word32 LSB S + A - P */ #define R_IA_64_PCREL64MSB 0x4e /* word64 MSB S + A - P */ #define R_IA_64_PCREL64LSB 0x4f /* word64 LSB S + A - P */ #define R_IA_64_LTOFF_FPTR22 0x52 /* immediate22 @ltoff(@fptr(S + A)) */ #define R_IA_64_LTOFF_FPTR64I 0x53 /* immediate64 @ltoff(@fptr(S + A)) */ #define R_IA_64_LTOFF_FPTR32MSB 0x54 /* word32 MSB @ltoff(@fptr(S + A)) */ #define R_IA_64_LTOFF_FPTR32LSB 0x55 /* word32 LSB 
@ltoff(@fptr(S + A)) */ #define R_IA_64_LTOFF_FPTR64MSB 0x56 /* word64 MSB @ltoff(@fptr(S + A)) */ #define R_IA_64_LTOFF_FPTR64LSB 0x57 /* word64 LSB @ltoff(@fptr(S + A)) */ #define R_IA_64_SEGREL32MSB 0x5c /* word32 MSB @segrel(S + A) */ #define R_IA_64_SEGREL32LSB 0x5d /* word32 LSB @segrel(S + A) */ #define R_IA_64_SEGREL64MSB 0x5e /* word64 MSB @segrel(S + A) */ #define R_IA_64_SEGREL64LSB 0x5f /* word64 LSB @segrel(S + A) */ #define R_IA_64_SECREL32MSB 0x64 /* word32 MSB @secrel(S + A) */ #define R_IA_64_SECREL32LSB 0x65 /* word32 LSB @secrel(S + A) */ #define R_IA_64_SECREL64MSB 0x66 /* word64 MSB @secrel(S + A) */ #define R_IA_64_SECREL64LSB 0x67 /* word64 LSB @secrel(S + A) */ #define R_IA_64_REL32MSB 0x6c /* word32 MSB BD + A */ #define R_IA_64_REL32LSB 0x6d /* word32 LSB BD + A */ #define R_IA_64_REL64MSB 0x6e /* word64 MSB BD + A */ #define R_IA_64_REL64LSB 0x6f /* word64 LSB BD + A */ #define R_IA_64_LTV32MSB 0x74 /* word32 MSB S + A */ #define R_IA_64_LTV32LSB 0x75 /* word32 LSB S + A */ #define R_IA_64_LTV64MSB 0x76 /* word64 MSB S + A */ #define R_IA_64_LTV64LSB 0x77 /* word64 LSB S + A */ #define R_IA_64_PCREL21BI 0x79 /* immediate21 form1 S + A - P */ #define R_IA_64_PCREL22 0x7a /* immediate22 S + A - P */ #define R_IA_64_PCREL64I 0x7b /* immediate64 S + A - P */ #define R_IA_64_IPLTMSB 0x80 /* function descriptor MSB special */ #define R_IA_64_IPLTLSB 0x81 /* function descriptor LSB special */ #define R_IA_64_SUB 0x85 /* immediate64 A - S */ #define R_IA_64_LTOFF22X 0x86 /* immediate22 special */ #define R_IA_64_LDXMOV 0x87 /* immediate22 special */ #define R_IA_64_TPREL14 0x91 /* imm14 @tprel(S + A) */ #define R_IA_64_TPREL22 0x92 /* imm22 @tprel(S + A) */ #define R_IA_64_TPREL64I 0x93 /* imm64 @tprel(S + A) */ #define R_IA_64_TPREL64MSB 0x96 /* word64 MSB @tprel(S + A) */ #define R_IA_64_TPREL64LSB 0x97 /* word64 LSB @tprel(S + A) */ #define R_IA_64_LTOFF_TPREL22 0x9a /* imm22 @ltoff(@tprel(S+A)) */ #define R_IA_64_DTPMOD64MSB 0xa6 /* word64 MSB @dtpmod(S + A) */ #define R_IA_64_DTPMOD64LSB 0xa7 /* word64 LSB @dtpmod(S + A) */ #define R_IA_64_LTOFF_DTPMOD22 0xaa /* imm22 @ltoff(@dtpmod(S+A)) */ #define R_IA_64_DTPREL14 0xb1 /* imm14 @dtprel(S + A) */ #define R_IA_64_DTPREL22 0xb2 /* imm22 @dtprel(S + A) */ #define R_IA_64_DTPREL64I 0xb3 /* imm64 @dtprel(S + A) */ #define R_IA_64_DTPREL32MSB 0xb4 /* word32 MSB @dtprel(S + A) */ #define R_IA_64_DTPREL32LSB 0xb5 /* word32 LSB @dtprel(S + A) */ #define R_IA_64_DTPREL64MSB 0xb6 /* word64 MSB @dtprel(S + A) */ #define R_IA_64_DTPREL64LSB 0xb7 /* word64 LSB @dtprel(S + A) */ #define R_IA_64_LTOFF_DTPREL22 0xba /* imm22 @ltoff(@dtprel(S+A)) */ #define R_MIPS_NONE 0 /* No reloc */ #define R_MIPS_16 1 /* Direct 16 bit */ #define R_MIPS_32 2 /* Direct 32 bit */ #define R_MIPS_REL32 3 /* PC relative 32 bit */ #define R_MIPS_26 4 /* Direct 26 bit shifted */ #define R_MIPS_HI16 5 /* High 16 bit */ #define R_MIPS_LO16 6 /* Low 16 bit */ #define R_MIPS_GPREL16 7 /* GP relative 16 bit */ #define R_MIPS_LITERAL 8 /* 16 bit literal entry */ #define R_MIPS_GOT16 9 /* 16 bit GOT entry */ #define R_MIPS_PC16 10 /* PC relative 16 bit */ #define R_MIPS_CALL16 11 /* 16 bit GOT entry for function */ #define R_MIPS_GPREL32 12 /* GP relative 32 bit */ #define R_MIPS_64 18 /* Direct 64 bit */ #define R_MIPS_GOT_DISP 19 #define R_MIPS_GOT_PAGE 20 #define R_MIPS_GOT_OFST 21 #define R_MIPS_GOT_HI16 22 /* GOT HI 16 bit */ #define R_MIPS_GOT_LO16 23 /* GOT LO 16 bit */ #define R_MIPS_SUB 24 #define R_MIPS_CALLHI16 30 /* upper 16 bit GOT entry for 
function */ #define R_MIPS_CALLLO16 31 /* lower 16 bit GOT entry for function */ #define R_MIPS_JALR 37 #define R_MIPS_TLS_GD 42 #define R_MIPS_COPY 126 #define R_MIPS_JUMP_SLOT 127 #define R_PPC_NONE 0 /* No relocation. */ #define R_PPC_ADDR32 1 #define R_PPC_ADDR24 2 #define R_PPC_ADDR16 3 #define R_PPC_ADDR16_LO 4 #define R_PPC_ADDR16_HI 5 #define R_PPC_ADDR16_HA 6 #define R_PPC_ADDR14 7 #define R_PPC_ADDR14_BRTAKEN 8 #define R_PPC_ADDR14_BRNTAKEN 9 #define R_PPC_REL24 10 #define R_PPC_REL14 11 #define R_PPC_REL14_BRTAKEN 12 #define R_PPC_REL14_BRNTAKEN 13 #define R_PPC_GOT16 14 #define R_PPC_GOT16_LO 15 #define R_PPC_GOT16_HI 16 #define R_PPC_GOT16_HA 17 #define R_PPC_PLTREL24 18 #define R_PPC_COPY 19 #define R_PPC_GLOB_DAT 20 #define R_PPC_JMP_SLOT 21 #define R_PPC_RELATIVE 22 #define R_PPC_LOCAL24PC 23 #define R_PPC_UADDR32 24 #define R_PPC_UADDR16 25 #define R_PPC_REL32 26 #define R_PPC_PLT32 27 #define R_PPC_PLTREL32 28 #define R_PPC_PLT16_LO 29 #define R_PPC_PLT16_HI 30 #define R_PPC_PLT16_HA 31 #define R_PPC_SDAREL16 32 #define R_PPC_SECTOFF 33 #define R_PPC_SECTOFF_LO 34 #define R_PPC_SECTOFF_HI 35 #define R_PPC_SECTOFF_HA 36 #define R_PPC_IRELATIVE 248 /* * 64-bit relocations */ #define R_PPC64_ADDR64 38 #define R_PPC64_ADDR16_HIGHER 39 #define R_PPC64_ADDR16_HIGHERA 40 #define R_PPC64_ADDR16_HIGHEST 41 #define R_PPC64_ADDR16_HIGHESTA 42 #define R_PPC64_UADDR64 43 #define R_PPC64_REL64 44 #define R_PPC64_PLT64 45 #define R_PPC64_PLTREL64 46 #define R_PPC64_TOC16 47 #define R_PPC64_TOC16_LO 48 #define R_PPC64_TOC16_HI 49 #define R_PPC64_TOC16_HA 50 #define R_PPC64_TOC 51 #define R_PPC64_DTPMOD64 68 #define R_PPC64_TPREL64 73 #define R_PPC64_DTPREL64 78 /* * TLS relocations */ #define R_PPC_TLS 67 #define R_PPC_DTPMOD32 68 #define R_PPC_TPREL16 69 #define R_PPC_TPREL16_LO 70 #define R_PPC_TPREL16_HI 71 #define R_PPC_TPREL16_HA 72 #define R_PPC_TPREL32 73 #define R_PPC_DTPREL16 74 #define R_PPC_DTPREL16_LO 75 #define R_PPC_DTPREL16_HI 76 #define R_PPC_DTPREL16_HA 77 #define R_PPC_DTPREL32 78 #define R_PPC_GOT_TLSGD16 79 #define R_PPC_GOT_TLSGD16_LO 80 #define R_PPC_GOT_TLSGD16_HI 81 #define R_PPC_GOT_TLSGD16_HA 82 #define R_PPC_GOT_TLSLD16 83 #define R_PPC_GOT_TLSLD16_LO 84 #define R_PPC_GOT_TLSLD16_HI 85 #define R_PPC_GOT_TLSLD16_HA 86 #define R_PPC_GOT_TPREL16 87 #define R_PPC_GOT_TPREL16_LO 88 #define R_PPC_GOT_TPREL16_HI 89 #define R_PPC_GOT_TPREL16_HA 90 /* * The remaining relocs are from the Embedded ELF ABI, and are not in the * SVR4 ELF ABI. */ #define R_PPC_EMB_NADDR32 101 #define R_PPC_EMB_NADDR16 102 #define R_PPC_EMB_NADDR16_LO 103 #define R_PPC_EMB_NADDR16_HI 104 #define R_PPC_EMB_NADDR16_HA 105 #define R_PPC_EMB_SDAI16 106 #define R_PPC_EMB_SDA2I16 107 #define R_PPC_EMB_SDA2REL 108 #define R_PPC_EMB_SDA21 109 #define R_PPC_EMB_MRKREF 110 #define R_PPC_EMB_RELSEC16 111 #define R_PPC_EMB_RELST_LO 112 #define R_PPC_EMB_RELST_HI 113 #define R_PPC_EMB_RELST_HA 114 #define R_PPC_EMB_BIT_FLD 115 #define R_PPC_EMB_RELSDA 116 /* * RISC-V relocation types. */ /* Relocation types used by the dynamic linker. */ #define R_RISCV_NONE 0 #define R_RISCV_32 1 #define R_RISCV_64 2 #define R_RISCV_RELATIVE 3 #define R_RISCV_COPY 4 #define R_RISCV_JUMP_SLOT 5 #define R_RISCV_TLS_DTPMOD32 6 #define R_RISCV_TLS_DTPMOD64 7 #define R_RISCV_TLS_DTPREL32 8 #define R_RISCV_TLS_DTPREL64 9 #define R_RISCV_TLS_TPREL32 10 #define R_RISCV_TLS_TPREL64 11 /* Relocation types not used by the dynamic linker. 
*/ #define R_RISCV_BRANCH 16 #define R_RISCV_JAL 17 #define R_RISCV_CALL 18 #define R_RISCV_CALL_PLT 19 #define R_RISCV_GOT_HI20 20 #define R_RISCV_TLS_GOT_HI20 21 #define R_RISCV_TLS_GD_HI20 22 #define R_RISCV_PCREL_HI20 23 #define R_RISCV_PCREL_LO12_I 24 #define R_RISCV_PCREL_LO12_S 25 #define R_RISCV_HI20 26 #define R_RISCV_LO12_I 27 #define R_RISCV_LO12_S 28 #define R_RISCV_TPREL_HI20 29 #define R_RISCV_TPREL_LO12_I 30 #define R_RISCV_TPREL_LO12_S 31 #define R_RISCV_TPREL_ADD 32 #define R_RISCV_ADD8 33 #define R_RISCV_ADD16 34 #define R_RISCV_ADD32 35 #define R_RISCV_ADD64 36 #define R_RISCV_SUB8 37 #define R_RISCV_SUB16 38 #define R_RISCV_SUB32 39 #define R_RISCV_SUB64 40 #define R_RISCV_ALIGN 43 #define R_RISCV_RVC_BRANCH 44 #define R_RISCV_RVC_JUMP 45 #define R_RISCV_RVC_LUI 46 #define R_RISCV_RELAX 51 #define R_RISCV_SUB6 52 #define R_RISCV_SET6 53 #define R_RISCV_SET8 54 #define R_RISCV_SET16 55 #define R_RISCV_SET32 56 #define R_RISCV_32_PCREL 57 #define R_RISCV_IRELATIVE 58 /* * Loongson LoongArch relocation types. * * LoongArch ELF psABI: https://github.com/loongson/LoongArch-Documentation * (commit hash 9b3bd9f4a497115913c22f1a2a47863798fbc02a) */ /* Relocation types used by the dynamic linker */ #define R_LARCH_NONE 0 #define R_LARCH_32 1 #define R_LARCH_64 2 #define R_LARCH_RELATIVE 3 #define R_LARCH_COPY 4 #define R_LARCH_JUMP_SLOT 5 #define R_LARCH_TLS_DTPMOD32 6 #define R_LARCH_TLS_DTPMOD64 7 #define R_LARCH_TLS_DTPREL32 8 #define R_LARCH_TLS_DTPREL64 9 #define R_LARCH_TLS_TPREL32 10 #define R_LARCH_TLS_TPREL64 11 #define R_LARCH_IRELATIVE 12 #define R_LARCH_MARK_LA 20 #define R_LARCH_MARK_PCREL 21 #define R_LARCH_SOP_PUSH_PCREL 22 #define R_LARCH_SOP_PUSH_ABSOLUTE 23 #define R_LARCH_SOP_PUSH_DUP 24 #define R_LARCH_SOP_PUSH_GPREL 25 #define R_LARCH_SOP_PUSH_TLS_TPREL 26 #define R_LARCH_SOP_PUSH_TLS_GOT 27 #define R_LARCH_SOP_PUSH_TLS_GD 28 #define R_LARCH_SOP_PUSH_PLT_PCREL 29 #define R_LARCH_SOP_ASSERT 30 #define R_LARCH_SOP_NOT 31 #define R_LARCH_SOP_SUB 32 #define R_LARCH_SOP_SL 33 #define R_LARCH_SOP_SR 34 #define R_LARCH_SOP_ADD 35 #define R_LARCH_SOP_AND 36 #define R_LARCH_SOP_IF_ELSE 37 #define R_LARCH_SOP_POP_32_S_10_5 38 #define R_LARCH_SOP_POP_32_U_10_12 39 #define R_LARCH_SOP_POP_32_S_10_12 40 #define R_LARCH_SOP_POP_32_S_10_16 41 #define R_LARCH_SOP_POP_32_S_10_16_S2 42 #define R_LARCH_SOP_POP_32_S_5_20 43 #define R_LARCH_SOP_POP_32_S_0_5_10_16_S2 44 #define R_LARCH_SOP_POP_32_S_0_10_10_16_S2 45 #define R_LARCH_SOP_POP_32_U 46 #define R_LARCH_ADD8 47 #define R_LARCH_ADD16 48 #define R_LARCH_ADD24 49 #define R_LARCH_ADD32 50 #define R_LARCH_ADD64 51 #define R_LARCH_SUB8 52 #define R_LARCH_SUB16 53 #define R_LARCH_SUB24 54 #define R_LARCH_SUB32 55 #define R_LARCH_SUB64 56 #define R_LARCH_GNU_VTINHERIT 57 #define R_LARCH_GNU_VTENTRY 58 /* * Relocs whose processing do not require a stack machine. 
* * Spec addition: https://github.com/loongson/LoongArch-Documentation/pull/57 */ #define R_LARCH_B16 64 #define R_LARCH_B21 65 #define R_LARCH_B26 66 #define R_LARCH_ABS_HI20 67 #define R_LARCH_ABS_LO12 68 #define R_LARCH_ABS64_LO20 69 #define R_LARCH_ABS64_HI12 70 #define R_LARCH_PCALA_HI20 71 #define R_LARCH_PCALA_LO12 72 #define R_LARCH_PCALA64_LO20 73 #define R_LARCH_PCALA64_HI12 74 #define R_LARCH_GOT_PC_HI20 75 #define R_LARCH_GOT_PC_LO12 76 #define R_LARCH_GOT64_PC_LO20 77 #define R_LARCH_GOT64_PC_HI12 78 #define R_LARCH_GOT_HI20 79 #define R_LARCH_GOT_LO12 80 #define R_LARCH_GOT64_LO20 81 #define R_LARCH_GOT64_HI12 82 #define R_LARCH_TLS_LE_HI20 83 #define R_LARCH_TLS_LE_LO12 84 #define R_LARCH_TLS_LE64_LO20 85 #define R_LARCH_TLS_LE64_HI12 86 #define R_LARCH_TLS_IE_PC_HI20 87 #define R_LARCH_TLS_IE_PC_LO12 88 #define R_LARCH_TLS_IE64_PC_LO20 89 #define R_LARCH_TLS_IE64_PC_HI12 90 #define R_LARCH_TLS_IE_HI20 91 #define R_LARCH_TLS_IE_LO12 92 #define R_LARCH_TLS_IE64_LO20 93 #define R_LARCH_TLS_IE64_HI12 94 #define R_LARCH_TLS_LD_PC_HI20 95 #define R_LARCH_TLS_LD_HI20 96 #define R_LARCH_TLS_GD_PC_HI20 97 #define R_LARCH_TLS_GD_HI20 98 #define R_LARCH_32_PCREL 99 #define R_LARCH_RELAX 100 /* * Relocs added in ELF for the LoongArch™ Architecture v20230519, part of the * v2.10 LoongArch ABI specs. * * Spec addition: https://github.com/loongson/la-abi-specs/pull/1 * * Note that the 101 and 104 relocation numbers are defined as R_LARCH_DELETE * and R_LARCH_CFA respectively in psABI 2.10. But they are marked as reserved * in psABI v2.20 because they were proved not necessary to be exposed outside * of the linker. */ #define R_LARCH_ALIGN 102 #define R_LARCH_PCREL20_S2 103 #define R_LARCH_ADD6 105 #define R_LARCH_SUB6 106 #define R_LARCH_ADD_ULEB128 107 #define R_LARCH_SUB_ULEB128 108 #define R_LARCH_64_PCREL 109 /* * Relocs added in ELF for the LoongArch™ Architecture v20231102, part of the * v2.20 LoongArch ABI specs. * * Spec addition: https://github.com/loongson/la-abi-specs/pull/4 */ #define R_LARCH_CALL36 110 /* * Relocs added in ELF for the LoongArch™ Architecture v20231219, part of the * v2.30 LoongArch ABI specs. 
* * Spec addition: https://github.com/loongson/la-abi-specs/pull/5 */ #define R_LARCH_TLS_DESC32 13 #define R_LARCH_TLS_DESC64 14 #define R_LARCH_TLS_DESC_PC_HI20 111 #define R_LARCH_TLS_DESC_PC_LO12 112 #define R_LARCH_TLS_DESC64_PC_LO20 113 #define R_LARCH_TLS_DESC64_PC_HI12 114 #define R_LARCH_TLS_DESC_HI20 115 #define R_LARCH_TLS_DESC_LO12 116 #define R_LARCH_TLS_DESC64_LO20 117 #define R_LARCH_TLS_DESC64_HI12 118 #define R_LARCH_TLS_DESC_LD 119 #define R_LARCH_TLS_DESC_CALL 120 #define R_LARCH_TLS_LE_HI20_R 121 #define R_LARCH_TLS_LE_ADD_R 122 #define R_LARCH_TLS_LE_LO12_R 123 #define R_LARCH_TLS_LD_PCREL20_S2 124 #define R_LARCH_TLS_GD_PCREL20_S2 125 #define R_LARCH_TLS_DESC_PCREL20_S2 126 #define R_SPARC_NONE 0 #define R_SPARC_8 1 #define R_SPARC_16 2 #define R_SPARC_32 3 #define R_SPARC_DISP8 4 #define R_SPARC_DISP16 5 #define R_SPARC_DISP32 6 #define R_SPARC_WDISP30 7 #define R_SPARC_WDISP22 8 #define R_SPARC_HI22 9 #define R_SPARC_22 10 #define R_SPARC_13 11 #define R_SPARC_LO10 12 #define R_SPARC_GOT10 13 #define R_SPARC_GOT13 14 #define R_SPARC_GOT22 15 #define R_SPARC_PC10 16 #define R_SPARC_PC22 17 #define R_SPARC_WPLT30 18 #define R_SPARC_COPY 19 #define R_SPARC_GLOB_DAT 20 #define R_SPARC_JMP_SLOT 21 #define R_SPARC_RELATIVE 22 #define R_SPARC_UA32 23 #define R_SPARC_PLT32 24 #define R_SPARC_HIPLT22 25 #define R_SPARC_LOPLT10 26 #define R_SPARC_PCPLT32 27 #define R_SPARC_PCPLT22 28 #define R_SPARC_PCPLT10 29 #define R_SPARC_10 30 #define R_SPARC_11 31 #define R_SPARC_64 32 #define R_SPARC_OLO10 33 #define R_SPARC_HH22 34 #define R_SPARC_HM10 35 #define R_SPARC_LM22 36 #define R_SPARC_PC_HH22 37 #define R_SPARC_PC_HM10 38 #define R_SPARC_PC_LM22 39 #define R_SPARC_WDISP16 40 #define R_SPARC_WDISP19 41 #define R_SPARC_GLOB_JMP 42 #define R_SPARC_7 43 #define R_SPARC_5 44 #define R_SPARC_6 45 #define R_SPARC_DISP64 46 #define R_SPARC_PLT64 47 #define R_SPARC_HIX22 48 #define R_SPARC_LOX10 49 #define R_SPARC_H44 50 #define R_SPARC_M44 51 #define R_SPARC_L44 52 #define R_SPARC_REGISTER 53 #define R_SPARC_UA64 54 #define R_SPARC_UA16 55 #define R_SPARC_TLS_GD_HI22 56 #define R_SPARC_TLS_GD_LO10 57 #define R_SPARC_TLS_GD_ADD 58 #define R_SPARC_TLS_GD_CALL 59 #define R_SPARC_TLS_LDM_HI22 60 #define R_SPARC_TLS_LDM_LO10 61 #define R_SPARC_TLS_LDM_ADD 62 #define R_SPARC_TLS_LDM_CALL 63 #define R_SPARC_TLS_LDO_HIX22 64 #define R_SPARC_TLS_LDO_LOX10 65 #define R_SPARC_TLS_LDO_ADD 66 #define R_SPARC_TLS_IE_HI22 67 #define R_SPARC_TLS_IE_LO10 68 #define R_SPARC_TLS_IE_LD 69 #define R_SPARC_TLS_IE_LDX 70 #define R_SPARC_TLS_IE_ADD 71 #define R_SPARC_TLS_LE_HIX22 72 #define R_SPARC_TLS_LE_LOX10 73 #define R_SPARC_TLS_DTPMOD32 74 #define R_SPARC_TLS_DTPMOD64 75 #define R_SPARC_TLS_DTPOFF32 76 #define R_SPARC_TLS_DTPOFF64 77 #define R_SPARC_TLS_TPOFF32 78 #define R_SPARC_TLS_TPOFF64 79 #define R_X86_64_NONE 0 /* No relocation. */ #define R_X86_64_64 1 /* Add 64 bit symbol value. */ #define R_X86_64_PC32 2 /* PC-relative 32 bit signed sym value. */ #define R_X86_64_GOT32 3 /* PC-relative 32 bit GOT offset. */ #define R_X86_64_PLT32 4 /* PC-relative 32 bit PLT offset. */ #define R_X86_64_COPY 5 /* Copy data from shared object. */ #define R_X86_64_GLOB_DAT 6 /* Set GOT entry to data address. */ #define R_X86_64_JMP_SLOT 7 /* Set GOT entry to code address. */ #define R_X86_64_RELATIVE 8 /* Add load address of shared object. */ #define R_X86_64_GOTPCREL 9 /* Add 32 bit signed pcrel offset to GOT. 
*/ #define R_X86_64_32 10 /* Add 32 bit zero extended symbol value */ #define R_X86_64_32S 11 /* Add 32 bit sign extended symbol value */ #define R_X86_64_16 12 /* Add 16 bit zero extended symbol value */ #define R_X86_64_PC16 13 /* Add 16 bit signed extended pc relative symbol value */ #define R_X86_64_8 14 /* Add 8 bit zero extended symbol value */ #define R_X86_64_PC8 15 /* Add 8 bit signed extended pc relative symbol value */ #define R_X86_64_DTPMOD64 16 /* ID of module containing symbol */ #define R_X86_64_DTPOFF64 17 /* Offset in TLS block */ #define R_X86_64_TPOFF64 18 /* Offset in static TLS block */ #define R_X86_64_TLSGD 19 /* PC relative offset to GD GOT entry */ #define R_X86_64_TLSLD 20 /* PC relative offset to LD GOT entry */ #define R_X86_64_DTPOFF32 21 /* Offset in TLS block */ #define R_X86_64_GOTTPOFF 22 /* PC relative offset to IE GOT entry */ #define R_X86_64_TPOFF32 23 /* Offset in static TLS block */ #define R_X86_64_PC64 24 /* PC-relative 64 bit signed sym value. */ #define R_X86_64_GOTOFF64 25 #define R_X86_64_GOTPC32 26 #define R_X86_64_GOT64 27 #define R_X86_64_GOTPCREL64 28 #define R_X86_64_GOTPC64 29 #define R_X86_64_GOTPLT64 30 #define R_X86_64_PLTOFF64 31 #define R_X86_64_SIZE32 32 #define R_X86_64_SIZE64 33 #define R_X86_64_GOTPC32_TLSDESC 34 #define R_X86_64_TLSDESC_CALL 35 #define R_X86_64_TLSDESC 36 #define R_X86_64_IRELATIVE 37 #define R_X86_64_RELATIVE64 38 /* 39 and 40 were BND-related, already decomissioned */ #define R_X86_64_GOTPCRELX 41 #define R_X86_64_REX_GOTPCRELX 42 #define ELF_BSDF_SIGFASTBLK 0x0001 /* Kernel supports fast sigblock */ #define ELF_BSDF_VMNOOVERCOMMIT 0x0002 #endif /* !_SYS_ELF_COMMON_H_ */ diff --git a/sys/sys/user.h b/sys/sys/user.h index c053441c9e6a..8396f827e9ed 100644 --- a/sys/sys/user.h +++ b/sys/sys/user.h @@ -1,729 +1,731 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. * Copyright (c) 2007 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef _SYS_USER_H_ #define _SYS_USER_H_ #include #ifndef _KERNEL /* stuff that *used* to be included by user.h, or is now needed */ #include #include #include #include #include #include #include #include #include #include #include /* XXX */ #include /* XXX */ #include /* XXX */ #include /* XXX */ #endif /* !_KERNEL */ #ifndef _SYS_RESOURCEVAR_H_ #include #endif #ifndef _SYS_SIGNALVAR_H_ #include #endif #ifndef _SYS_SOCKET_VAR_H_ #include #endif #include /* * KERN_PROC subtype ops return arrays of selected proc structure entries: * * This struct includes several arrays of spare space, with different arrays * for different standard C-types. When adding new variables to this struct, * the space for byte-aligned data should be taken from the ki_sparestring, * pointers from ki_spareptrs, word-aligned data from ki_spareints, and * doubleword-aligned data from ki_sparelongs. Make sure the space for new * variables come from the array which matches the size and alignment of * those variables on ALL hardware platforms, and then adjust the appropriate * KI_NSPARE_* value(s) to match. * * Always verify that sizeof(struct kinfo_proc) == KINFO_PROC_SIZE on all * platforms after you have added new variables. Note that if you change * the value of KINFO_PROC_SIZE, then many userland programs will stop * working until they are recompiled! * * Once you have added the new field, you will need to add code to initialize * it in two places: function fill_kinfo_proc in sys/kern/kern_proc.c and * function kvm_proclist in lib/libkvm/kvm_proc.c . */ #define KI_NSPARE_INT 2 #define KI_NSPARE_LONG 12 #define KI_NSPARE_PTR 5 #ifndef _KERNEL #ifndef KINFO_PROC_SIZE #error "Unknown architecture" #endif #endif /* !_KERNEL */ #define WMESGLEN 8 /* size of returned wchan message */ #define LOCKNAMELEN 8 /* size of returned lock name */ #define TDNAMLEN 16 /* size of returned thread name */ #define COMMLEN 19 /* size of returned ki_comm name */ #define KI_EMULNAMELEN 16 /* size of returned ki_emul */ #define KI_NGROUPS 16 /* number of groups in ki_groups */ #define LOGNAMELEN 17 /* size of returned ki_login */ #define LOGINCLASSLEN 17 /* size of returned ki_loginclass */ #ifndef BURN_BRIDGES #define OCOMMLEN TDNAMLEN #define ki_ocomm ki_tdname #endif /* Flags for the process credential. */ #define KI_CRF_CAPABILITY_MODE 0x00000001 /* * Steal a bit from ki_cr_flags to indicate that the cred had more than * KI_NGROUPS groups. 
*/ #define KI_CRF_GRP_OVERFLOW 0x80000000 struct kinfo_proc { int ki_structsize; /* size of this structure */ int ki_layout; /* reserved: layout identifier */ struct pargs *ki_args; /* address of command arguments */ struct proc *ki_paddr; /* address of proc */ struct user *ki_addr; /* kernel virtual addr of u-area */ struct vnode *ki_tracep; /* pointer to trace file */ struct vnode *ki_textvp; /* pointer to executable file */ struct filedesc *ki_fd; /* pointer to open file info */ struct vmspace *ki_vmspace; /* pointer to kernel vmspace struct */ const void *ki_wchan; /* sleep address */ pid_t ki_pid; /* Process identifier */ pid_t ki_ppid; /* parent process id */ pid_t ki_pgid; /* process group id */ pid_t ki_tpgid; /* tty process group id */ pid_t ki_sid; /* Process session ID */ pid_t ki_tsid; /* Terminal session ID */ short ki_jobc; /* job control counter */ short ki_spare_short1; /* unused (just here for alignment) */ uint32_t ki_tdev_freebsd11; /* controlling tty dev */ sigset_t ki_siglist; /* Signals arrived but not delivered */ sigset_t ki_sigmask; /* Current signal mask */ sigset_t ki_sigignore; /* Signals being ignored */ sigset_t ki_sigcatch; /* Signals being caught by user */ uid_t ki_uid; /* effective user id */ uid_t ki_ruid; /* Real user id */ uid_t ki_svuid; /* Saved effective user id */ gid_t ki_rgid; /* Real group id */ gid_t ki_svgid; /* Saved effective group id */ short ki_ngroups; /* number of groups */ short ki_spare_short2; /* unused (just here for alignment) */ gid_t ki_groups[KI_NGROUPS]; /* groups */ vm_size_t ki_size; /* virtual size */ segsz_t ki_rssize; /* current resident set size in pages */ segsz_t ki_swrss; /* resident set size before last swap */ segsz_t ki_tsize; /* text size (pages) XXX */ segsz_t ki_dsize; /* data size (pages) XXX */ segsz_t ki_ssize; /* stack size (pages) */ u_short ki_xstat; /* Exit status for wait & stop signal */ u_short ki_acflag; /* Accounting flags */ fixpt_t ki_pctcpu; /* %cpu for process during ki_swtime */ u_int ki_estcpu; /* Time averaged value of ki_cpticks */ u_int ki_slptime; /* Time since last blocked */ u_int ki_swtime; /* Time swapped in or out */ u_int ki_cow; /* number of copy-on-write faults */ u_int64_t ki_runtime; /* Real time in microsec */ struct timeval ki_start; /* starting time */ struct timeval ki_childtime; /* time used by process children */ long ki_flag; /* P_* flags */ long ki_kiflag; /* KI_* flags (below) */ int ki_traceflag; /* Kernel trace points */ char ki_stat; /* S* process status */ signed char ki_nice; /* Process "nice" value */ char ki_lock; /* Process lock (prevent swap) count */ char ki_rqindex; /* Run queue index */ u_char ki_oncpu_old; /* Which cpu we are on (legacy) */ u_char ki_lastcpu_old; /* Last cpu we were on (legacy) */ char ki_tdname[TDNAMLEN+1]; /* thread name */ char ki_wmesg[WMESGLEN+1]; /* wchan message */ char ki_login[LOGNAMELEN+1]; /* setlogin name */ char ki_lockname[LOCKNAMELEN+1]; /* lock name */ char ki_comm[COMMLEN+1]; /* command name */ char ki_emul[KI_EMULNAMELEN+1]; /* emulation name */ char ki_loginclass[LOGINCLASSLEN+1]; /* login class */ char ki_moretdname[MAXCOMLEN-TDNAMLEN+1]; /* more thread name */ /* * When adding new variables, take space for char-strings from the * front of ki_sparestrings, and ints from the end of ki_spareints. * That way the spare room from both arrays will remain contiguous. 
*/ char ki_sparestrings[46]; /* spare string space */ int ki_spareints[KI_NSPARE_INT]; /* spare room for growth */ uint64_t ki_tdev; /* controlling tty dev */ int ki_oncpu; /* Which cpu we are on */ int ki_lastcpu; /* Last cpu we were on */ int ki_tracer; /* Pid of tracing process */ int ki_flag2; /* P2_* flags */ int ki_fibnum; /* Default FIB number */ u_int ki_cr_flags; /* Credential flags */ int ki_jid; /* Process jail ID */ int ki_numthreads; /* XXXKSE number of threads in total */ lwpid_t ki_tid; /* XXXKSE thread id */ struct priority ki_pri; /* process priority */ struct rusage ki_rusage; /* process rusage statistics */ /* XXX - most fields in ki_rusage_ch are not (yet) filled in */ struct rusage ki_rusage_ch; /* rusage of children processes */ struct pcb *ki_pcb; /* kernel virtual addr of pcb */ void *ki_kstack; /* kernel virtual addr of stack */ void *ki_udata; /* User convenience pointer */ struct thread *ki_tdaddr; /* address of thread */ /* * When adding new variables, take space for pointers from the * front of ki_spareptrs, and longs from the end of ki_sparelongs. * That way the spare room from both arrays will remain contiguous. */ struct pwddesc *ki_pd; /* pointer to process paths info */ void *ki_spareptrs[KI_NSPARE_PTR]; /* spare room for growth */ long ki_sparelongs[KI_NSPARE_LONG]; /* spare room for growth */ long ki_sflag; /* PS_* flags */ long ki_tdflags; /* XXXKSE kthread flag */ }; void fill_kinfo_proc(struct proc *, struct kinfo_proc *); /* XXX - the following two defines are temporary */ #define ki_childstime ki_rusage_ch.ru_stime #define ki_childutime ki_rusage_ch.ru_utime /* * Legacy PS_ flag. This moved to p_flag but is maintained for * compatibility. */ #define PS_INMEM 0x00001 /* Loaded into memory, always true. */ /* ki_sessflag values */ #define KI_CTTY 0x00000001 /* controlling tty vnode active */ #define KI_SLEADER 0x00000002 /* session leader */ #define KI_LOCKBLOCK 0x00000004 /* proc blocked on lock ki_lockname */ /* * This used to be the per-process structure containing data that * isn't needed in core when the process is swapped out, but now it * remains only for the benefit of a.out core dumps. */ struct user { struct pstats u_stats; /* *p_stats */ struct kinfo_proc u_kproc; /* eproc */ }; /* * The KERN_PROC_FILE sysctl allows a process to dump the file descriptor * array of another process. 
*/ #define KF_ATTR_VALID 0x0001 #define KF_TYPE_NONE 0 #define KF_TYPE_VNODE 1 #define KF_TYPE_SOCKET 2 #define KF_TYPE_PIPE 3 #define KF_TYPE_FIFO 4 #define KF_TYPE_KQUEUE 5 /* was KF_TYPE_CRYPTO 6 */ #define KF_TYPE_MQUEUE 7 #define KF_TYPE_SHM 8 #define KF_TYPE_SEM 9 #define KF_TYPE_PTS 10 #define KF_TYPE_PROCDESC 11 #define KF_TYPE_DEV 12 #define KF_TYPE_EVENTFD 13 #define KF_TYPE_TIMERFD 14 #define KF_TYPE_UNKNOWN 255 #define KF_VTYPE_VNON 0 #define KF_VTYPE_VREG 1 #define KF_VTYPE_VDIR 2 #define KF_VTYPE_VBLK 3 #define KF_VTYPE_VCHR 4 #define KF_VTYPE_VLNK 5 #define KF_VTYPE_VSOCK 6 #define KF_VTYPE_VFIFO 7 #define KF_VTYPE_VBAD 8 #define KF_VTYPE_UNKNOWN 255 #define KF_FD_TYPE_CWD -1 /* Current working directory */ #define KF_FD_TYPE_ROOT -2 /* Root directory */ #define KF_FD_TYPE_JAIL -3 /* Jail directory */ #define KF_FD_TYPE_TRACE -4 /* Ktrace vnode */ #define KF_FD_TYPE_TEXT -5 /* Text vnode */ #define KF_FD_TYPE_CTTY -6 /* Controlling terminal */ #define KF_FLAG_READ 0x00000001 #define KF_FLAG_WRITE 0x00000002 #define KF_FLAG_APPEND 0x00000004 #define KF_FLAG_ASYNC 0x00000008 #define KF_FLAG_FSYNC 0x00000010 #define KF_FLAG_NONBLOCK 0x00000020 #define KF_FLAG_DIRECT 0x00000040 #define KF_FLAG_HASLOCK 0x00000080 #define KF_FLAG_SHLOCK 0x00000100 #define KF_FLAG_EXLOCK 0x00000200 #define KF_FLAG_NOFOLLOW 0x00000400 #define KF_FLAG_CREAT 0x00000800 #define KF_FLAG_TRUNC 0x00001000 #define KF_FLAG_EXCL 0x00002000 #define KF_FLAG_EXEC 0x00004000 /* * Old format. Has variable hidden padding due to alignment. * This is a compatibility hack for pre-build 7.1 packages. */ #if defined(__amd64__) #define KINFO_OFILE_SIZE 1328 #endif #if defined(__i386__) #define KINFO_OFILE_SIZE 1324 #endif struct kinfo_ofile { int kf_structsize; /* Size of kinfo_file. */ int kf_type; /* Descriptor type. */ int kf_fd; /* Array index. */ int kf_ref_count; /* Reference count. */ int kf_flags; /* Flags. */ /* XXX Hidden alignment padding here on amd64 */ off_t kf_offset; /* Seek location. */ int kf_vnode_type; /* Vnode type. */ int kf_sock_domain; /* Socket domain. */ int kf_sock_type; /* Socket type. */ int kf_sock_protocol; /* Socket protocol. */ char kf_path[PATH_MAX]; /* Path to file, if any. */ struct sockaddr_storage kf_sa_local; /* Socket address. */ struct sockaddr_storage kf_sa_peer; /* Peer address. */ }; #if defined(__amd64__) || defined(__i386__) /* * This size should never be changed. If you really need to, you must provide * backward ABI compatibility by allocating a new sysctl MIB that will return * the new structure. The current structure has to be returned by the current * sysctl MIB. See how it is done for the kinfo_ofile structure. */ #define KINFO_FILE_SIZE 1392 #endif struct kinfo_file { int kf_structsize; /* Variable size of record. */ int kf_type; /* Descriptor type. */ int kf_fd; /* Array index. */ int kf_ref_count; /* Reference count. */ int kf_flags; /* Flags. */ int kf_pad0; /* Round to 64 bit alignment. */ int64_t kf_offset; /* Seek location. */ union { struct { /* API compatibility with FreeBSD < 12. */ int kf_vnode_type; int kf_sock_domain; int kf_sock_type; int kf_sock_protocol; struct sockaddr_storage kf_sa_local; struct sockaddr_storage kf_sa_peer; }; union { struct { /* Sendq size */ uint32_t kf_sock_sendq; /* Socket domain. */ int kf_sock_domain0; /* Socket type. */ int kf_sock_type0; /* Socket protocol. */ int kf_sock_protocol0; /* Socket address. */ struct sockaddr_storage kf_sa_local; /* Peer address. */ struct sockaddr_storage kf_sa_peer; /* Address of so_pcb. 
*/ uint64_t kf_sock_pcb; /* Obsolete! May be reused as a spare. */ uint64_t kf_sock_inpcb; /* Address of unp_conn. */ uint64_t kf_sock_unpconn; /* Send buffer state. */ uint16_t kf_sock_snd_sb_state; /* Receive buffer state. */ uint16_t kf_sock_rcv_sb_state; /* Recvq size. */ uint32_t kf_sock_recvq; } kf_sock; struct { /* Vnode type. */ int kf_file_type; /* Space for future use */ int kf_spareint[3]; uint64_t kf_spareint64[29]; /* Number of references to file. */ uint64_t kf_file_nlink; /* Vnode filesystem id. */ uint64_t kf_file_fsid; /* File device. */ uint64_t kf_file_rdev; /* Global file id. */ uint64_t kf_file_fileid; /* File size. */ uint64_t kf_file_size; /* Vnode filesystem id, FreeBSD 11 compat. */ uint32_t kf_file_fsid_freebsd11; /* File device, FreeBSD 11 compat. */ uint32_t kf_file_rdev_freebsd11; /* File mode. */ uint16_t kf_file_mode; /* Round to 64 bit alignment. */ uint16_t kf_file_pad0; uint32_t kf_file_pad1; } kf_file; struct { uint32_t kf_spareint[4]; uint64_t kf_spareint64[32]; uint32_t kf_sem_value; uint16_t kf_sem_mode; } kf_sem; struct { uint32_t kf_spareint[4]; uint64_t kf_spareint64[32]; uint64_t kf_pipe_addr; uint64_t kf_pipe_peer; uint32_t kf_pipe_buffer_cnt; uint32_t kf_pipe_buffer_in; uint32_t kf_pipe_buffer_out; uint32_t kf_pipe_buffer_size; } kf_pipe; struct { uint32_t kf_spareint[4]; uint64_t kf_spareint64[32]; uint32_t kf_pts_dev_freebsd11; uint32_t kf_pts_pad0; uint64_t kf_pts_dev; /* Round to 64 bit alignment. */ uint32_t kf_pts_pad1[4]; } kf_pts; struct { uint32_t kf_spareint[4]; uint64_t kf_spareint64[32]; pid_t kf_pid; } kf_proc; struct { uint64_t kf_eventfd_value; uint32_t kf_eventfd_flags; uint32_t kf_eventfd_spareint[3]; uint64_t kf_eventfd_addr; } kf_eventfd; struct { uint32_t kf_timerfd_clockid; uint32_t kf_timerfd_flags; uint64_t kf_timerfd_addr; } kf_timerfd; struct { uint64_t kf_kqueue_addr; int32_t kf_kqueue_count; int32_t kf_kqueue_state; } kf_kqueue; } kf_un; }; uint16_t kf_status; /* Status flags. */ uint16_t kf_pad1; /* Round to 32 bit alignment. */ int _kf_ispare0; /* Space for more stuff. */ cap_rights_t kf_cap_rights; /* Capability rights. */ uint64_t _kf_cap_spare; /* Space for future cap_rights_t. */ /* Truncated before copyout in sysctl */ char kf_path[PATH_MAX]; /* Path to file, if any. */ }; struct kinfo_lockf { int kl_structsize; /* Variable size of record. */ int kl_rw; int kl_type; int kl_pid; int kl_sysid; int kl_pad0; uint64_t kl_file_fsid; uint64_t kl_file_rdev; uint64_t kl_file_fileid; off_t kl_start; off_t kl_len; /* len == 0 till the EOF */ char kl_path[PATH_MAX]; }; #define KLOCKF_RW_READ 0x01 #define KLOCKF_RW_WRITE 0x02 #define KLOCKF_TYPE_FLOCK 0x01 #define KLOCKF_TYPE_PID 0x02 #define KLOCKF_TYPE_REMOTE 0x03 /* * The KERN_PROC_VMMAP sysctl allows a process to dump the VM layout of * another process as a series of entries. 
*/ #define KVME_TYPE_NONE 0 #define KVME_TYPE_DEFAULT 1 /* no longer returned */ #define KVME_TYPE_VNODE 2 #define KVME_TYPE_SWAP 3 #define KVME_TYPE_DEVICE 4 #define KVME_TYPE_PHYS 5 #define KVME_TYPE_DEAD 6 #define KVME_TYPE_SG 7 #define KVME_TYPE_MGTDEVICE 8 #define KVME_TYPE_GUARD 9 #define KVME_TYPE_UNKNOWN 255 #define KVME_PROT_READ 0x00000001 #define KVME_PROT_WRITE 0x00000002 #define KVME_PROT_EXEC 0x00000004 #define KVME_MAX_PROT_READ 0x00010000 #define KVME_MAX_PROT_WRITE 0x00020000 #define KVME_MAX_PROT_EXEC 0x00040000 #define KVME_FLAG_COW 0x00000001 #define KVME_FLAG_NEEDS_COPY 0x00000002 #define KVME_FLAG_NOCOREDUMP 0x00000004 #define KVME_FLAG_SUPER 0x00000008 #define KVME_FLAG_GROWS_UP 0x00000010 #define KVME_FLAG_GROWS_DOWN 0x00000020 #define KVME_FLAG_USER_WIRED 0x00000040 #define KVME_FLAG_SYSVSHM 0x00000080 #define KVME_FLAG_POSIXSHM 0x00000100 #if defined(__amd64__) #define KINFO_OVMENTRY_SIZE 1168 #endif #if defined(__i386__) #define KINFO_OVMENTRY_SIZE 1128 #endif struct kinfo_ovmentry { int kve_structsize; /* Size of kinfo_vmmapentry. */ int kve_type; /* Type of map entry. */ void *kve_start; /* Starting address. */ void *kve_end; /* Finishing address. */ int kve_flags; /* Flags on map entry. */ int kve_resident; /* Number of resident pages. */ int kve_private_resident; /* Number of private pages. */ int kve_protection; /* Protection bitmask. */ int kve_ref_count; /* VM obj ref count. */ int kve_shadow_count; /* VM obj shadow count. */ char kve_path[PATH_MAX]; /* Path to VM obj, if any. */ void *_kve_pspare[8]; /* Space for more stuff. */ off_t kve_offset; /* Mapping offset in object */ uint64_t kve_fileid; /* inode number if vnode */ uint32_t kve_fsid; /* dev_t of vnode location */ int _kve_ispare[3]; /* Space for more stuff. */ }; #if defined(__amd64__) || defined(__i386__) #define KINFO_VMENTRY_SIZE 1160 #endif struct kinfo_vmentry { int kve_structsize; /* Variable size of record. */ int kve_type; /* Type of map entry. */ uint64_t kve_start; /* Starting address. */ uint64_t kve_end; /* Finishing address. */ uint64_t kve_offset; /* Mapping offset in object */ uint64_t kve_vn_fileid; /* inode number if vnode */ uint32_t kve_vn_fsid_freebsd11; /* dev_t of vnode location */ int kve_flags; /* Flags on map entry. */ int kve_resident; /* Number of resident pages. */ int kve_private_resident; /* Number of private pages. */ int kve_protection; /* Protection bitmask. */ int kve_ref_count; /* VM obj ref count. */ int kve_shadow_count; /* VM obj shadow count. */ int kve_vn_type; /* Vnode type. */ uint64_t kve_vn_size; /* File size. */ uint32_t kve_vn_rdev_freebsd11; /* Device id if device. */ uint16_t kve_vn_mode; /* File mode. */ uint16_t kve_status; /* Status flags. */ union { uint64_t _kve_vn_fsid; /* dev_t of vnode location */ uint64_t _kve_obj; /* handle of anon obj */ } kve_type_spec; uint64_t kve_vn_rdev; /* Device id if device. */ int _kve_ispare[8]; /* Space for more stuff. */ /* Truncated before copyout in sysctl */ char kve_path[PATH_MAX]; /* Path to VM obj, if any. */ }; #define kve_vn_fsid kve_type_spec._kve_vn_fsid #define kve_obj kve_type_spec._kve_obj #define KVMO_FLAG_SYSVSHM 0x0001 #define KVMO_FLAG_POSIXSHM 0x0002 /* * The "vm.objects" sysctl provides a list of all VM objects in the system * via an array of these entries. */ struct kinfo_vmobject { int kvo_structsize; /* Variable size of record. */ int kvo_type; /* Object type: KVME_TYPE_*. */ uint64_t kvo_size; /* Object size in pages. */ uint64_t kvo_vn_fileid; /* inode number if vnode. 
*/ uint32_t kvo_vn_fsid_freebsd11; /* dev_t of vnode location. */ int kvo_ref_count; /* Reference count. */ int kvo_shadow_count; /* Shadow count. */ int kvo_memattr; /* Memory attribute. */ uint64_t kvo_resident; /* Number of resident pages. */ uint64_t kvo_active; /* Number of active pages. */ uint64_t kvo_inactive; /* Number of inactive pages. */ union { uint64_t _kvo_vn_fsid; uint64_t _kvo_backing_obj; /* Handle for the backing obj */ } kvo_type_spec; /* Type-specific union */ uint64_t kvo_me; /* Uniq handle for anon obj */ uint64_t kvo_laundry; /* Number of laundry pages. */ uint64_t _kvo_qspare[5]; uint32_t kvo_swapped; /* Number of swapped pages */ uint32_t kvo_flags; uint32_t _kvo_ispare[6]; char kvo_path[PATH_MAX]; /* Pathname, if any. */ }; #define kvo_vn_fsid kvo_type_spec._kvo_vn_fsid #define kvo_backing_obj kvo_type_spec._kvo_backing_obj /* * The KERN_PROC_KSTACK sysctl allows a process to dump the kernel stacks of * another process as a series of entries. Each stack is represented by a * series of symbol names and offsets as generated by stack_sbuf_print(9). */ #define KKST_MAXLEN 1024 #define KKST_STATE_STACKOK 0 /* Stack is valid. */ #define KKST_STATE_SWAPPED 1 /* Stack swapped out, obsolete. */ #define KKST_STATE_RUNNING 2 /* Stack ephemeral. */ #if defined(__amd64__) || defined(__i386__) #define KINFO_KSTACK_SIZE 1096 #endif struct kinfo_kstack { lwpid_t kkst_tid; /* ID of thread. */ int kkst_state; /* Validity of stack. */ char kkst_trace[KKST_MAXLEN]; /* String representing stack. */ int _kkst_ispare[16]; /* Space for more stuff. */ }; struct kinfo_sigtramp { void *ksigtramp_start; void *ksigtramp_end; void *ksigtramp_spare[4]; }; #define KMAP_FLAG_WIREFUTURE 0x01 /* all future mappings wil be wired */ #define KMAP_FLAG_ASLR 0x02 /* ASLR is applied to mappings */ #define KMAP_FLAG_ASLR_IGNSTART 0x04 /* ASLR may map into sbrk grow region */ #define KMAP_FLAG_WXORX 0x08 /* W^X mapping policy is enforced */ #define KMAP_FLAG_ASLR_STACK 0x10 /* the stack location is randomized */ #define KMAP_FLAG_ASLR_SHARED_PAGE 0x20 /* the shared page location is randomized */ struct kinfo_vm_layout { uintptr_t kvm_min_user_addr; uintptr_t kvm_max_user_addr; uintptr_t kvm_text_addr; size_t kvm_text_size; uintptr_t kvm_data_addr; size_t kvm_data_size; uintptr_t kvm_stack_addr; size_t kvm_stack_size; int kvm_map_flags; uintptr_t kvm_shp_addr; size_t kvm_shp_size; uintptr_t kvm_spare[12]; }; #define KNOTE_STATUS_ACTIVE 0x00000001 #define KNOTE_STATUS_QUEUED 0x00000002 #define KNOTE_STATUS_DISABLED 0x00000004 #define KNOTE_STATUS_DETACHED 0x00000008 #define KNOTE_STATUS_KQUEUE 0x00000010 #define KNOTE_EXTDATA_NONE 0 #define KNOTE_EXTDATA_VNODE 1 #define KNOTE_EXTDATA_PIPE 2 struct kinfo_knote { int knt_kq_fd; struct kevent knt_event; int knt_status; int knt_extdata; union { struct { int knt_vnode_type; uint64_t knt_vnode_fsid; uint64_t knt_vnode_fileid; char knt_vnode_fullpath[PATH_MAX]; } knt_vnode; struct { ino_t knt_pipe_ino; } knt_pipe; }; }; #ifdef _KERNEL /* Flags for kern_proc_out function. */ #define KERN_PROC_NOTHREADS 0x1 #define KERN_PROC_MASK32 0x2 /* Flags for kern_proc_filedesc_out. */ #define KERN_FILEDESC_PACK_KINFO 0x00000001U /* Flags for kern_proc_vmmap_out. */ #define KERN_VMMAP_PACK_KINFO 0x00000001U struct sbuf; /* * The kern_proc out functions are helper functions to dump process * miscellaneous kinfo structures to sbuf. The main consumers are KERN_PROC * sysctls but they may also be used by other kernel subsystems. 
 *
 * The functions manipulate the process locking state and expect the process
 * to be locked on enter. On return the process is unlocked.
 */
int	kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen,
	int flags);
int	kern_proc_cwd_out(struct proc *p, struct sbuf *sb, ssize_t maxlen);
int	kern_proc_out(struct proc *p, struct sbuf *sb, int flags);
int	kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen,
	int flags);
+int	kern_proc_kqueues_out(struct proc *p, struct sbuf *s, size_t maxlen,
+	bool compat32);
int	vntype_to_kinfo(int vtype);
void	pack_kinfo(struct kinfo_file *kif);
#endif /* !_KERNEL */
#endif
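
A minimal sketch of a userland consumer for the new NT_PROCSTAT_KQUEUES core note. It assumes the note payload follows the same convention as the other NT_PROCSTAT_* notes, i.e. a leading 32-bit per-record size followed by packed struct kinfo_knote records; kern_proc_kqueues_out() is only declared in this diff, so the exact layout should be verified against its implementation before relying on it.

#include <sys/param.h>
#include <sys/event.h>
#include <sys/user.h>

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Decode the payload of an NT_PROCSTAT_KQUEUES ("FreeBSD", type 18) note.
 * Layout assumption: a 32-bit per-record structsize followed by packed
 * struct kinfo_knote records, as with the other NT_PROCSTAT_* notes.
 */
static void
dump_kqueue_note(const void *buf, size_t len)
{
	const char *p = buf;
	uint32_t structsize;
	struct kinfo_knote kn;

	if (len < sizeof(structsize))
		return;
	memcpy(&structsize, p, sizeof(structsize));
	p += sizeof(structsize);
	len -= sizeof(structsize);

	while (structsize >= sizeof(kn) && len >= structsize) {
		memcpy(&kn, p, sizeof(kn));
		printf("kq fd %d: ident %ju filter %d status %#x%s%s\n",
		    kn.knt_kq_fd, (uintmax_t)kn.knt_event.ident,
		    (int)kn.knt_event.filter, (unsigned)kn.knt_status,
		    (kn.knt_status & KNOTE_STATUS_ACTIVE) ? " active" : "",
		    (kn.knt_status & KNOTE_STATUS_DISABLED) ? " disabled" : "");
		/* The union member is selected by knt_extdata. */
		if (kn.knt_extdata == KNOTE_EXTDATA_VNODE)
			printf("  vnode: %s\n", kn.knt_vnode.knt_vnode_fullpath);
		p += structsize;
		len -= structsize;
	}
}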
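
For live processes, the KF_TYPE_KQUEUE kinfo_file records already carry kf_kqueue_addr, kf_kqueue_count and kf_kqueue_state in kf_un.kf_kqueue. A minimal sketch using kinfo_getfile(3) from libutil; show_kqueues() and its pid argument are illustrative only.

#include <sys/param.h>
#include <sys/user.h>

#include <err.h>
#include <libutil.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* List the kqueue descriptors of one process; link with -lutil. */
static void
show_kqueues(pid_t pid)
{
	struct kinfo_file *files;
	int cnt, i;

	files = kinfo_getfile(pid, &cnt);
	if (files == NULL) {
		warn("kinfo_getfile");
		return;
	}
	for (i = 0; i < cnt; i++) {
		if (files[i].kf_type != KF_TYPE_KQUEUE)
			continue;
		printf("fd %d: kqueue at %#jx, %d pending, state %#x\n",
		    files[i].kf_fd,
		    (uintmax_t)files[i].kf_un.kf_kqueue.kf_kqueue_addr,
		    (int)files[i].kf_un.kf_kqueue.kf_kqueue_count,
		    (unsigned)files[i].kf_un.kf_kqueue.kf_kqueue_state);
	}
	free(files);
}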