Index: head/sys/kern/imgact_elf.c =================================================================== --- head/sys/kern/imgact_elf.c (revision 287536) +++ head/sys/kern/imgact_elf.c (revision 287537) @@ -1,2233 +1,2228 @@ /*- * Copyright (c) 2000 David O'Brien * Copyright (c) 1995-1996 Søren Schmidt * Copyright (c) 1996 Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_compat.h" #include "opt_gzio.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define ELF_NOTE_ROUNDSIZE 4 #define OLD_EI_BRAND 8 static int __elfN(check_header)(const Elf_Ehdr *hdr); static Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp, const char *interp, int interp_name_len, int32_t *osrel); static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr, u_long *entry, size_t pagesize); static int __elfN(load_section)(struct image_params *imgp, vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot, size_t pagesize); static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp); static boolean_t __elfN(freebsd_trans_osrel)(const Elf_Note *note, int32_t *osrel); static boolean_t kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel); static boolean_t __elfN(check_note)(struct image_params *imgp, Elf_Brandnote *checknote, int32_t *osrel); static vm_prot_t __elfN(trans_prot)(Elf_Word); static Elf_Word __elfN(untrans_prot)(vm_prot_t); SYSCTL_NODE(_kern, OID_AUTO, __CONCAT(elf, __ELF_WORD_SIZE), CTLFLAG_RW, 0, ""); #define CORE_BUF_SIZE (16 * 1024) int __elfN(fallback_brand) = -1; SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, fallback_brand, CTLFLAG_RWTUN, &__elfN(fallback_brand), 0, __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) " brand of last resort"); static int elf_legacy_coredump = 
0; SYSCTL_INT(_debug, OID_AUTO, __elfN(legacy_coredump), CTLFLAG_RW, &elf_legacy_coredump, 0, ""); int __elfN(nxstack) = #if defined(__amd64__) || defined(__powerpc64__) /* both 64 and 32 bit */ 1; #else 0; #endif SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, nxstack, CTLFLAG_RW, &__elfN(nxstack), 0, __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": enable non-executable stack"); #if __ELF_WORD_SIZE == 32 #if defined(__amd64__) int i386_read_exec = 0; SYSCTL_INT(_kern_elf32, OID_AUTO, read_exec, CTLFLAG_RW, &i386_read_exec, 0, "enable execution from readable segments"); #endif #endif static Elf_Brandinfo *elf_brand_list[MAX_BRANDS]; #define trunc_page_ps(va, ps) ((va) & ~(ps - 1)) #define round_page_ps(va, ps) (((va) + (ps - 1)) & ~(ps - 1)) #define aligned(a, t) (trunc_page_ps((u_long)(a), sizeof(t)) == (u_long)(a)) static const char FREEBSD_ABI_VENDOR[] = "FreeBSD"; Elf_Brandnote __elfN(freebsd_brandnote) = { .hdr.n_namesz = sizeof(FREEBSD_ABI_VENDOR), .hdr.n_descsz = sizeof(int32_t), .hdr.n_type = 1, .vendor = FREEBSD_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = __elfN(freebsd_trans_osrel) }; static boolean_t __elfN(freebsd_trans_osrel)(const Elf_Note *note, int32_t *osrel) { uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE); *osrel = *(const int32_t *)(p); return (TRUE); } static const char GNU_ABI_VENDOR[] = "GNU"; static int GNU_KFREEBSD_ABI_DESC = 3; Elf_Brandnote __elfN(kfreebsd_brandnote) = { .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), .hdr.n_descsz = 16, /* XXX at least 16 */ .hdr.n_type = 1, .vendor = GNU_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = kfreebsd_trans_osrel }; static boolean_t kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel) { const Elf32_Word *desc; uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE); desc = (const Elf32_Word *)p; if (desc[0] != GNU_KFREEBSD_ABI_DESC) return (FALSE); /* * Debian GNU/kFreeBSD embed the earliest compatible kernel version * (__FreeBSD_version: Rxx) in the LSB way. */ *osrel = desc[1] * 100000 + desc[2] * 1000 + desc[3]; return (TRUE); } int __elfN(insert_brand_entry)(Elf_Brandinfo *entry) { int i; for (i = 0; i < MAX_BRANDS; i++) { if (elf_brand_list[i] == NULL) { elf_brand_list[i] = entry; break; } } if (i == MAX_BRANDS) { printf("WARNING: %s: could not insert brandinfo entry: %p\n", __func__, entry); return (-1); } return (0); } int __elfN(remove_brand_entry)(Elf_Brandinfo *entry) { int i; for (i = 0; i < MAX_BRANDS; i++) { if (elf_brand_list[i] == entry) { elf_brand_list[i] = NULL; break; } } if (i == MAX_BRANDS) return (-1); return (0); } int __elfN(brand_inuse)(Elf_Brandinfo *entry) { struct proc *p; int rval = FALSE; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { if (p->p_sysent == entry->sysvec) { rval = TRUE; break; } } sx_sunlock(&allproc_lock); return (rval); } static Elf_Brandinfo * __elfN(get_brandinfo)(struct image_params *imgp, const char *interp, int interp_name_len, int32_t *osrel) { const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header; Elf_Brandinfo *bi; boolean_t ret; int i; /* * We support four types of branding -- (1) the ELF EI_OSABI field * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string * branding w/in the ELF header, (3) path of the `interp_path' * field, and (4) the ".note.ABI-tag" ELF section. 
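 *
 * (Editorial note: the checks below try those sources roughly in
 * order of specificity -- the ABI note first, then the EI_OSABI byte
 * or the old FreeBSD 3.x EI_BRAND string, then any brand-specific
 * header_supported() hook, then the interpreter path, and finally
 * the kern.elf<N>.fallback_brand sysctl as a last resort.)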
*/ /* Look for an ".note.ABI-tag" ELF section */ for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi == NULL) continue; if (hdr->e_machine == bi->machine && (bi->flags & (BI_BRAND_NOTE|BI_BRAND_NOTE_MANDATORY)) != 0) { ret = __elfN(check_note)(imgp, bi->brand_note, osrel); if (ret) return (bi); } } /* If the executable has a brand, search for it in the brand list. */ for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY) continue; if (hdr->e_machine == bi->machine && (hdr->e_ident[EI_OSABI] == bi->brand || strncmp((const char *)&hdr->e_ident[OLD_EI_BRAND], bi->compat_3_brand, strlen(bi->compat_3_brand)) == 0)) return (bi); } /* No known brand, see if the header is recognized by any brand */ for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY || bi->header_supported == NULL) continue; if (hdr->e_machine == bi->machine) { ret = bi->header_supported(imgp); if (ret) return (bi); } } /* Lacking a known brand, search for a recognized interpreter. */ if (interp != NULL) { for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY) continue; if (hdr->e_machine == bi->machine && /* ELF image p_filesz includes terminating zero */ strlen(bi->interp_path) + 1 == interp_name_len && strncmp(interp, bi->interp_path, interp_name_len) == 0) return (bi); } } /* Lacking a recognized interpreter, try the default brand */ for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY) continue; if (hdr->e_machine == bi->machine && __elfN(fallback_brand) == bi->brand) return (bi); } return (NULL); } static int __elfN(check_header)(const Elf_Ehdr *hdr) { Elf_Brandinfo *bi; int i; if (!IS_ELF(*hdr) || hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || hdr->e_ident[EI_DATA] != ELF_TARG_DATA || hdr->e_ident[EI_VERSION] != EV_CURRENT || hdr->e_phentsize != sizeof(Elf_Phdr) || hdr->e_version != ELF_TARG_VER) return (ENOEXEC); /* * Make sure we have at least one brand for this machine. */ for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi != NULL && bi->machine == hdr->e_machine) break; } if (i == MAX_BRANDS) return (ENOEXEC); return (0); } static int __elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_offset_t start, vm_offset_t end, vm_prot_t prot) { struct sf_buf *sf; int error; vm_offset_t off; /* * Create the page if it doesn't exist yet. Ignore errors. */ vm_map_lock(map); vm_map_insert(map, NULL, 0, trunc_page(start), round_page(end), VM_PROT_ALL, VM_PROT_ALL, 0); vm_map_unlock(map); /* * Find the page from the underlying object. 
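 *
 * (Editorial worked example, with made-up numbers: on 4 KB pages a
 * file offset of 0x1234 gives off = offset - trunc_page(offset) =
 * 0x234, so at most PAGE_SIZE - off = 0xdcc bytes can be copied out
 * of this page; map_insert() below iterates the same arithmetic one
 * page at a time.)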
*/ if (object) { sf = vm_imgact_map_page(object, offset); if (sf == NULL) return (KERN_FAILURE); off = offset - trunc_page(offset); error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start, end - start); vm_imgact_unmap_page(sf); if (error) { return (KERN_FAILURE); } } return (KERN_SUCCESS); } static int __elfN(map_insert)(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_offset_t start, vm_offset_t end, vm_prot_t prot, int cow) { struct sf_buf *sf; vm_offset_t off; vm_size_t sz; int error, rv; if (start != trunc_page(start)) { rv = __elfN(map_partial)(map, object, offset, start, round_page(start), prot); if (rv) return (rv); offset += round_page(start) - start; start = round_page(start); } if (end != round_page(end)) { rv = __elfN(map_partial)(map, object, offset + trunc_page(end) - start, trunc_page(end), end, prot); if (rv) return (rv); end = trunc_page(end); } if (end > start) { if (offset & PAGE_MASK) { /* * The mapping is not page aligned. This means we have * to copy the data. Sigh. */ rv = vm_map_find(map, NULL, 0, &start, end - start, 0, VMFS_NO_SPACE, prot | VM_PROT_WRITE, VM_PROT_ALL, 0); if (rv) return (rv); if (object == NULL) return (KERN_SUCCESS); for (; start < end; start += sz) { sf = vm_imgact_map_page(object, offset); if (sf == NULL) return (KERN_FAILURE); off = offset - trunc_page(offset); sz = end - start; if (sz > PAGE_SIZE - off) sz = PAGE_SIZE - off; error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start, sz); vm_imgact_unmap_page(sf); if (error) { return (KERN_FAILURE); } offset += sz; } rv = KERN_SUCCESS; } else { vm_object_reference(object); vm_map_lock(map); rv = vm_map_insert(map, object, offset, start, end, prot, VM_PROT_ALL, cow); vm_map_unlock(map); if (rv != KERN_SUCCESS) vm_object_deallocate(object); } return (rv); } else { return (KERN_SUCCESS); } } static int __elfN(load_section)(struct image_params *imgp, vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot, size_t pagesize) { struct sf_buf *sf; size_t map_len; vm_map_t map; vm_object_t object; vm_offset_t map_addr; int error, rv, cow; size_t copy_len; vm_offset_t file_addr; /* * It's necessary to fail if the filsz + offset taken from the * header is greater than the actual file pager object's size. * If we were to allow this, then the vm_map_find() below would * walk right off the end of the file object and into the ether. * * While I'm here, might as well check for something else that * is invalid: filsz cannot be greater than memsz. */ if ((off_t)filsz + offset > imgp->attr->va_size || filsz > memsz) { uprintf("elf_load_section: truncated ELF file\n"); return (ENOEXEC); } object = imgp->object; map = &imgp->proc->p_vmspace->vm_map; map_addr = trunc_page_ps((vm_offset_t)vmaddr, pagesize); file_addr = trunc_page_ps(offset, pagesize); /* * We have two choices. We can either clear the data in the last page * of an oversized mapping, or we can start the anon mapping a page * early and copy the initialized data into that first page. We * choose the second.. */ if (memsz > filsz) map_len = trunc_page_ps(offset + filsz, pagesize) - file_addr; else map_len = round_page_ps(offset + filsz, pagesize) - file_addr; if (map_len != 0) { /* cow flags: don't dump readonly sections in core */ cow = MAP_COPY_ON_WRITE | MAP_PREFAULT | (prot & VM_PROT_WRITE ? 
0 : MAP_DISABLE_COREDUMP); rv = __elfN(map_insert)(map, object, file_addr, /* file offset */ map_addr, /* virtual start */ map_addr + map_len,/* virtual end */ prot, cow); if (rv != KERN_SUCCESS) return (EINVAL); /* we can stop now if we've covered it all */ if (memsz == filsz) { return (0); } } /* * We have to get the remaining bit of the file into the first part * of the oversized map segment. This is normally because the .data * segment in the file is extended to provide bss. It's a neat idea * to try and save a page, but it's a pain in the behind to implement. */ copy_len = (offset + filsz) - trunc_page_ps(offset + filsz, pagesize); map_addr = trunc_page_ps((vm_offset_t)vmaddr + filsz, pagesize); map_len = round_page_ps((vm_offset_t)vmaddr + memsz, pagesize) - map_addr; /* This had damn well better be true! */ if (map_len != 0) { rv = __elfN(map_insert)(map, NULL, 0, map_addr, map_addr + map_len, VM_PROT_ALL, 0); if (rv != KERN_SUCCESS) { return (EINVAL); } } if (copy_len != 0) { vm_offset_t off; sf = vm_imgact_map_page(object, offset + filsz); if (sf == NULL) return (EIO); /* send the page fragment to user space */ off = trunc_page_ps(offset + filsz, pagesize) - trunc_page(offset + filsz); error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)map_addr, copy_len); vm_imgact_unmap_page(sf); if (error) { return (error); } } /* * set it to the specified protection. * XXX had better undo the damage from pasting over the cracks here! */ vm_map_protect(map, trunc_page(map_addr), round_page(map_addr + map_len), prot, FALSE); return (0); } /* * Load the file "file" into memory. It may be either a shared object * or an executable. * * The "addr" reference parameter is in/out. On entry, it specifies * the address where a shared object should be loaded. If the file is * an executable, this value is ignored. On exit, "addr" specifies * where the file was actually loaded. * * The "entry" reference parameter is out only. On exit, it specifies * the entry point for the loaded file. */ static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr, u_long *entry, size_t pagesize) { struct { struct nameidata nd; struct vattr attr; struct image_params image_params; } *tempdata; const Elf_Ehdr *hdr = NULL; const Elf_Phdr *phdr = NULL; struct nameidata *nd; struct vattr *attr; struct image_params *imgp; vm_prot_t prot; u_long rbase; u_long base_addr = 0; int error, i, numsegs; #ifdef CAPABILITY_MODE /* * XXXJA: This check can go away once we are sufficiently confident * that the checks in namei() are correct. */ if (IN_CAPABILITY_MODE(curthread)) return (ECAPMODE); #endif tempdata = malloc(sizeof(*tempdata), M_TEMP, M_WAITOK); nd = &tempdata->nd; attr = &tempdata->attr; imgp = &tempdata->image_params; /* * Initialize part of the common data */ imgp->proc = p; imgp->attr = attr; imgp->firstpage = NULL; imgp->image_header = NULL; imgp->object = NULL; imgp->execlabel = NULL; NDINIT(nd, LOOKUP, LOCKLEAF | FOLLOW, UIO_SYSSPACE, file, curthread); if ((error = namei(nd)) != 0) { nd->ni_vp = NULL; goto fail; } NDFREE(nd, NDF_ONLY_PNBUF); imgp->vp = nd->ni_vp; /* * Check permissions, modes, uid, etc on the file, and "open" it. */ error = exec_check_permissions(imgp); if (error) goto fail; error = exec_map_first_page(imgp); if (error) goto fail; /* * Also make certain that the interpreter stays the same, so set * its VV_TEXT flag, too. 
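 */

/*
 * (Editorial worked example for __elfN(load_section)() above, with
 * made-up numbers: a segment with p_offset = 0x2000, p_filesz =
 * 0x1650 and p_memsz = 0x4000 on 4 KB pages gets
 * trunc_page(0x2000 + 0x1650) - 0x2000 = 0x1000 bytes of file-backed
 * mapping; the remaining 0x650 initialized bytes are copied by hand
 * into the first page of the anonymous bss mapping, and the rest of
 * p_memsz stays zero-filled.)
 */

/*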
*/ VOP_SET_TEXT(nd->ni_vp); imgp->object = nd->ni_vp->v_object; hdr = (const Elf_Ehdr *)imgp->image_header; if ((error = __elfN(check_header)(hdr)) != 0) goto fail; if (hdr->e_type == ET_DYN) rbase = *addr; else if (hdr->e_type == ET_EXEC) rbase = 0; else { error = ENOEXEC; goto fail; } /* Only support headers that fit within first page for now */ if ((hdr->e_phoff > PAGE_SIZE) || (u_int)hdr->e_phentsize * hdr->e_phnum > PAGE_SIZE - hdr->e_phoff) { error = ENOEXEC; goto fail; } phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); if (!aligned(phdr, Elf_Addr)) { error = ENOEXEC; goto fail; } for (i = 0, numsegs = 0; i < hdr->e_phnum; i++) { if (phdr[i].p_type == PT_LOAD && phdr[i].p_memsz != 0) { /* Loadable segment */ prot = __elfN(trans_prot)(phdr[i].p_flags); error = __elfN(load_section)(imgp, phdr[i].p_offset, (caddr_t)(uintptr_t)phdr[i].p_vaddr + rbase, phdr[i].p_memsz, phdr[i].p_filesz, prot, pagesize); if (error != 0) goto fail; /* * Establish the base address if this is the * first segment. */ if (numsegs == 0) base_addr = trunc_page(phdr[i].p_vaddr + rbase); numsegs++; } } *addr = base_addr; *entry = (unsigned long)hdr->e_entry + rbase; fail: if (imgp->firstpage) exec_unmap_first_page(imgp); if (nd->ni_vp) vput(nd->ni_vp); free(tempdata, M_TEMP); return (error); } static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) { const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header; const Elf_Phdr *phdr; Elf_Auxargs *elf_auxargs; struct vmspace *vmspace; vm_prot_t prot; u_long text_size = 0, data_size = 0, total_size = 0; u_long text_addr = 0, data_addr = 0; u_long seg_size, seg_addr; u_long addr, baddr, et_dyn_addr, entry = 0, proghdr = 0; int32_t osrel = 0; int error = 0, i, n, interp_name_len = 0; const char *err_str = NULL, *interp = NULL, *newinterp = NULL; Elf_Brandinfo *brand_info; char *path; struct sysentvec *sv; /* * Do we have a valid ELF header ? * * Only allow ET_EXEC & ET_DYN here, reject ET_DYN later * if particular brand doesn't support it. */ if (__elfN(check_header)(hdr) != 0 || (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN)) return (-1); /* * From here on down, we return an errno, not -1, as we've * detected an ELF file. 
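 */

/*
 * (Editorial note: the first-page bound used in load_file() above and
 * repeated just below is written as
 *	(u_int)hdr->e_phentsize * hdr->e_phnum > PAGE_SIZE - hdr->e_phoff
 * only after e_phoff has been checked against PAGE_SIZE, so the
 * subtraction cannot underflow, and the u_int product of two 16-bit
 * header fields cannot wrap; an attacker-supplied header cannot slip
 * past the bound via integer overflow.)
 */

/*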
*/ if ((hdr->e_phoff > PAGE_SIZE) || (u_int)hdr->e_phentsize * hdr->e_phnum > PAGE_SIZE - hdr->e_phoff) { /* Only support headers in first page for now */ uprintf("Program headers not in the first page\n"); return (ENOEXEC); } phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); if (!aligned(phdr, Elf_Addr)) { uprintf("Unaligned program headers\n"); return (ENOEXEC); } n = 0; baddr = 0; for (i = 0; i < hdr->e_phnum; i++) { switch (phdr[i].p_type) { case PT_LOAD: if (n == 0) baddr = phdr[i].p_vaddr; n++; break; case PT_INTERP: /* Path to interpreter */ if (phdr[i].p_filesz > MAXPATHLEN || phdr[i].p_offset > PAGE_SIZE || phdr[i].p_filesz > PAGE_SIZE - phdr[i].p_offset) { uprintf("Invalid PT_INTERP\n"); return (ENOEXEC); } interp = imgp->image_header + phdr[i].p_offset; interp_name_len = phdr[i].p_filesz; break; case PT_GNU_STACK: if (__elfN(nxstack)) imgp->stack_prot = __elfN(trans_prot)(phdr[i].p_flags); imgp->stack_sz = phdr[i].p_memsz; break; } } brand_info = __elfN(get_brandinfo)(imgp, interp, interp_name_len, &osrel); if (brand_info == NULL) { uprintf("ELF binary type \"%u\" not known.\n", hdr->e_ident[EI_OSABI]); return (ENOEXEC); } if (hdr->e_type == ET_DYN) { if ((brand_info->flags & BI_CAN_EXEC_DYN) == 0) { uprintf("Cannot execute shared object\n"); return (ENOEXEC); } /* * Honour the base load address from the dso if it is * non-zero for some reason. */ if (baddr == 0) et_dyn_addr = ET_DYN_LOAD_ADDR; else et_dyn_addr = 0; } else et_dyn_addr = 0; sv = brand_info->sysvec; if (interp != NULL && brand_info->interp_newpath != NULL) newinterp = brand_info->interp_newpath; /* * Avoid a possible deadlock if the current address space is destroyed * and that address space maps the locked vnode. In the common case, * the locked vnode's v_usecount is decremented but remains greater * than zero. Consequently, the vnode lock is not needed by vrele(). * However, in cases where the vnode lock is external, such as nullfs, * v_usecount may become zero. * * The VV_TEXT flag prevents modifications to the executable while * the vnode is unlocked. */ VOP_UNLOCK(imgp->vp, 0); error = exec_new_vmspace(imgp, sv); imgp->proc->p_sysent = sv; vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); if (error) return (error); for (i = 0; i < hdr->e_phnum; i++) { switch (phdr[i].p_type) { case PT_LOAD: /* Loadable segment */ if (phdr[i].p_memsz == 0) break; prot = __elfN(trans_prot)(phdr[i].p_flags); error = __elfN(load_section)(imgp, phdr[i].p_offset, (caddr_t)(uintptr_t)phdr[i].p_vaddr + et_dyn_addr, phdr[i].p_memsz, phdr[i].p_filesz, prot, sv->sv_pagesize); if (error != 0) return (error); /* * If this segment contains the program headers, * remember their virtual address for the AT_PHDR * aux entry. Static binaries don't usually include * a PT_PHDR entry. */ if (phdr[i].p_offset == 0 && hdr->e_phoff + hdr->e_phnum * hdr->e_phentsize <= phdr[i].p_filesz) proghdr = phdr[i].p_vaddr + hdr->e_phoff + et_dyn_addr; seg_addr = trunc_page(phdr[i].p_vaddr + et_dyn_addr); seg_size = round_page(phdr[i].p_memsz + phdr[i].p_vaddr + et_dyn_addr - seg_addr); /* * Make the largest executable segment the official * text segment and all others data. * * Note that obreak() assumes that data_addr + * data_size == end of data load area, and the ELF * file format expects segments to be sorted by * address. If multiple data segments exist, the * last one will be used. 
*/ if (phdr[i].p_flags & PF_X && text_size < seg_size) { text_size = seg_size; text_addr = seg_addr; } else { data_size = seg_size; data_addr = seg_addr; } total_size += seg_size; break; case PT_PHDR: /* Program header table info */ proghdr = phdr[i].p_vaddr + et_dyn_addr; break; default: break; } } if (data_addr == 0 && data_size == 0) { data_addr = text_addr; data_size = text_size; } entry = (u_long)hdr->e_entry + et_dyn_addr; /* * Check limits. It should be safe to check the * limits after loading the segments since we do * not actually fault in all the segments pages. */ PROC_LOCK(imgp->proc); if (data_size > lim_cur_proc(imgp->proc, RLIMIT_DATA)) err_str = "Data segment size exceeds process limit"; else if (text_size > maxtsiz) err_str = "Text segment size exceeds system limit"; else if (total_size > lim_cur_proc(imgp->proc, RLIMIT_VMEM)) err_str = "Total segment size exceeds process limit"; else if (racct_set(imgp->proc, RACCT_DATA, data_size) != 0) err_str = "Data segment size exceeds resource limit"; else if (racct_set(imgp->proc, RACCT_VMEM, total_size) != 0) err_str = "Total segment size exceeds resource limit"; if (err_str != NULL) { PROC_UNLOCK(imgp->proc); uprintf("%s\n", err_str); return (ENOMEM); } vmspace = imgp->proc->p_vmspace; vmspace->vm_tsize = text_size >> PAGE_SHIFT; vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr; vmspace->vm_dsize = data_size >> PAGE_SHIFT; vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr; /* * We load the dynamic linker where a userland call * to mmap(0, ...) would put it. The rationale behind this * calculation is that it leaves room for the heap to grow to * its maximum allowed size. */ addr = round_page((vm_offset_t)vmspace->vm_daddr + lim_max(curthread, RLIMIT_DATA)); PROC_UNLOCK(imgp->proc); imgp->entry_addr = entry; if (interp != NULL) { int have_interp = FALSE; VOP_UNLOCK(imgp->vp, 0); if (brand_info->emul_path != NULL && brand_info->emul_path[0] != '\0') { path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); snprintf(path, MAXPATHLEN, "%s%s", brand_info->emul_path, interp); error = __elfN(load_file)(imgp->proc, path, &addr, &imgp->entry_addr, sv->sv_pagesize); free(path, M_TEMP); if (error == 0) have_interp = TRUE; } if (!have_interp && newinterp != NULL) { error = __elfN(load_file)(imgp->proc, newinterp, &addr, &imgp->entry_addr, sv->sv_pagesize); if (error == 0) have_interp = TRUE; } if (!have_interp) { error = __elfN(load_file)(imgp->proc, interp, &addr, &imgp->entry_addr, sv->sv_pagesize); } vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); if (error != 0) { uprintf("ELF interpreter %s not found\n", interp); return (error); } } else addr = et_dyn_addr; /* * Construct auxargs table (used by the fixup routine) */ elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK); elf_auxargs->execfd = -1; elf_auxargs->phdr = proghdr; elf_auxargs->phent = hdr->e_phentsize; elf_auxargs->phnum = hdr->e_phnum; elf_auxargs->pagesz = PAGE_SIZE; elf_auxargs->base = addr; elf_auxargs->flags = 0; elf_auxargs->entry = entry; elf_auxargs->hdr_eflags = hdr->e_flags; imgp->auxargs = elf_auxargs; imgp->interpreted = 0; imgp->reloc_base = addr; imgp->proc->p_osrel = osrel; return (error); } #define suword __CONCAT(suword, __ELF_WORD_SIZE) int __elfN(freebsd_fixup)(register_t **stack_base, struct image_params *imgp) { Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs; Elf_Addr *base; Elf_Addr *pos; base = (Elf_Addr *)*stack_base; pos = base + (imgp->args->argc + imgp->args->envc + 2); if (args->execfd != -1) AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); AUXARGS_ENTRY(pos, 
AT_PHDR, args->phdr); AUXARGS_ENTRY(pos, AT_PHENT, args->phent); AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); AUXARGS_ENTRY(pos, AT_BASE, args->base); #ifdef AT_EHDRFLAGS AUXARGS_ENTRY(pos, AT_EHDRFLAGS, args->hdr_eflags); #endif if (imgp->execpathp != 0) AUXARGS_ENTRY(pos, AT_EXECPATH, imgp->execpathp); AUXARGS_ENTRY(pos, AT_OSRELDATE, imgp->proc->p_ucred->cr_prison->pr_osreldate); if (imgp->canary != 0) { AUXARGS_ENTRY(pos, AT_CANARY, imgp->canary); AUXARGS_ENTRY(pos, AT_CANARYLEN, imgp->canarylen); } AUXARGS_ENTRY(pos, AT_NCPUS, mp_ncpus); if (imgp->pagesizes != 0) { AUXARGS_ENTRY(pos, AT_PAGESIZES, imgp->pagesizes); AUXARGS_ENTRY(pos, AT_PAGESIZESLEN, imgp->pagesizeslen); } if (imgp->sysent->sv_timekeep_base != 0) { AUXARGS_ENTRY(pos, AT_TIMEKEEP, imgp->sysent->sv_timekeep_base); } AUXARGS_ENTRY(pos, AT_STACKPROT, imgp->sysent->sv_shared_page_obj != NULL && imgp->stack_prot != 0 ? imgp->stack_prot : imgp->sysent->sv_stackprot); AUXARGS_ENTRY(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); imgp->auxargs = NULL; base--; suword(base, (long)imgp->args->argc); *stack_base = (register_t *)base; return (0); } /* * Code for generating ELF core dumps. */ typedef void (*segment_callback)(vm_map_entry_t, void *); /* Closure for cb_put_phdr(). */ struct phdr_closure { Elf_Phdr *phdr; /* Program header to fill in */ Elf_Off offset; /* Offset of segment in core file */ }; /* Closure for cb_size_segment(). */ struct sseg_closure { int count; /* Count of writable segments. */ size_t size; /* Total size of all writable segments. */ }; typedef void (*outfunc_t)(void *, struct sbuf *, size_t *); struct note_info { int type; /* Note type. */ outfunc_t outfunc; /* Output function. */ void *outarg; /* Argument for the output function. */ size_t outsize; /* Output size. */ TAILQ_ENTRY(note_info) link; /* Link to the next note info. */ }; TAILQ_HEAD(note_info_list, note_info); /* Coredump output parameters. 
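 */

/*
 * (Editorial note on __elfN(freebsd_fixup)() above: at entry
 * *stack_base points at argv[0]; argv occupies argc slots plus a
 * terminating NULL and envp occupies envc slots plus a NULL, so the
 * AT_* pairs begin argc + envc + 2 pointers past argv[0].  argc
 * itself is stored one slot below argv[0] just before return, and
 * *stack_base is moved down to point at it.)
 */

/*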
*/ struct coredump_params { off_t offset; struct ucred *active_cred; struct ucred *file_cred; struct thread *td; struct vnode *vp; struct gzio_stream *gzs; }; static void cb_put_phdr(vm_map_entry_t, void *); static void cb_size_segment(vm_map_entry_t, void *); static int core_write(struct coredump_params *, void *, size_t, off_t, enum uio_seg); static void each_writable_segment(struct thread *, segment_callback, void *); static int __elfN(corehdr)(struct coredump_params *, int, void *, size_t, struct note_info_list *, size_t); static void __elfN(prepare_notes)(struct thread *, struct note_info_list *, size_t *); static void __elfN(puthdr)(struct thread *, void *, size_t, int, size_t); static void __elfN(putnote)(struct note_info *, struct sbuf *); static size_t register_note(struct note_info_list *, int, outfunc_t, void *); static int sbuf_drain_core_output(void *, const char *, int); static int sbuf_drain_count(void *arg, const char *data, int len); static void __elfN(note_fpregset)(void *, struct sbuf *, size_t *); static void __elfN(note_prpsinfo)(void *, struct sbuf *, size_t *); static void __elfN(note_prstatus)(void *, struct sbuf *, size_t *); static void __elfN(note_threadmd)(void *, struct sbuf *, size_t *); static void __elfN(note_thrmisc)(void *, struct sbuf *, size_t *); static void __elfN(note_procstat_auxv)(void *, struct sbuf *, size_t *); static void __elfN(note_procstat_proc)(void *, struct sbuf *, size_t *); static void __elfN(note_procstat_psstrings)(void *, struct sbuf *, size_t *); static void note_procstat_files(void *, struct sbuf *, size_t *); static void note_procstat_groups(void *, struct sbuf *, size_t *); static void note_procstat_osrel(void *, struct sbuf *, size_t *); static void note_procstat_rlimit(void *, struct sbuf *, size_t *); static void note_procstat_umask(void *, struct sbuf *, size_t *); static void note_procstat_vmmap(void *, struct sbuf *, size_t *); #ifdef GZIO extern int compress_user_cores_gzlevel; /* * Write out a core segment to the compression stream. */ static int compress_chunk(struct coredump_params *p, char *base, char *buf, u_int len) { u_int chunk_len; int error; while (len > 0) { chunk_len = MIN(len, CORE_BUF_SIZE); copyin(base, buf, chunk_len); error = gzio_write(p->gzs, buf, chunk_len); if (error != 0) break; base += chunk_len; len -= chunk_len; } return (error); } static int core_gz_write(void *base, size_t len, off_t offset, void *arg) { return (core_write((struct coredump_params *)arg, base, len, offset, UIO_SYSSPACE)); } #endif /* GZIO */ static int core_write(struct coredump_params *p, void *base, size_t len, off_t offset, enum uio_seg seg) { return (vn_rdwr_inchunks(UIO_WRITE, p->vp, base, len, offset, seg, IO_UNIT | IO_DIRECT | IO_RANGELOCKED, p->active_cred, p->file_cred, NULL, p->td)); } static int core_output(void *base, size_t len, off_t offset, struct coredump_params *p, void *tmpbuf) { #ifdef GZIO if (p->gzs != NULL) return (compress_chunk(p, base, tmpbuf, len)); #endif return (core_write(p, base, len, offset, UIO_USERSPACE)); } /* * Drain into a core file. */ static int sbuf_drain_core_output(void *arg, const char *data, int len) { struct coredump_params *p; int error, locked; p = (struct coredump_params *)arg; /* * Some kern_proc out routines that print to this sbuf may * call us with the process lock held. Draining with the * non-sleepable lock held is unsafe. The lock is needed for * those routines when dumping a live process. In our case we * can safely release the lock before draining and acquire * again after. 
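 *
 * (Editorial note: sbuf drain callbacks report progress through
 * their return value -- a positive byte count on success, a negative
 * value to abort the sbuf -- which is why this function returns
 * (-error) for a failed write and (len) otherwise, and why
 * sbuf_drain_count() below can simply add len to its counter.)
 */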
*/ locked = PROC_LOCKED(p->td->td_proc); if (locked) PROC_UNLOCK(p->td->td_proc); #ifdef GZIO if (p->gzs != NULL) error = gzio_write(p->gzs, __DECONST(char *, data), len); else #endif error = core_write(p, __DECONST(void *, data), len, p->offset, UIO_SYSSPACE); if (locked) PROC_LOCK(p->td->td_proc); if (error != 0) return (-error); p->offset += len; return (len); } /* * Drain into a counter. */ static int sbuf_drain_count(void *arg, const char *data __unused, int len) { size_t *sizep; sizep = (size_t *)arg; *sizep += len; return (len); } int __elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags) { struct ucred *cred = td->td_ucred; int error = 0; struct sseg_closure seginfo; struct note_info_list notelst; struct coredump_params params; struct note_info *ninfo; void *hdr, *tmpbuf; size_t hdrsize, notesz, coresize; boolean_t compress; compress = (flags & IMGACT_CORE_COMPRESS) != 0; hdr = NULL; tmpbuf = NULL; TAILQ_INIT(¬elst); /* Size the program segments. */ seginfo.count = 0; seginfo.size = 0; each_writable_segment(td, cb_size_segment, &seginfo); /* * Collect info about the core file header area. */ hdrsize = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) * (1 + seginfo.count); __elfN(prepare_notes)(td, ¬elst, ¬esz); coresize = round_page(hdrsize + notesz) + seginfo.size; /* Set up core dump parameters. */ params.offset = 0; params.active_cred = cred; params.file_cred = NOCRED; params.td = td; params.vp = vp; params.gzs = NULL; #ifdef RACCT if (racct_enable) { PROC_LOCK(td->td_proc); error = racct_add(td->td_proc, RACCT_CORE, coresize); PROC_UNLOCK(td->td_proc); if (error != 0) { error = EFAULT; goto done; } } #endif if (coresize >= limit) { error = EFAULT; goto done; } #ifdef GZIO /* Create a compression stream if necessary. */ if (compress) { params.gzs = gzio_init(core_gz_write, GZIO_DEFLATE, CORE_BUF_SIZE, compress_user_cores_gzlevel, ¶ms); if (params.gzs == NULL) { error = EFAULT; goto done; } tmpbuf = malloc(CORE_BUF_SIZE, M_TEMP, M_WAITOK | M_ZERO); } #endif /* * Allocate memory for building the header, fill it up, * and write it out following the notes. */ hdr = malloc(hdrsize, M_TEMP, M_WAITOK); if (hdr == NULL) { error = EINVAL; goto done; } error = __elfN(corehdr)(¶ms, seginfo.count, hdr, hdrsize, ¬elst, notesz); /* Write the contents of all of the writable segments. */ if (error == 0) { Elf_Phdr *php; off_t offset; int i; php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1; offset = round_page(hdrsize + notesz); for (i = 0; i < seginfo.count; i++) { error = core_output((caddr_t)(uintptr_t)php->p_vaddr, php->p_filesz, offset, ¶ms, tmpbuf); if (error != 0) break; offset += php->p_filesz; php++; } #ifdef GZIO if (error == 0 && compress) error = gzio_flush(params.gzs); #endif } if (error) { log(LOG_WARNING, "Failed to write core file for process %s (error %d)\n", curproc->p_comm, error); } done: #ifdef GZIO if (compress) { free(tmpbuf, M_TEMP); if (params.gzs != NULL) gzio_fini(params.gzs); } #endif while ((ninfo = TAILQ_FIRST(¬elst)) != NULL) { TAILQ_REMOVE(¬elst, ninfo, link); free(ninfo, M_TEMP); } if (hdr != NULL) free(hdr, M_TEMP); return (error); } /* * A callback for each_writable_segment() to write out the segment's * program header entry. 
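 */

/*
 * Editorial sketch, not part of this change: the core file layout
 * computed by __elfN(coredump)() above.  The ELF header, the
 * (1 + seginfo.count) program headers and the notes share the first
 * round_page(hdrsize + notesz) bytes, and the raw memory segments
 * follow, so coresize = round_page(hdrsize + notesz) + seginfo.size.
 * A self-contained userland illustration, assuming ELF64 sizes and
 * made-up segment counts:
 */
#if 0
#include <stdio.h>

#define	round_page(x)	(((x) + 4095UL) & ~4095UL)

int
main(void)
{
	unsigned long hdrsize = 64 + 56 * (1 + 3);	/* Ehdr + 4 Phdrs */
	unsigned long notesz = 3000, segsize = 0x20000;

	printf("segments start at %#lx, coresize %#lx\n",
	    round_page(hdrsize + notesz),
	    round_page(hdrsize + notesz) + segsize);
	return (0);
}
#endif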
static void
cb_put_phdr(entry, closure)
	vm_map_entry_t entry;
	void *closure;
{
	struct phdr_closure *phc = (struct phdr_closure *)closure;
	Elf_Phdr *phdr = phc->phdr;

	phc->offset = round_page(phc->offset);

	phdr->p_type = PT_LOAD;
	phdr->p_offset = phc->offset;
	phdr->p_vaddr = entry->start;
	phdr->p_paddr = 0;
	phdr->p_filesz = phdr->p_memsz = entry->end - entry->start;
	phdr->p_align = PAGE_SIZE;
	phdr->p_flags = __elfN(untrans_prot)(entry->protection);

	phc->offset += phdr->p_filesz;
	phc->phdr++;
}

/*
 * A callback for each_writable_segment() to gather information about
 * the number of segments and their total size.
 */
static void
cb_size_segment(entry, closure)
	vm_map_entry_t entry;
	void *closure;
{
	struct sseg_closure *ssc = (struct sseg_closure *)closure;

	ssc->count++;
	ssc->size += entry->end - entry->start;
}

/*
 * For each writable segment in the process's memory map, call the given
 * function with a pointer to the map entry and some arbitrary
 * caller-supplied data.
 */
static void
each_writable_segment(td, func, closure)
	struct thread *td;
	segment_callback func;
	void *closure;
{
	struct proc *p = td->td_proc;
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_map_entry_t entry;
	vm_object_t backing_object, object;
	boolean_t ignore_entry;

	vm_map_lock_read(map);
	for (entry = map->header.next; entry != &map->header;
	    entry = entry->next) {
		/*
		 * Don't dump inaccessible mappings, deal with legacy
		 * coredump mode.
		 *
		 * Note that read-only segments related to the elf binary
		 * are marked MAP_ENTRY_NOCOREDUMP now so we no longer
		 * need to arbitrarily ignore such segments.
		 */
		if (elf_legacy_coredump) {
			if ((entry->protection & VM_PROT_RW) != VM_PROT_RW)
				continue;
		} else {
			if ((entry->protection & VM_PROT_ALL) == 0)
				continue;
		}

		/*
		 * Don't include a memory segment in the coredump if
		 * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in
		 * madvise(2).  Do not dump submaps (i.e. parts of the
		 * kernel map).
		 */
		if (entry->eflags & (MAP_ENTRY_NOCOREDUMP|MAP_ENTRY_IS_SUB_MAP))
			continue;

		if ((object = entry->object.vm_object) == NULL)
			continue;

		/* Ignore memory-mapped devices and such things. */
		VM_OBJECT_RLOCK(object);
		while ((backing_object = object->backing_object) != NULL) {
			VM_OBJECT_RLOCK(backing_object);
			VM_OBJECT_RUNLOCK(object);
			object = backing_object;
		}
		ignore_entry = object->type != OBJT_DEFAULT &&
		    object->type != OBJT_SWAP && object->type != OBJT_VNODE &&
		    object->type != OBJT_PHYS;
		VM_OBJECT_RUNLOCK(object);
		if (ignore_entry)
			continue;

		(*func)(entry, closure);
	}
	vm_map_unlock_read(map);
}

/*
 * Write the core file header to the file, including padding up to
 * the page boundary.
 */
static int
__elfN(corehdr)(struct coredump_params *p, int numsegs, void *hdr,
    size_t hdrsize, struct note_info_list *notelst, size_t notesz)
{
	struct note_info *ninfo;
	struct sbuf *sb;
	int error;

	/* Fill in the header. */
	bzero(hdr, hdrsize);
	__elfN(puthdr)(p->td, hdr, hdrsize, numsegs, notesz);

	sb = sbuf_new(NULL, NULL, CORE_BUF_SIZE, SBUF_FIXEDLEN);
	sbuf_set_drain(sb, sbuf_drain_core_output, p);
	sbuf_start_section(sb, NULL);
	sbuf_bcat(sb, hdr, hdrsize);
	TAILQ_FOREACH(ninfo, notelst, link)
		__elfN(putnote)(ninfo, sb);
	/* Align up to a page boundary for the program segments.
*/ sbuf_end_section(sb, -1, PAGE_SIZE, 0); error = sbuf_finish(sb); sbuf_delete(sb); return (error); } static void __elfN(prepare_notes)(struct thread *td, struct note_info_list *list, size_t *sizep) { struct proc *p; struct thread *thr; size_t size; p = td->td_proc; size = 0; size += register_note(list, NT_PRPSINFO, __elfN(note_prpsinfo), p); /* * To have the debugger select the right thread (LWP) as the initial * thread, we dump the state of the thread passed to us in td first. * This is the thread that causes the core dump and thus likely to * be the right thread one wants to have selected in the debugger. */ thr = td; while (thr != NULL) { size += register_note(list, NT_PRSTATUS, __elfN(note_prstatus), thr); size += register_note(list, NT_FPREGSET, __elfN(note_fpregset), thr); size += register_note(list, NT_THRMISC, __elfN(note_thrmisc), thr); size += register_note(list, -1, __elfN(note_threadmd), thr); thr = (thr == td) ? TAILQ_FIRST(&p->p_threads) : TAILQ_NEXT(thr, td_plist); if (thr == td) thr = TAILQ_NEXT(thr, td_plist); } size += register_note(list, NT_PROCSTAT_PROC, __elfN(note_procstat_proc), p); size += register_note(list, NT_PROCSTAT_FILES, note_procstat_files, p); size += register_note(list, NT_PROCSTAT_VMMAP, note_procstat_vmmap, p); size += register_note(list, NT_PROCSTAT_GROUPS, note_procstat_groups, p); size += register_note(list, NT_PROCSTAT_UMASK, note_procstat_umask, p); size += register_note(list, NT_PROCSTAT_RLIMIT, note_procstat_rlimit, p); size += register_note(list, NT_PROCSTAT_OSREL, note_procstat_osrel, p); size += register_note(list, NT_PROCSTAT_PSSTRINGS, __elfN(note_procstat_psstrings), p); size += register_note(list, NT_PROCSTAT_AUXV, __elfN(note_procstat_auxv), p); *sizep = size; } static void __elfN(puthdr)(struct thread *td, void *hdr, size_t hdrsize, int numsegs, size_t notesz) { Elf_Ehdr *ehdr; Elf_Phdr *phdr; struct phdr_closure phc; ehdr = (Elf_Ehdr *)hdr; phdr = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)); ehdr->e_ident[EI_MAG0] = ELFMAG0; ehdr->e_ident[EI_MAG1] = ELFMAG1; ehdr->e_ident[EI_MAG2] = ELFMAG2; ehdr->e_ident[EI_MAG3] = ELFMAG3; ehdr->e_ident[EI_CLASS] = ELF_CLASS; ehdr->e_ident[EI_DATA] = ELF_DATA; ehdr->e_ident[EI_VERSION] = EV_CURRENT; ehdr->e_ident[EI_OSABI] = ELFOSABI_FREEBSD; ehdr->e_ident[EI_ABIVERSION] = 0; ehdr->e_ident[EI_PAD] = 0; ehdr->e_type = ET_CORE; #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 ehdr->e_machine = ELF_ARCH32; #else ehdr->e_machine = ELF_ARCH; #endif ehdr->e_version = EV_CURRENT; ehdr->e_entry = 0; ehdr->e_phoff = sizeof(Elf_Ehdr); ehdr->e_flags = 0; ehdr->e_ehsize = sizeof(Elf_Ehdr); ehdr->e_phentsize = sizeof(Elf_Phdr); ehdr->e_phnum = numsegs + 1; ehdr->e_shentsize = sizeof(Elf_Shdr); ehdr->e_shnum = 0; ehdr->e_shstrndx = SHN_UNDEF; /* * Fill in the program header entries. */ /* The note segement. */ phdr->p_type = PT_NOTE; phdr->p_offset = hdrsize; phdr->p_vaddr = 0; phdr->p_paddr = 0; phdr->p_filesz = notesz; phdr->p_memsz = 0; phdr->p_flags = PF_R; phdr->p_align = ELF_NOTE_ROUNDSIZE; phdr++; /* All the writable segments from the program. 
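 */

/*
 * (Editorial note on __elfN(prepare_notes)() above: the dumping
 * thread is registered first so that debuggers, which treat the
 * first NT_PRSTATUS note as the initial LWP, select the thread that
 * caused the dump; the subsequent walk over p_threads then skips
 * that thread so its notes are not emitted twice.)
 */

/*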
*/ phc.phdr = phdr; phc.offset = round_page(hdrsize + notesz); each_writable_segment(td, cb_put_phdr, &phc); } static size_t register_note(struct note_info_list *list, int type, outfunc_t out, void *arg) { struct note_info *ninfo; size_t size, notesize; size = 0; out(arg, NULL, &size); ninfo = malloc(sizeof(*ninfo), M_TEMP, M_ZERO | M_WAITOK); ninfo->type = type; ninfo->outfunc = out; ninfo->outarg = arg; ninfo->outsize = size; TAILQ_INSERT_TAIL(list, ninfo, link); if (type == -1) return (size); notesize = sizeof(Elf_Note) + /* note header */ roundup2(sizeof(FREEBSD_ABI_VENDOR), ELF_NOTE_ROUNDSIZE) + /* note name */ roundup2(size, ELF_NOTE_ROUNDSIZE); /* note description */ return (notesize); } static size_t append_note_data(const void *src, void *dst, size_t len) { size_t padded_len; padded_len = roundup2(len, ELF_NOTE_ROUNDSIZE); if (dst != NULL) { bcopy(src, dst, len); bzero((char *)dst + len, padded_len - len); } return (padded_len); } size_t __elfN(populate_note)(int type, void *src, void *dst, size_t size, void **descp) { Elf_Note *note; char *buf; size_t notesize; buf = dst; if (buf != NULL) { note = (Elf_Note *)buf; note->n_namesz = sizeof(FREEBSD_ABI_VENDOR); note->n_descsz = size; note->n_type = type; buf += sizeof(*note); buf += append_note_data(FREEBSD_ABI_VENDOR, buf, sizeof(FREEBSD_ABI_VENDOR)); append_note_data(src, buf, size); if (descp != NULL) *descp = buf; } notesize = sizeof(Elf_Note) + /* note header */ roundup2(sizeof(FREEBSD_ABI_VENDOR), ELF_NOTE_ROUNDSIZE) + /* note name */ roundup2(size, ELF_NOTE_ROUNDSIZE); /* note description */ return (notesize); } static void __elfN(putnote)(struct note_info *ninfo, struct sbuf *sb) { Elf_Note note; ssize_t old_len, sect_len; size_t new_len, descsz, i; if (ninfo->type == -1) { ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize); return; } note.n_namesz = sizeof(FREEBSD_ABI_VENDOR); note.n_descsz = ninfo->outsize; note.n_type = ninfo->type; sbuf_bcat(sb, ¬e, sizeof(note)); sbuf_start_section(sb, &old_len); sbuf_bcat(sb, FREEBSD_ABI_VENDOR, sizeof(FREEBSD_ABI_VENDOR)); sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0); if (note.n_descsz == 0) return; sbuf_start_section(sb, &old_len); ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize); sect_len = sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0); if (sect_len < 0) return; new_len = (size_t)sect_len; descsz = roundup(note.n_descsz, ELF_NOTE_ROUNDSIZE); if (new_len < descsz) { /* * It is expected that individual note emitters will correctly * predict their expected output size and fill up to that size * themselves, padding in a format-specific way if needed. * However, in case they don't, just do it here with zeros. */ for (i = 0; i < descsz - new_len; i++) sbuf_putc(sb, 0); } else if (new_len > descsz) { /* * We can't always truncate sb -- we may have drained some * of it already. */ KASSERT(new_len == descsz, ("%s: Note type %u changed as we " "read it (%zu > %zu). Since it is longer than " "expected, this coredump's notes are corrupt. THIS " "IS A BUG in the note_procstat routine for type %u.\n", __func__, (unsigned)note.n_type, new_len, descsz, (unsigned)note.n_type)); } } /* * Miscellaneous note out functions. 
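 */

/*
 * Editorial sketch, not part of this change: the on-disk size of one
 * ELF note as computed by register_note() and __elfN(populate_note)()
 * above -- the fixed three-word header plus the vendor name and the
 * descriptor, each padded to ELF_NOTE_ROUNDSIZE.  A self-contained
 * userland illustration with a hypothetical 13-byte descriptor:
 */
#if 0
#include <stdio.h>

#define	roundup2(x, y)	(((x) + ((y) - 1)) & ~((y) - 1))

int
main(void)
{
	unsigned long hdr = 12;			/* three 32-bit words */
	unsigned long namesz = sizeof("FreeBSD");	/* 8, incl. NUL */
	unsigned long descsz = 13;		/* hypothetical payload */

	printf("%lu\n", hdr + roundup2(namesz, 4UL) +
	    roundup2(descsz, 4UL));		/* 12 + 8 + 16 = 36 */
	return (0);
}
#endif

/*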
*/ #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 #include typedef struct prstatus32 elf_prstatus_t; typedef struct prpsinfo32 elf_prpsinfo_t; typedef struct fpreg32 elf_prfpregset_t; typedef struct fpreg32 elf_fpregset_t; typedef struct reg32 elf_gregset_t; typedef struct thrmisc32 elf_thrmisc_t; #define ELF_KERN_PROC_MASK KERN_PROC_MASK32 typedef struct kinfo_proc32 elf_kinfo_proc_t; typedef uint32_t elf_ps_strings_t; #else typedef prstatus_t elf_prstatus_t; typedef prpsinfo_t elf_prpsinfo_t; typedef prfpregset_t elf_prfpregset_t; typedef prfpregset_t elf_fpregset_t; typedef gregset_t elf_gregset_t; typedef thrmisc_t elf_thrmisc_t; #define ELF_KERN_PROC_MASK 0 typedef struct kinfo_proc elf_kinfo_proc_t; typedef vm_offset_t elf_ps_strings_t; #endif static void __elfN(note_prpsinfo)(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; elf_prpsinfo_t *psinfo; p = (struct proc *)arg; if (sb != NULL) { KASSERT(*sizep == sizeof(*psinfo), ("invalid size")); psinfo = malloc(sizeof(*psinfo), M_TEMP, M_ZERO | M_WAITOK); psinfo->pr_version = PRPSINFO_VERSION; psinfo->pr_psinfosz = sizeof(elf_prpsinfo_t); strlcpy(psinfo->pr_fname, p->p_comm, sizeof(psinfo->pr_fname)); /* * XXX - We don't fill in the command line arguments properly * yet. */ strlcpy(psinfo->pr_psargs, p->p_comm, sizeof(psinfo->pr_psargs)); sbuf_bcat(sb, psinfo, sizeof(*psinfo)); free(psinfo, M_TEMP); } *sizep = sizeof(*psinfo); } static void __elfN(note_prstatus)(void *arg, struct sbuf *sb, size_t *sizep) { struct thread *td; elf_prstatus_t *status; td = (struct thread *)arg; if (sb != NULL) { KASSERT(*sizep == sizeof(*status), ("invalid size")); status = malloc(sizeof(*status), M_TEMP, M_ZERO | M_WAITOK); status->pr_version = PRSTATUS_VERSION; status->pr_statussz = sizeof(elf_prstatus_t); status->pr_gregsetsz = sizeof(elf_gregset_t); status->pr_fpregsetsz = sizeof(elf_fpregset_t); status->pr_osreldate = osreldate; status->pr_cursig = td->td_proc->p_sig; status->pr_pid = td->td_tid; #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 fill_regs32(td, &status->pr_reg); #else fill_regs(td, &status->pr_reg); #endif sbuf_bcat(sb, status, sizeof(*status)); free(status, M_TEMP); } *sizep = sizeof(*status); } static void __elfN(note_fpregset)(void *arg, struct sbuf *sb, size_t *sizep) { struct thread *td; elf_prfpregset_t *fpregset; td = (struct thread *)arg; if (sb != NULL) { KASSERT(*sizep == sizeof(*fpregset), ("invalid size")); fpregset = malloc(sizeof(*fpregset), M_TEMP, M_ZERO | M_WAITOK); #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 fill_fpregs32(td, fpregset); #else fill_fpregs(td, fpregset); #endif sbuf_bcat(sb, fpregset, sizeof(*fpregset)); free(fpregset, M_TEMP); } *sizep = sizeof(*fpregset); } static void __elfN(note_thrmisc)(void *arg, struct sbuf *sb, size_t *sizep) { struct thread *td; elf_thrmisc_t thrmisc; td = (struct thread *)arg; if (sb != NULL) { KASSERT(*sizep == sizeof(thrmisc), ("invalid size")); bzero(&thrmisc._pad, sizeof(thrmisc._pad)); strcpy(thrmisc.pr_tname, td->td_name); sbuf_bcat(sb, &thrmisc, sizeof(thrmisc)); } *sizep = sizeof(thrmisc); } /* * Allow for MD specific notes, as well as any MD * specific preparations for writing MI notes. 
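 */

/*
 * Editorial sketch, not part of this change: the two-pass protocol
 * every outfunc_t above follows.  register_note() first calls the
 * function with sb == NULL so it only reports its size through
 * *sizep; during the dump it runs again with a live sbuf and must
 * emit exactly the bytes it promised (putnote() zero-pads, or
 * asserts, when it does not).  A self-contained userland emitter of
 * that shape, with FILE standing in for struct sbuf and a
 * hypothetical int payload:
 */
#if 0
#include <stdio.h>

static void
note_example(void *arg, FILE *sb, size_t *sizep)
{
	int payload = *(int *)arg;

	if (sb != NULL)			/* output pass */
		fwrite(&payload, sizeof(payload), 1, sb);
	*sizep = sizeof(payload);	/* both passes report the size */
}

int
main(void)
{
	int v = 42;
	size_t sz;

	note_example(&v, NULL, &sz);	/* sizing pass: sz == 4 */
	note_example(&v, stdout, &sz);	/* output pass: 4 raw bytes */
	return (0);
}
#endif

/*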
*/ static void __elfN(note_threadmd)(void *arg, struct sbuf *sb, size_t *sizep) { struct thread *td; void *buf; size_t size; td = (struct thread *)arg; size = *sizep; if (size != 0 && sb != NULL) buf = malloc(size, M_TEMP, M_ZERO | M_WAITOK); else buf = NULL; size = 0; __elfN(dump_thread)(td, buf, &size); KASSERT(sb == NULL || *sizep == size, ("invalid size")); if (size != 0 && sb != NULL) sbuf_bcat(sb, buf, size); free(buf, M_TEMP); *sizep = size; } #ifdef KINFO_PROC_SIZE CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE); #endif static void __elfN(note_procstat_proc)(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize; p = (struct proc *)arg; size = sizeof(structsize) + p->p_numthreads * sizeof(elf_kinfo_proc_t); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(elf_kinfo_proc_t); sbuf_bcat(sb, &structsize, sizeof(structsize)); sx_slock(&proctree_lock); PROC_LOCK(p); kern_proc_out(p, sb, ELF_KERN_PROC_MASK); sx_sunlock(&proctree_lock); } *sizep = size; } #ifdef KINFO_FILE_SIZE CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE); #endif -static int pack_fileinfo = 1; -SYSCTL_INT(_kern, OID_AUTO, coredump_pack_fileinfo, CTLFLAG_RWTUN, - &pack_fileinfo, 0, - "Enable file path packing in 'procstat -f' coredump notes"); - static void note_procstat_files(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size, sect_sz, i; ssize_t start_len, sect_len; int structsize, filedesc_flags; - if (pack_fileinfo) + if (coredump_pack_fileinfo) filedesc_flags = KERN_FILEDESC_PACK_KINFO; else filedesc_flags = 0; p = (struct proc *)arg; structsize = sizeof(struct kinfo_file); if (sb == NULL) { size = 0; sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN); sbuf_set_drain(sb, sbuf_drain_count, &size); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); kern_proc_filedesc_out(p, sb, -1, filedesc_flags); sbuf_finish(sb); sbuf_delete(sb); *sizep = size; } else { sbuf_start_section(sb, &start_len); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); kern_proc_filedesc_out(p, sb, *sizep - sizeof(structsize), filedesc_flags); sect_len = sbuf_end_section(sb, start_len, 0, 0); if (sect_len < 0) return; sect_sz = sect_len; KASSERT(sect_sz <= *sizep, ("kern_proc_filedesc_out did not respect maxlen; " "requested %zu, got %zu", *sizep - sizeof(structsize), sect_sz - sizeof(structsize))); for (i = 0; i < *sizep - sect_sz && sb->s_error == 0; i++) sbuf_putc(sb, 0); } } #ifdef KINFO_VMENTRY_SIZE CTASSERT(sizeof(struct kinfo_vmentry) == KINFO_VMENTRY_SIZE); #endif static void note_procstat_vmmap(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize; p = (struct proc *)arg; if (sb == NULL) { size = 0; sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN); sbuf_set_drain(sb, sbuf_drain_count, &size); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); kern_proc_vmmap_out(p, sb); sbuf_finish(sb); sbuf_delete(sb); *sizep = size; } else { structsize = sizeof(struct kinfo_vmentry); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); kern_proc_vmmap_out(p, sb); } } static void note_procstat_groups(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize; p = (struct proc *)arg; size = sizeof(structsize) + p->p_ucred->cr_ngroups * sizeof(gid_t); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(gid_t); sbuf_bcat(sb, &structsize, sizeof(structsize)); sbuf_bcat(sb, p->p_ucred->cr_groups, p->p_ucred->cr_ngroups * 
sizeof(gid_t)); } *sizep = size; } static void note_procstat_umask(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize; p = (struct proc *)arg; size = sizeof(structsize) + sizeof(p->p_fd->fd_cmask); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(p->p_fd->fd_cmask); sbuf_bcat(sb, &structsize, sizeof(structsize)); sbuf_bcat(sb, &p->p_fd->fd_cmask, sizeof(p->p_fd->fd_cmask)); } *sizep = size; } static void note_procstat_rlimit(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; struct rlimit rlim[RLIM_NLIMITS]; size_t size; int structsize, i; p = (struct proc *)arg; size = sizeof(structsize) + sizeof(rlim); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(rlim); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); for (i = 0; i < RLIM_NLIMITS; i++) lim_rlimit_proc(p, i, &rlim[i]); PROC_UNLOCK(p); sbuf_bcat(sb, rlim, sizeof(rlim)); } *sizep = size; } static void note_procstat_osrel(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize; p = (struct proc *)arg; size = sizeof(structsize) + sizeof(p->p_osrel); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(p->p_osrel); sbuf_bcat(sb, &structsize, sizeof(structsize)); sbuf_bcat(sb, &p->p_osrel, sizeof(p->p_osrel)); } *sizep = size; } static void __elfN(note_procstat_psstrings)(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; elf_ps_strings_t ps_strings; size_t size; int structsize; p = (struct proc *)arg; size = sizeof(structsize) + sizeof(ps_strings); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(ps_strings); #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 ps_strings = PTROUT(p->p_sysent->sv_psstrings); #else ps_strings = p->p_sysent->sv_psstrings; #endif sbuf_bcat(sb, &structsize, sizeof(structsize)); sbuf_bcat(sb, &ps_strings, sizeof(ps_strings)); } *sizep = size; } static void __elfN(note_procstat_auxv)(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize; p = (struct proc *)arg; if (sb == NULL) { size = 0; sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN); sbuf_set_drain(sb, sbuf_drain_count, &size); sbuf_bcat(sb, &structsize, sizeof(structsize)); PHOLD(p); proc_getauxv(curthread, p, sb); PRELE(p); sbuf_finish(sb); sbuf_delete(sb); *sizep = size; } else { structsize = sizeof(Elf_Auxinfo); sbuf_bcat(sb, &structsize, sizeof(structsize)); PHOLD(p); proc_getauxv(curthread, p, sb); PRELE(p); } } static boolean_t __elfN(parse_notes)(struct image_params *imgp, Elf_Brandnote *checknote, int32_t *osrel, const Elf_Phdr *pnote) { const Elf_Note *note, *note0, *note_end; const char *note_name; int i; if (pnote == NULL || pnote->p_offset > PAGE_SIZE || pnote->p_filesz > PAGE_SIZE - pnote->p_offset) return (FALSE); note = note0 = (const Elf_Note *)(imgp->image_header + pnote->p_offset); note_end = (const Elf_Note *)(imgp->image_header + pnote->p_offset + pnote->p_filesz); for (i = 0; i < 100 && note >= note0 && note < note_end; i++) { if (!aligned(note, Elf32_Addr) || (const char *)note_end - (const char *)note < sizeof(Elf_Note)) return (FALSE); if (note->n_namesz != checknote->hdr.n_namesz || note->n_descsz != checknote->hdr.n_descsz || note->n_type != checknote->hdr.n_type) goto nextnote; note_name = (const char *)(note + 1); if (note_name + checknote->hdr.n_namesz >= (const char *)note_end || strncmp(checknote->vendor, note_name, checknote->hdr.n_namesz) != 0) goto nextnote; /* * 
Fetch the osreldate for binary * from the ELF OSABI-note if necessary. */ if ((checknote->flags & BN_TRANSLATE_OSREL) != 0 && checknote->trans_osrel != NULL) return (checknote->trans_osrel(note, osrel)); return (TRUE); nextnote: note = (const Elf_Note *)((const char *)(note + 1) + roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE) + roundup2(note->n_descsz, ELF_NOTE_ROUNDSIZE)); } return (FALSE); } /* * Try to find the appropriate ABI-note section for checknote, * fetch the osreldate for binary from the ELF OSABI-note. Only the * first page of the image is searched, the same as for headers. */ static boolean_t __elfN(check_note)(struct image_params *imgp, Elf_Brandnote *checknote, int32_t *osrel) { const Elf_Phdr *phdr; const Elf_Ehdr *hdr; int i; hdr = (const Elf_Ehdr *)imgp->image_header; phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); for (i = 0; i < hdr->e_phnum; i++) { if (phdr[i].p_type == PT_NOTE && __elfN(parse_notes)(imgp, checknote, osrel, &phdr[i])) return (TRUE); } return (FALSE); } /* * Tell kern_execve.c about it, with a little help from the linker. */ static struct execsw __elfN(execsw) = { __CONCAT(exec_, __elfN(imgact)), __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) }; EXEC_SET(__CONCAT(elf, __ELF_WORD_SIZE), __elfN(execsw)); static vm_prot_t __elfN(trans_prot)(Elf_Word flags) { vm_prot_t prot; prot = 0; if (flags & PF_X) prot |= VM_PROT_EXECUTE; if (flags & PF_W) prot |= VM_PROT_WRITE; if (flags & PF_R) prot |= VM_PROT_READ; #if __ELF_WORD_SIZE == 32 #if defined(__amd64__) if (i386_read_exec && (flags & PF_R)) prot |= VM_PROT_EXECUTE; #endif #endif return (prot); } static Elf_Word __elfN(untrans_prot)(vm_prot_t prot) { Elf_Word flags; flags = 0; if (prot & VM_PROT_EXECUTE) flags |= PF_X; if (prot & VM_PROT_READ) flags |= PF_R; if (prot & VM_PROT_WRITE) flags |= PF_W; return (flags); } Index: head/sys/kern/kern_exec.c =================================================================== --- head/sys/kern/kern_exec.c (revision 287536) +++ head/sys/kern/kern_exec.c (revision 287537) @@ -1,1595 +1,1600 @@ /*- * Copyright (c) 1993, David Greenman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_hwpmc_hooks.h" #include "opt_ktrace.h" #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include #endif #include #include #include #ifdef KDTRACE_HOOKS #include dtrace_execexit_func_t dtrace_fasttrap_exec; #endif SDT_PROVIDER_DECLARE(proc); SDT_PROBE_DEFINE1(proc, kernel, , exec, "char *"); SDT_PROBE_DEFINE1(proc, kernel, , exec__failure, "int"); SDT_PROBE_DEFINE1(proc, kernel, , exec__success, "char *"); MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments"); +int coredump_pack_fileinfo = 1; +SYSCTL_INT(_kern, OID_AUTO, coredump_pack_fileinfo, CTLFLAG_RWTUN, + &coredump_pack_fileinfo, 0, + "Enable file path packing in 'procstat -f' coredump notes"); + static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS); static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS); static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS); static int do_execve(struct thread *td, struct image_args *args, struct mac *mac_p); /* XXX This should be vm_size_t. */ SYSCTL_PROC(_kern, KERN_PS_STRINGS, ps_strings, CTLTYPE_ULONG|CTLFLAG_RD, NULL, 0, sysctl_kern_ps_strings, "LU", ""); /* XXX This should be vm_size_t. */ SYSCTL_PROC(_kern, KERN_USRSTACK, usrstack, CTLTYPE_ULONG|CTLFLAG_RD| CTLFLAG_CAPRD, NULL, 0, sysctl_kern_usrstack, "LU", ""); SYSCTL_PROC(_kern, OID_AUTO, stackprot, CTLTYPE_INT|CTLFLAG_RD, NULL, 0, sysctl_kern_stackprot, "I", ""); u_long ps_arg_cache_limit = PAGE_SIZE / 16; SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW, &ps_arg_cache_limit, 0, ""); static int disallow_high_osrel; SYSCTL_INT(_kern, OID_AUTO, disallow_high_osrel, CTLFLAG_RW, &disallow_high_osrel, 0, "Disallow execution of binaries built for higher version of the world"); static int map_at_zero = 0; SYSCTL_INT(_security_bsd, OID_AUTO, map_at_zero, CTLFLAG_RWTUN, &map_at_zero, 0, "Permit processes to map an object at virtual address 0."); static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS) { struct proc *p; int error; p = curproc; #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { unsigned int val; val = (unsigned int)p->p_sysent->sv_psstrings; error = SYSCTL_OUT(req, &val, sizeof(val)); } else #endif error = SYSCTL_OUT(req, &p->p_sysent->sv_psstrings, sizeof(p->p_sysent->sv_psstrings)); return error; } static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS) { struct proc *p; int error; p = curproc; #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { unsigned int val; val = (unsigned int)p->p_sysent->sv_usrstack; error = SYSCTL_OUT(req, &val, sizeof(val)); } else #endif error = SYSCTL_OUT(req, &p->p_sysent->sv_usrstack, sizeof(p->p_sysent->sv_usrstack)); return error; } static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS) { struct proc *p; p = curproc; return (SYSCTL_OUT(req, &p->p_sysent->sv_stackprot, sizeof(p->p_sysent->sv_stackprot))); } /* * Each of the items is a pointer to a `const struct execsw', hence the * double pointer here. 
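 *
 * As a hedged illustration (not part of this change), a hypothetical
 * image activator would plug into this table as follows; "foo" and
 * foo_imgact are made-up names, following the return convention of the
 * activator loop in do_execve() below (-1 no match, 0 success):
 *
 *	static int
 *	foo_imgact(struct image_params *imgp)
 *	{
 *		const char *hdr = imgp->image_header;
 *
 *		if (hdr[0] != 'F' || hdr[1] != 'O')
 *			return (-1);	// not ours; try the next activator
 *		// ... map the image and set imgp->entry_addr ...
 *		return (0);		// claimed and activated
 *	}
 *
 *	static const struct execsw foo_execsw = { foo_imgact, "foo" };
 *
 * Such an entry is registered with exec_register(&foo_execsw), usually
 * via the EXEC_SET() macro from sys/exec.h.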
*/ static const struct execsw **execsw; #ifndef _SYS_SYSPROTO_H_ struct execve_args { char *fname; char **argv; char **envv; }; #endif int sys_execve(struct thread *td, struct execve_args *uap) { struct image_args args; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &args, NULL); post_execve(td, error, oldvmspace); return (error); } #ifndef _SYS_SYSPROTO_H_ struct fexecve_args { int fd; char **argv; char **envv; }; #endif int sys_fexecve(struct thread *td, struct fexecve_args *uap) { struct image_args args; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = exec_copyin_args(&args, NULL, UIO_SYSSPACE, uap->argv, uap->envv); if (error == 0) { args.fd = uap->fd; error = kern_execve(td, &args, NULL); } post_execve(td, error, oldvmspace); return (error); } #ifndef _SYS_SYSPROTO_H_ struct __mac_execve_args { char *fname; char **argv; char **envv; struct mac *mac_p; }; #endif int sys___mac_execve(struct thread *td, struct __mac_execve_args *uap) { #ifdef MAC struct image_args args; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &args, uap->mac_p); post_execve(td, error, oldvmspace); return (error); #else return (ENOSYS); #endif } int pre_execve(struct thread *td, struct vmspace **oldvmspace) { struct proc *p; int error; KASSERT(td == curthread, ("non-current thread %p", td)); error = 0; p = td->td_proc; if ((p->p_flag & P_HADTHREADS) != 0) { PROC_LOCK(p); if (thread_single(p, SINGLE_BOUNDARY) != 0) error = ERESTART; PROC_UNLOCK(p); } KASSERT(error != 0 || (td->td_pflags & TDP_EXECVMSPC) == 0, ("nested execve")); *oldvmspace = p->p_vmspace; return (error); } void post_execve(struct thread *td, int error, struct vmspace *oldvmspace) { struct proc *p; KASSERT(td == curthread, ("non-current thread %p", td)); p = td->td_proc; if ((p->p_flag & P_HADTHREADS) != 0) { PROC_LOCK(p); /* * On success, we upgrade to SINGLE_EXIT state to * force other threads to suicide. */ if (error == 0) thread_single(p, SINGLE_EXIT); else thread_single_end(p, SINGLE_BOUNDARY); PROC_UNLOCK(p); } if ((td->td_pflags & TDP_EXECVMSPC) != 0) { KASSERT(p->p_vmspace != oldvmspace, ("oldvmspace still used")); vmspace_free(oldvmspace); td->td_pflags &= ~TDP_EXECVMSPC; } } /* * XXX: kern_execve has the astonishing property of not always returning to * the caller. If sufficiently bad things happen during the call to * do_execve(), it can end up calling exit1(); as a result, callers must * avoid doing anything which they might need to undo (e.g., allocating * memory). */ int kern_execve(struct thread *td, struct image_args *args, struct mac *mac_p) { AUDIT_ARG_ARGV(args->begin_argv, args->argc, args->begin_envv - args->begin_argv); AUDIT_ARG_ENVV(args->begin_envv, args->envc, args->endp - args->begin_envv); return (do_execve(td, args, mac_p)); } /* * In-kernel implementation of execve(). All arguments are assumed to be * userspace pointers from the passed thread.
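 *
 * For orientation, the userland-visible paths above funnel into this
 * function; an illustrative, non-normative userland sketch, where
 * "/bin/ls" is an arbitrary example:
 *
 *	char *argv[] = { "ls", "-l", NULL };
 *	char *envp[] = { NULL };
 *
 *	execve("/bin/ls", argv, envp);		// path-based: sys_execve()
 *
 *	int fd = open("/bin/ls", O_EXEC);	// or descriptor-based:
 *	if (fd >= 0)
 *		fexecve(fd, argv, envp);	// sys_fexecve()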
*/ static int do_execve(td, args, mac_p) struct thread *td; struct image_args *args; struct mac *mac_p; { struct proc *p = td->td_proc; struct nameidata nd; struct ucred *newcred = NULL, *oldcred; struct uidinfo *euip = NULL; register_t *stack_base; int error, i; struct image_params image_params, *imgp; struct vattr attr; int (*img_first)(struct image_params *); struct pargs *oldargs = NULL, *newargs = NULL; struct sigacts *oldsigacts, *newsigacts; #ifdef KTRACE struct vnode *tracevp = NULL; struct ucred *tracecred = NULL; #endif struct vnode *oldtextvp = NULL, *newtextvp; cap_rights_t rights; int credential_changing; int textset; #ifdef MAC struct label *interpvplabel = NULL; int will_transition; #endif #ifdef HWPMC_HOOKS struct pmckern_procexec pe; #endif static const char fexecv_proc_title[] = "(fexecv)"; imgp = &image_params; /* * Lock the process and set the P_INEXEC flag to indicate that * it should be left alone until we're done here. This is * necessary to avoid race conditions - e.g. in ptrace() - * that might allow a local user to illicitly obtain elevated * privileges. */ PROC_LOCK(p); KASSERT((p->p_flag & P_INEXEC) == 0, ("%s(): process already has P_INEXEC flag", __func__)); p->p_flag |= P_INEXEC; PROC_UNLOCK(p); /* * Initialize part of the common data */ bzero(imgp, sizeof(*imgp)); imgp->proc = p; imgp->attr = &attr; imgp->args = args; #ifdef MAC error = mac_execve_enter(imgp, mac_p); if (error) goto exec_fail; #endif /* * Translate the file name. namei() returns a vnode pointer * in ni_vp among other things. * * XXXAUDIT: It would be desirable to also audit the name of the * interpreter if this is an interpreted binary. */ if (args->fname != NULL) { NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | FOLLOW | SAVENAME | AUDITVNODE1, UIO_SYSSPACE, args->fname, td); } SDT_PROBE(proc, kernel, , exec, args->fname, 0, 0, 0, 0); interpret: if (args->fname != NULL) { #ifdef CAPABILITY_MODE /* * While capability mode can't reach this point via direct * path arguments to execve(), we also don't allow * interpreters to be used in capability mode (for now). * Catch indirect lookups and return a permissions error. */ if (IN_CAPABILITY_MODE(td)) { error = ECAPMODE; goto exec_fail; } #endif error = namei(&nd); if (error) goto exec_fail; newtextvp = nd.ni_vp; imgp->vp = newtextvp; } else { AUDIT_ARG_FD(args->fd); /* * Descriptors opened only with O_EXEC or O_RDONLY are allowed. */ error = fgetvp_exec(td, args->fd, cap_rights_init(&rights, CAP_FEXECVE), &newtextvp); if (error) goto exec_fail; vn_lock(newtextvp, LK_EXCLUSIVE | LK_RETRY); AUDIT_ARG_VNODE1(newtextvp); imgp->vp = newtextvp; } /* * Check file permissions (also 'opens' file) */ error = exec_check_permissions(imgp); if (error) goto exec_fail_dealloc; imgp->object = imgp->vp->v_object; if (imgp->object != NULL) vm_object_reference(imgp->object); /* * Set VV_TEXT now so no one can write to the executable while we're * activating it. * * Remember if this was set before and unset it in case this is not * actually an executable image. */ textset = VOP_IS_TEXT(imgp->vp); VOP_SET_TEXT(imgp->vp); error = exec_map_first_page(imgp); if (error) goto exec_fail_dealloc; imgp->proc->p_osrel = 0; /* * If the current process has a special image activator it * wants to try first, call it. For example, emulating shell * scripts differently. */ error = -1; if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL) error = img_first(imgp); /* * Loop through the list of image activators, calling each one.
* An activator returns -1 if there is no match, 0 on success, * and an error otherwise. */ for (i = 0; error == -1 && execsw[i]; ++i) { if (execsw[i]->ex_imgact == NULL || execsw[i]->ex_imgact == img_first) { continue; } error = (*execsw[i]->ex_imgact)(imgp); } if (error) { if (error == -1) { if (textset == 0) VOP_UNSET_TEXT(imgp->vp); error = ENOEXEC; } goto exec_fail_dealloc; } /* * Special interpreter operation, cleanup and loop up to try to * activate the interpreter. */ if (imgp->interpreted) { exec_unmap_first_page(imgp); /* * VV_TEXT needs to be unset for scripts. There is a short * period before we determine that something is a script where * VV_TEXT will be set. The vnode lock is held over this * entire period so nothing should illegitimately be blocked. */ VOP_UNSET_TEXT(imgp->vp); /* free name buffer and old vnode */ if (args->fname != NULL) NDFREE(&nd, NDF_ONLY_PNBUF); #ifdef MAC mac_execve_interpreter_enter(newtextvp, &interpvplabel); #endif if (imgp->opened) { VOP_CLOSE(newtextvp, FREAD, td->td_ucred, td); imgp->opened = 0; } vput(newtextvp); vm_object_deallocate(imgp->object); imgp->object = NULL; /* set new name to that of the interpreter */ NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME, UIO_SYSSPACE, imgp->interpreter_name, td); args->fname = imgp->interpreter_name; goto interpret; } /* * NB: We unlock the vnode here because it is believed that none * of the sv_copyout_strings/sv_fixup operations require the vnode. */ VOP_UNLOCK(imgp->vp, 0); /* * Do the best to calculate the full path to the image file. */ if (imgp->auxargs != NULL && ((args->fname != NULL && args->fname[0] == '/') || vn_fullpath(td, imgp->vp, &imgp->execpath, &imgp->freepath) != 0)) imgp->execpath = args->fname; if (disallow_high_osrel && P_OSREL_MAJOR(p->p_osrel) > P_OSREL_MAJOR(__FreeBSD_version)) { error = ENOEXEC; uprintf("Osrel %d for image %s too high\n", p->p_osrel, imgp->execpath != NULL ? imgp->execpath : ""); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); goto exec_fail_dealloc; } /* ABI enforces the use of Capsicum. Switch into capabilities mode. */ if (SV_PROC_FLAG(p, SV_CAPSICUM)) sys_cap_enter(td, NULL); /* * Copy out strings (args and env) and initialize stack base */ if (p->p_sysent->sv_copyout_strings) stack_base = (*p->p_sysent->sv_copyout_strings)(imgp); else stack_base = exec_copyout_strings(imgp); /* * If custom stack fixup routine present for this process * let it do the stack setup. * Else stuff argument count as first item on stack */ if (p->p_sysent->sv_fixup != NULL) (*p->p_sysent->sv_fixup)(&stack_base, imgp); else suword(--stack_base, imgp->args->argc); if (args->fdp != NULL) { /* Install a brand new file descriptor table. */ fdinstall_remapped(td, args->fdp); args->fdp = NULL; } else { /* * Keep on using the existing file descriptor table. For * security and other reasons, the file descriptor table * cannot be shared after an exec. */ fdunshare(td); /* close files on exec */ fdcloseexec(td); } /* * Malloc things before we need locks. */ i = imgp->args->begin_envv - imgp->args->begin_argv; /* Cache arguments if they fit inside our allowance */ if (ps_arg_cache_limit >= i + sizeof(struct pargs)) { newargs = pargs_alloc(i); bcopy(imgp->args->begin_argv, newargs->ar_args, i); } vn_lock(imgp->vp, LK_SHARED | LK_RETRY); /* * For security and other reasons, signal handlers cannot * be shared after an exec. The new process gets a copy of the old * handlers. In execsigs(), the new process will have its signals * reset. 
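 *
 * The user-visible effect, standard execve() semantics shown here only
 * as a hedged illustration, is that caught signals revert to SIG_DFL
 * across exec while ignored dispositions survive:
 *
 *	signal(SIGUSR1, handler);	// caught: reset to SIG_DFL by exec
 *	signal(SIGUSR2, SIG_IGN);	// ignored: still SIG_IGN after exec
 *	execve(path, argv, envp);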
*/ if (sigacts_shared(p->p_sigacts)) { oldsigacts = p->p_sigacts; newsigacts = sigacts_alloc(); sigacts_copy(newsigacts, oldsigacts); } else { oldsigacts = NULL; newsigacts = NULL; /* satisfy gcc */ } PROC_LOCK(p); if (oldsigacts) p->p_sigacts = newsigacts; oldcred = p->p_ucred; /* Stop profiling */ stopprofclock(p); /* reset caught signals */ execsigs(p); /* name this process - nameiexec(p, ndp) */ bzero(p->p_comm, sizeof(p->p_comm)); if (args->fname) bcopy(nd.ni_cnd.cn_nameptr, p->p_comm, min(nd.ni_cnd.cn_namelen, MAXCOMLEN)); else if (vn_commname(newtextvp, p->p_comm, sizeof(p->p_comm)) != 0) bcopy(fexecv_proc_title, p->p_comm, sizeof(fexecv_proc_title)); bcopy(p->p_comm, td->td_name, sizeof(td->td_name)); #ifdef KTR sched_clear_tdname(td); #endif /* * Mark as execed, wake up the process that vforked (if any) and tell * it that it now has its own resources back. */ p->p_flag |= P_EXEC; if ((p->p_flag2 & P2_NOTRACE_EXEC) == 0) p->p_flag2 &= ~P2_NOTRACE; if (p->p_flag & P_PPWAIT) { p->p_flag &= ~(P_PPWAIT | P_PPTRACE); cv_broadcast(&p->p_pwait); } /* * Implement image setuid/setgid. * * Don't honor setuid/setgid if the filesystem prohibits it or if * the process is being traced. * * We disable setuid/setgid/etc in capability mode on the basis * that most setugid applications are not written with that * environment in mind, and will therefore almost certainly operate * incorrectly. In principle there's no reason that setugid * applications might not be useful in capability mode, so we may want * to reconsider this conservative design choice in the future. * * XXXMAC: For the time being, use NOSUID to also prohibit * transitions on the file system. */ credential_changing = 0; credential_changing |= (attr.va_mode & S_ISUID) && oldcred->cr_uid != attr.va_uid; credential_changing |= (attr.va_mode & S_ISGID) && oldcred->cr_gid != attr.va_gid; #ifdef MAC will_transition = mac_vnode_execve_will_transition(oldcred, imgp->vp, interpvplabel, imgp); credential_changing |= will_transition; #endif if (credential_changing && #ifdef CAPABILITY_MODE ((oldcred->cr_flags & CRED_FLAG_CAPMODE) == 0) && #endif (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 && (p->p_flag & P_TRACED) == 0) { /* * Turn off syscall tracing for set-id programs, except for * root. Record any set-id flags first to make sure that * we do not regain any tracing during a possible block. */ setsugid(p); #ifdef KTRACE if (p->p_tracecred != NULL && priv_check_cred(p->p_tracecred, PRIV_DEBUG_DIFFCRED, 0)) ktrprocexec(p, &tracecred, &tracevp); #endif /* * Close any file descriptors 0..2 that reference procfs, * then make sure file descriptors 0..2 are in use. * * Both fdsetugidsafety() and fdcheckstd() may call functions * taking sleepable locks, so temporarily drop our locks. */ PROC_UNLOCK(p); VOP_UNLOCK(imgp->vp, 0); fdsetugidsafety(td); error = fdcheckstd(td); if (error != 0) goto done1; newcred = crdup(oldcred); euip = uifind(attr.va_uid); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); PROC_LOCK(p); /* * Set the new credentials. */ if (attr.va_mode & S_ISUID) change_euid(newcred, euip); if (attr.va_mode & S_ISGID) change_egid(newcred, attr.va_gid); #ifdef MAC if (will_transition) { mac_vnode_execve_transition(oldcred, newcred, imgp->vp, interpvplabel, imgp); } #endif /* * Implement correct POSIX saved-id behavior. * * XXXMAC: Note that the current logic will save the * uid and gid if a MAC domain transition occurs, even * though maybe it shouldn't.
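 *
 * As a concrete illustration of the saved-id mechanics (the example
 * uids are made up): a process with ruid 1001 execing a setuid-root
 * binary ends up with ruid 1001, euid 0, svuid 0, and can later bounce
 * its effective id between the two:
 *
 *	seteuid(1001);	// allowed: 1001 == ruid; drops privilege
 *	seteuid(0);	// allowed: 0 == svuid; regains privilege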
*/ change_svuid(newcred, newcred->cr_uid); change_svgid(newcred, newcred->cr_gid); proc_set_cred(p, newcred); } else { if (oldcred->cr_uid == oldcred->cr_ruid && oldcred->cr_gid == oldcred->cr_rgid) p->p_flag &= ~P_SUGID; /* * Implement correct POSIX saved-id behavior. * * XXX: It's not clear that the existing behavior is * POSIX-compliant. A number of sources indicate that the * saved uid/gid should only be updated if the new ruid is * not equal to the old ruid, or the new euid is not equal * to the old euid and the new euid is not equal to the old * ruid. The FreeBSD code always updates the saved uid/gid. * Also, this code uses the new (replaced) euid and egid as * the source, which may or may not be the right ones to use. */ if (oldcred->cr_svuid != oldcred->cr_uid || oldcred->cr_svgid != oldcred->cr_gid) { PROC_UNLOCK(p); VOP_UNLOCK(imgp->vp, 0); newcred = crdup(oldcred); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); PROC_LOCK(p); change_svuid(newcred, newcred->cr_uid); change_svgid(newcred, newcred->cr_gid); proc_set_cred(p, newcred); } } /* * Store the vp for use in procfs. This vnode was referenced by namei * or fgetvp_exec. */ oldtextvp = p->p_textvp; p->p_textvp = newtextvp; #ifdef KDTRACE_HOOKS /* * Tell the DTrace fasttrap provider about the exec if it * has declared an interest. */ if (dtrace_fasttrap_exec) dtrace_fasttrap_exec(p); #endif /* * Notify others that we exec'd, and clear the P_INEXEC flag * as we're now a bona fide freshly-execed process. */ KNOTE_LOCKED(&p->p_klist, NOTE_EXEC); p->p_flag &= ~P_INEXEC; /* clear "fork but no exec" flag, as we _are_ execing */ p->p_acflag &= ~AFORK; /* * Free any previous argument cache and replace it with * the new argument cache, if any. */ oldargs = p->p_args; p->p_args = newargs; newargs = NULL; #ifdef HWPMC_HOOKS /* * Check if system-wide sampling is in effect or if the * current process is using PMCs. If so, do exec() time * processing. This processing needs to happen AFTER the * P_INEXEC flag is cleared. * * The proc lock needs to be released before taking the PMC * SX. */ if (PMC_SYSTEM_SAMPLING_ACTIVE() || PMC_PROC_IS_USING_PMCS(p)) { PROC_UNLOCK(p); VOP_UNLOCK(imgp->vp, 0); pe.pm_credentialschanged = credential_changing; pe.pm_entryaddr = imgp->entry_addr; PMC_CALL_HOOK_X(td, PMC_FN_PROCESS_EXEC, (void *) &pe); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); } else PROC_UNLOCK(p); #else /* !HWPMC_HOOKS */ PROC_UNLOCK(p); #endif /* Set values passed into the program in registers. */ if (p->p_sysent->sv_setregs) (*p->p_sysent->sv_setregs)(td, imgp, (u_long)(uintptr_t)stack_base); else exec_setregs(td, imgp, (u_long)(uintptr_t)stack_base); vfs_mark_atime(imgp->vp, td->td_ucred); SDT_PROBE(proc, kernel, , exec__success, args->fname, 0, 0, 0, 0); VOP_UNLOCK(imgp->vp, 0); done1: /* * Free any resources malloc'd earlier that we didn't use. */ if (euip != NULL) uifree(euip); if (newcred != NULL) crfree(oldcred); /* * Handle deferred decrement of ref counts. 
*/ if (oldtextvp != NULL) vrele(oldtextvp); #ifdef KTRACE if (tracevp != NULL) vrele(tracevp); if (tracecred != NULL) crfree(tracecred); #endif vn_lock(imgp->vp, LK_SHARED | LK_RETRY); pargs_drop(oldargs); pargs_drop(newargs); if (oldsigacts != NULL) sigacts_free(oldsigacts); exec_fail_dealloc: /* * free various allocated resources */ if (imgp->firstpage != NULL) exec_unmap_first_page(imgp); if (imgp->vp != NULL) { if (args->fname) NDFREE(&nd, NDF_ONLY_PNBUF); if (imgp->opened) VOP_CLOSE(imgp->vp, FREAD, td->td_ucred, td); if (error != 0) vput(imgp->vp); else VOP_UNLOCK(imgp->vp, 0); } if (imgp->object != NULL) vm_object_deallocate(imgp->object); free(imgp->freepath, M_TEMP); if (error == 0) { PROC_LOCK(p); td->td_dbgflags |= TDB_EXEC; PROC_UNLOCK(p); /* * Stop the process here if its stop event mask has * the S_EXEC bit set. */ STOPEVENT(p, S_EXEC, 0); goto done2; } exec_fail: /* we're done here, clear P_INEXEC */ PROC_LOCK(p); p->p_flag &= ~P_INEXEC; PROC_UNLOCK(p); SDT_PROBE(proc, kernel, , exec__failure, error, 0, 0, 0, 0); done2: #ifdef MAC mac_execve_exit(imgp); mac_execve_interpreter_exit(interpvplabel); #endif exec_free_args(args); if (error && imgp->vmspace_destroyed) { /* sorry, the process no longer exists; exit gracefully */ exit1(td, 0, SIGABRT); /* NOT REACHED */ } #ifdef KTRACE if (error == 0) ktrprocctor(p); #endif return (error); } int exec_map_first_page(imgp) struct image_params *imgp; { int rv, i; int initial_pagein; vm_page_t ma[VM_INITIAL_PAGEIN]; vm_object_t object; if (imgp->firstpage != NULL) exec_unmap_first_page(imgp); object = imgp->vp->v_object; if (object == NULL) return (EACCES); VM_OBJECT_WLOCK(object); #if VM_NRESERVLEVEL > 0 vm_object_color(object, 0); #endif ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL); if (ma[0]->valid != VM_PAGE_BITS_ALL) { initial_pagein = VM_INITIAL_PAGEIN; if (initial_pagein > object->size) initial_pagein = object->size; for (i = 1; i < initial_pagein; i++) { if ((ma[i] = vm_page_next(ma[i - 1])) != NULL) { if (ma[i]->valid) break; if (vm_page_tryxbusy(ma[i])) break; } else { ma[i] = vm_page_alloc(object, i, VM_ALLOC_NORMAL | VM_ALLOC_IFNOTCACHED); if (ma[i] == NULL) break; } } initial_pagein = i; rv = vm_pager_get_pages(object, ma, initial_pagein, 0); if (rv != VM_PAGER_OK) { vm_page_lock(ma[0]); vm_page_free(ma[0]); vm_page_unlock(ma[0]); VM_OBJECT_WUNLOCK(object); return (EIO); } } vm_page_xunbusy(ma[0]); vm_page_lock(ma[0]); vm_page_hold(ma[0]); vm_page_activate(ma[0]); vm_page_unlock(ma[0]); VM_OBJECT_WUNLOCK(object); imgp->firstpage = sf_buf_alloc(ma[0], 0); imgp->image_header = (char *)sf_buf_kva(imgp->firstpage); return (0); } void exec_unmap_first_page(imgp) struct image_params *imgp; { vm_page_t m; if (imgp->firstpage != NULL) { m = sf_buf_page(imgp->firstpage); sf_buf_free(imgp->firstpage); imgp->firstpage = NULL; vm_page_lock(m); vm_page_unhold(m); vm_page_unlock(m); } } /* * Destroy old address space, and allocate a new stack. * The new stack is only SGROWSIZ large because it is grown * automatically in trap.c.
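 *
 * For a rough, illustrative sense of the numbers (the addresses are
 * made up): with sv_usrstack at 0x800000000000 and an effective stack
 * size limit of 512MB, the fixed mapping set up below would be
 *
 *	ssiz       = 512 * 1024 * 1024;		// 0x20000000
 *	stack_addr = sv->sv_usrstack - ssiz;	// 0x7fffe0000000
 *
 * while vm_ssize initially charges only sgrowsiz worth of pages.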
*/ int exec_new_vmspace(imgp, sv) struct image_params *imgp; struct sysentvec *sv; { int error; struct proc *p = imgp->proc; struct vmspace *vmspace = p->p_vmspace; vm_object_t obj; struct rlimit rlim_stack; vm_offset_t sv_minuser, stack_addr; vm_map_t map; u_long ssiz; imgp->vmspace_destroyed = 1; imgp->sysent = sv; /* May be called with Giant held */ EVENTHANDLER_INVOKE(process_exec, p, imgp); /* * Blow away entire process VM, if address space not shared, * otherwise, create a new VM space so that other threads are * not disrupted */ map = &vmspace->vm_map; if (map_at_zero) sv_minuser = sv->sv_minuser; else sv_minuser = MAX(sv->sv_minuser, PAGE_SIZE); if (vmspace->vm_refcnt == 1 && vm_map_min(map) == sv_minuser && vm_map_max(map) == sv->sv_maxuser) { shmexit(vmspace); pmap_remove_pages(vmspace_pmap(vmspace)); vm_map_remove(map, vm_map_min(map), vm_map_max(map)); } else { error = vmspace_exec(p, sv_minuser, sv->sv_maxuser); if (error) return (error); vmspace = p->p_vmspace; map = &vmspace->vm_map; } /* Map a shared page */ obj = sv->sv_shared_page_obj; if (obj != NULL) { vm_object_reference(obj); error = vm_map_fixed(map, obj, 0, sv->sv_shared_page_base, sv->sv_shared_page_len, VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_READ | VM_PROT_EXECUTE, MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE); if (error) { vm_object_deallocate(obj); return (error); } } /* Allocate a new stack */ if (imgp->stack_sz != 0) { ssiz = trunc_page(imgp->stack_sz); PROC_LOCK(p); lim_rlimit_proc(p, RLIMIT_STACK, &rlim_stack); PROC_UNLOCK(p); if (ssiz > rlim_stack.rlim_max) ssiz = rlim_stack.rlim_max; if (ssiz > rlim_stack.rlim_cur) { rlim_stack.rlim_cur = ssiz; kern_setrlimit(curthread, RLIMIT_STACK, &rlim_stack); } } else if (sv->sv_maxssiz != NULL) { ssiz = *sv->sv_maxssiz; } else { ssiz = maxssiz; } stack_addr = sv->sv_usrstack - ssiz; error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz, obj != NULL && imgp->stack_prot != 0 ? imgp->stack_prot : sv->sv_stackprot, VM_PROT_ALL, MAP_STACK_GROWS_DOWN); if (error) return (error); /* * vm_ssize and vm_maxsaddr are somewhat antiquated concepts, but they * are still used to enforce the stack rlimit on the process stack. */ vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT; vmspace->vm_maxsaddr = (char *)stack_addr; return (0); } /* * Copy out argument and environment strings from the old process address * space into the temporary string buffer. */ int exec_copyin_args(struct image_args *args, char *fname, enum uio_seg segflg, char **argv, char **envv) { u_long argp, envp; int error; size_t length; bzero(args, sizeof(*args)); if (argv == NULL) return (EFAULT); /* * Allocate demand-paged memory for the file name, argument, and * environment strings. */ error = exec_alloc_args(args); if (error != 0) return (error); /* * Copy the file name. */ if (fname != NULL) { args->fname = args->buf; error = (segflg == UIO_SYSSPACE) ? 
copystr(fname, args->fname, PATH_MAX, &length) : copyinstr(fname, args->fname, PATH_MAX, &length); if (error != 0) goto err_exit; } else length = 0; args->begin_argv = args->buf + length; args->endp = args->begin_argv; args->stringspace = ARG_MAX; /* * extract arguments first */ for (;;) { error = fueword(argv++, &argp); if (error == -1) { error = EFAULT; goto err_exit; } if (argp == 0) break; error = copyinstr((void *)(uintptr_t)argp, args->endp, args->stringspace, &length); if (error != 0) { if (error == ENAMETOOLONG) error = E2BIG; goto err_exit; } args->stringspace -= length; args->endp += length; args->argc++; } args->begin_envv = args->endp; /* * extract environment strings */ if (envv) { for (;;) { error = fueword(envv++, &envp); if (error == -1) { error = EFAULT; goto err_exit; } if (envp == 0) break; error = copyinstr((void *)(uintptr_t)envp, args->endp, args->stringspace, &length); if (error != 0) { if (error == ENAMETOOLONG) error = E2BIG; goto err_exit; } args->stringspace -= length; args->endp += length; args->envc++; } } return (0); err_exit: exec_free_args(args); return (error); } int exec_copyin_data_fds(struct thread *td, struct image_args *args, const void *data, size_t datalen, const int *fds, size_t fdslen) { struct filedesc *ofdp; const char *p; int *kfds; int error; memset(args, '\0', sizeof(*args)); ofdp = td->td_proc->p_fd; if (datalen >= ARG_MAX || fdslen > ofdp->fd_lastfile + 1) return (E2BIG); error = exec_alloc_args(args); if (error != 0) return (error); args->begin_argv = args->buf; args->stringspace = ARG_MAX; if (datalen > 0) { /* * Argument buffer has been provided. Copy it into the * kernel as a single string and add a terminating null * byte. */ error = copyin(data, args->begin_argv, datalen); if (error != 0) goto err_exit; args->begin_argv[datalen] = '\0'; args->endp = args->begin_argv + datalen + 1; args->stringspace -= datalen + 1; /* * Traditional argument counting. Count the number of * null bytes. */ for (p = args->begin_argv; p < args->endp; ++p) if (*p == '\0') ++args->argc; } else { /* No argument buffer provided. */ args->endp = args->begin_argv; } /* There are no environment variables. */ args->begin_envv = args->endp; /* Create new file descriptor table. */ kfds = malloc(fdslen * sizeof(int), M_TEMP, M_WAITOK); error = copyin(fds, kfds, fdslen * sizeof(int)); if (error != 0) { free(kfds, M_TEMP); goto err_exit; } error = fdcopy_remapped(ofdp, kfds, fdslen, &args->fdp); free(kfds, M_TEMP); if (error != 0) goto err_exit; return (0); err_exit: exec_free_args(args); return (error); } /* * Allocate temporary demand-paged, zero-filled memory for the file name, * argument, and environment strings. Returns zero if the allocation succeeds * and ENOMEM otherwise. */ int exec_alloc_args(struct image_args *args) { args->buf = (char *)kmap_alloc_wait(exec_map, PATH_MAX + ARG_MAX); return (args->buf != NULL ? 0 : ENOMEM); } void exec_free_args(struct image_args *args) { if (args->buf != NULL) { kmap_free_wakeup(exec_map, (vm_offset_t)args->buf, PATH_MAX + ARG_MAX); args->buf = NULL; } if (args->fname_buf != NULL) { free(args->fname_buf, M_TEMP); args->fname_buf = NULL; } if (args->fdp != NULL) fdescfree_remapped(args->fdp); } /* * Copy strings out to the new process address space, constructing new arg * and env vector tables. Return a pointer to the base so that it can be used * as the initial stack pointer. 
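 *
 * Sketched from the code below, the resulting top of the user stack
 * looks like this, highest addresses first (the auxv room exists only
 * when imgp->auxargs is set):
 *
 *	struct ps_strings		at sv_psstrings
 *	signal trampoline		unless provided by a shared page
 *	execpath			image path, for rtld
 *	SSP canary
 *	pagesizes[] array
 *	argument + environment strings
 *	room for the ELF auxv		filled in later by sv_fixup
 *	NULL
 *	envp[0 .. envc-1]
 *	NULL
 *	argv[0 .. argc-1]		vectp, returned as stack_base
 *	argc				pushed afterwards by sv_fixup
 *					or by do_execve() itself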
*/ register_t * exec_copyout_strings(imgp) struct image_params *imgp; { int argc, envc; char **vectp; char *stringp; uintptr_t destp; register_t *stack_base; struct ps_strings *arginfo; struct proc *p; size_t execpath_len; int szsigcode, szps; char canary[sizeof(long) * 8]; szps = sizeof(pagesizes[0]) * MAXPAGESIZES; /* * Calculate string base and vector table pointers. * Also deal with signal trampoline code for this exec type. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; p = imgp->proc; szsigcode = 0; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; if (p->p_sysent->sv_sigcode_base == 0) { if (p->p_sysent->sv_szsigcode != NULL) szsigcode = *(p->p_sysent->sv_szsigcode); } destp = (uintptr_t)arginfo; /* * install sigcode */ if (szsigcode != 0) { destp -= szsigcode; destp = rounddown2(destp, sizeof(void *)); copyout(p->p_sysent->sv_sigcode, (void *)destp, szsigcode); } /* * Copy the image path for the rtld. */ if (execpath_len != 0) { destp -= execpath_len; imgp->execpathp = destp; copyout(imgp->execpath, (void *)destp, execpath_len); } /* * Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); destp -= sizeof(canary); imgp->canary = destp; copyout(canary, (void *)destp, sizeof(canary)); imgp->canarylen = sizeof(canary); /* * Prepare the pagesizes array. */ destp -= szps; destp = rounddown2(destp, sizeof(void *)); imgp->pagesizes = destp; copyout(pagesizes, (void *)destp, szps); imgp->pagesizeslen = szps; destp -= ARG_MAX - imgp->args->stringspace; destp = rounddown2(destp, sizeof(void *)); /* * If we have a valid auxargs ptr, prepare some room * on the stack. */ if (imgp->auxargs) { /* * 'AT_COUNT * 2' is the size of the ELF Auxargs data. This is * kept for backward compatibility. */ imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : (AT_COUNT * 2); /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets, and imgp->auxarg_size is room * for the arguments of the runtime loader. */ vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *)); } else { /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets */ vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(char *)); } /* * vectp also becomes our initial stack base */ stack_base = (register_t *)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* * Copy out strings - arguments and environment. */ copyout(stringp, (void *)destp, ARG_MAX - imgp->args->stringspace); /* * Fill in "ps_strings" struct for ps, w, etc. */ suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp); suword32(&arginfo->ps_nargvstr, argc); /* * Fill in argument portion of vector table. */ for (; argc > 0; --argc) { suword(vectp++, (long)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* a null vector table pointer separates the argp's from the envp's */ suword(vectp++, 0); suword(&arginfo->ps_envstr, (long)(intptr_t)vectp); suword32(&arginfo->ps_nenvstr, envc); /* * Fill in environment portion of vector table. */ for (; envc > 0; --envc) { suword(vectp++, (long)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* end of vector table is a null pointer */ suword(vectp, 0); return (stack_base); } /* * Check permissions of file to execute. * Called with imgp->vp locked. * Return 0 for success or error code on failure.
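 *
 * One observable consequence of the write-count check below, shown as
 * a hedged userland illustration ("a.out" is an arbitrary path): an
 * image that any process still holds open for writing cannot be
 * executed:
 *
 *	int fd = open("a.out", O_WRONLY);	// text file now busy ...
 *	execve("a.out", argv, envp);		// ... fails with ETXTBSY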
*/ int exec_check_permissions(imgp) struct image_params *imgp; { struct vnode *vp = imgp->vp; struct vattr *attr = imgp->attr; struct thread *td; int error, writecount; td = curthread; /* Get file attributes */ error = VOP_GETATTR(vp, attr, td->td_ucred); if (error) return (error); #ifdef MAC error = mac_vnode_check_exec(td->td_ucred, imgp->vp, imgp); if (error) return (error); #endif /* * 1) Check if file execution is disabled for the filesystem that * this file resides on. * 2) Ensure that at least one execute bit is on. Otherwise, a * privileged user will always succeed, and we don't want this * to happen unless the file really is executable. * 3) Ensure that the file is a regular file. */ if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || (attr->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0 || (attr->va_type != VREG)) return (EACCES); /* * Zero length files can't be exec'd */ if (attr->va_size == 0) return (ENOEXEC); /* * Check for execute permission to file based on current credentials. */ error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); if (error) return (error); /* * Check number of open-for-writes on the file and deny execution * if there are any. */ error = VOP_GET_WRITECOUNT(vp, &writecount); if (error != 0) return (error); if (writecount != 0) return (ETXTBSY); /* * Call filesystem specific open routine (which does nothing in the * general case). */ error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); if (error == 0) imgp->opened = 1; return (error); } /* * Exec handler registration */ int exec_register(execsw_arg) const struct execsw *execsw_arg; { const struct execsw **es, **xs, **newexecsw; int count = 2; /* New slot and trailing NULL */ if (execsw) for (es = execsw; *es; es++) count++; newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); if (newexecsw == NULL) return (ENOMEM); xs = newexecsw; if (execsw) for (es = execsw; *es; es++) *xs++ = *es; *xs++ = execsw_arg; *xs = NULL; if (execsw) free(execsw, M_TEMP); execsw = newexecsw; return (0); } int exec_unregister(execsw_arg) const struct execsw *execsw_arg; { const struct execsw **es, **xs, **newexecsw; int count = 1; if (execsw == NULL) panic("unregister with no handlers left?\n"); for (es = execsw; *es; es++) { if (*es == execsw_arg) break; } if (*es == NULL) return (ENOENT); for (es = execsw; *es; es++) if (*es != execsw_arg) count++; newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); if (newexecsw == NULL) return (ENOMEM); xs = newexecsw; for (es = execsw; *es; es++) if (*es != execsw_arg) *xs++ = *es; *xs = NULL; if (execsw) free(execsw, M_TEMP); execsw = newexecsw; return (0); } Index: head/sys/sys/exec.h =================================================================== --- head/sys/sys/exec.h (revision 287536) +++ head/sys/sys/exec.h (revision 287537) @@ -1,128 +1,130 @@ /*- * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)exec.h 8.3 (Berkeley) 1/21/94 * $FreeBSD$ */ #ifndef _SYS_EXEC_H_ #define _SYS_EXEC_H_ /* * Before ps_args existed, the following structure, found at the top of * the user stack of each user process, was used by ps(1) to locate * environment and argv strings. Normally ps_argvstr points to the * argv vector, and ps_nargvstr is the same as the program's argc. The * fields ps_envstr and ps_nenvstr are the equivalent for the environment. * * Programs should now use setproctitle(3) to change ps output. * setproctitle() always informs the kernel with sysctl and sets the * pointers in ps_strings. The kern.proc.args sysctl first tries p_args. * If p_args is NULL, it then falls back to reading ps_strings and following * the pointers. */ struct ps_strings { char **ps_argvstr; /* first of 0 or more argument strings */ unsigned int ps_nargvstr; /* the number of argument strings */ char **ps_envstr; /* first of 0 or more environment strings */ unsigned int ps_nenvstr; /* the number of environment strings */ }; /* * Address of ps_strings structure (in user space). * Prefer the kern.ps_strings or kern.proc.ps_strings sysctls to this constant. */ #define PS_STRINGS (USRSTACK - sizeof(struct ps_strings)) #define SPARE_USRSPACE 4096 struct image_params; struct execsw { int (*ex_imgact)(struct image_params *); const char *ex_name; }; #include #ifdef _KERNEL #include int exec_map_first_page(struct image_params *); void exec_unmap_first_page(struct image_params *); int exec_register(const struct execsw *); int exec_unregister(const struct execsw *); +extern int coredump_pack_fileinfo; + /* * note: name##_mod cannot be const storage because the * linker_file_sysinit() function modifies _file in the * moduledata_t. 
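 *
 * Usage is one line per image activator; for example, imgact_elf.c
 * registers its handler (spelled there with __CONCAT()/__elfN() so one
 * source serves both word sizes) as, effectively,
 *
 *	EXEC_SET(elf32, elf32_execsw);
 *
 * which expands to a module event handler that calls exec_register()
 * on MOD_LOAD and exec_unregister() on MOD_UNLOAD.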
*/ #include #define EXEC_SET(name, execsw_arg) \ static int __CONCAT(name,_modevent)(module_t mod, int type, \ void *data) \ { \ struct execsw *exec = (struct execsw *)data; \ int error = 0; \ switch (type) { \ case MOD_LOAD: \ /* printf(#name " module loaded\n"); */ \ error = exec_register(exec); \ if (error) \ printf(__XSTRING(name) " register failed\n"); \ break; \ case MOD_UNLOAD: \ /* printf(#name " module unloaded\n"); */ \ error = exec_unregister(exec); \ if (error) \ printf(__XSTRING(name) " unregister failed\n");\ break; \ default: \ error = EOPNOTSUPP; \ break; \ } \ return error; \ } \ static moduledata_t __CONCAT(name,_mod) = { \ __XSTRING(name), \ __CONCAT(name,_modevent), \ (void *)& execsw_arg \ }; \ DECLARE_MODULE_TIED(name, __CONCAT(name,_mod), SI_SUB_EXEC, \ SI_ORDER_ANY) #endif #endif
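/*
 * The kern.coredump_pack_fileinfo knob introduced in this revision is
 * a plain integer sysctl (and tunable), so it can be inspected or
 * changed from userland with the stock sysctl(3) interface.  A minimal
 * hedged sketch, error handling omitted:
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *
 *	int oldval, newval = 0;
 *	size_t oldlen = sizeof(oldval);
 *
 *	// read the current value, then disable file path packing
 *	sysctlbyname("kern.coredump_pack_fileinfo", &oldval, &oldlen,
 *	    &newval, sizeof(newval));
 *
 * Clearing it makes the kernel emit the NT_PROCSTAT_FILES core dump
 * note without path packing, which should let consumers that expect
 * the older layout keep working.
 */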