Index: stable/10/sys/kern/imgact_elf.c =================================================================== --- stable/10/sys/kern/imgact_elf.c (revision 281847) +++ stable/10/sys/kern/imgact_elf.c (revision 281848) @@ -1,2184 +1,2185 @@ /*- * Copyright (c) 2000 David O'Brien * Copyright (c) 1995-1996 Søren Schmidt * Copyright (c) 1996 Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_compat.h" #include "opt_core.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define ELF_NOTE_ROUNDSIZE 4 #define OLD_EI_BRAND 8 static int __elfN(check_header)(const Elf_Ehdr *hdr); static Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp, const char *interp, int interp_name_len, int32_t *osrel); static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr, u_long *entry, size_t pagesize); static int __elfN(load_section)(struct image_params *imgp, vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot, size_t pagesize); static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp); static boolean_t __elfN(freebsd_trans_osrel)(const Elf_Note *note, int32_t *osrel); static boolean_t kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel); static boolean_t __elfN(check_note)(struct image_params *imgp, Elf_Brandnote *checknote, int32_t *osrel); static vm_prot_t __elfN(trans_prot)(Elf_Word); static Elf_Word __elfN(untrans_prot)(vm_prot_t); SYSCTL_NODE(_kern, OID_AUTO, __CONCAT(elf, __ELF_WORD_SIZE), CTLFLAG_RW, 0, ""); #ifdef COMPRESS_USER_CORES static int compress_core(gzFile, char *, char *, unsigned int, struct thread * td); #endif #define CORE_BUF_SIZE (16 * 1024) int __elfN(fallback_brand) = -1; SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, fallback_brand, CTLFLAG_RW, &__elfN(fallback_brand), 0, __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) " brand of last resort"); TUNABLE_INT("kern.elf" __XSTRING(__ELF_WORD_SIZE) ".fallback_brand", &__elfN(fallback_brand)); static int elf_legacy_coredump = 0; SYSCTL_INT(_debug, OID_AUTO, __elfN(legacy_coredump), CTLFLAG_RW, &elf_legacy_coredump, 0, ""); int __elfN(nxstack) = #if defined(__amd64__) || defined(__powerpc64__) /* both 64 and 32 bit */ 1; #else 0; #endif SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, nxstack, CTLFLAG_RW, &__elfN(nxstack), 0, __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": enable non-executable stack"); #if __ELF_WORD_SIZE == 32 #if defined(__amd64__) || defined(__ia64__) int i386_read_exec = 0; SYSCTL_INT(_kern_elf32, OID_AUTO, read_exec, CTLFLAG_RW, &i386_read_exec, 0, "enable execution from readable segments"); #endif #endif static Elf_Brandinfo *elf_brand_list[MAX_BRANDS]; #define trunc_page_ps(va, ps) ((va) & ~(ps - 1)) #define round_page_ps(va, ps) (((va) + (ps - 1)) & ~(ps - 1)) #define aligned(a, t) (trunc_page_ps((u_long)(a), sizeof(t)) == (u_long)(a)) static const char FREEBSD_ABI_VENDOR[] = "FreeBSD"; Elf_Brandnote __elfN(freebsd_brandnote) = { .hdr.n_namesz = sizeof(FREEBSD_ABI_VENDOR), .hdr.n_descsz = sizeof(int32_t), .hdr.n_type = 1, .vendor = FREEBSD_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = __elfN(freebsd_trans_osrel) }; static boolean_t __elfN(freebsd_trans_osrel)(const Elf_Note *note, int32_t *osrel) { uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE); *osrel = *(const int32_t *)(p); return (TRUE); } static const char GNU_ABI_VENDOR[] = "GNU"; static int GNU_KFREEBSD_ABI_DESC = 3; Elf_Brandnote __elfN(kfreebsd_brandnote) = { .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), .hdr.n_descsz = 16, /* XXX at least 16 */ .hdr.n_type = 1, .vendor = GNU_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = kfreebsd_trans_osrel }; static boolean_t kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel) { const Elf32_Word *desc; uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE); desc = (const Elf32_Word *)p; if (desc[0] != GNU_KFREEBSD_ABI_DESC) return (FALSE); /* * Debian GNU/kFreeBSD embed the earliest compatible kernel version * (__FreeBSD_version: Rxx) in the LSB way. */ *osrel = desc[1] * 100000 + desc[2] * 1000 + desc[3]; return (TRUE); } int __elfN(insert_brand_entry)(Elf_Brandinfo *entry) { int i; for (i = 0; i < MAX_BRANDS; i++) { if (elf_brand_list[i] == NULL) { elf_brand_list[i] = entry; break; } } if (i == MAX_BRANDS) { printf("WARNING: %s: could not insert brandinfo entry: %p\n", __func__, entry); return (-1); } return (0); } int __elfN(remove_brand_entry)(Elf_Brandinfo *entry) { int i; for (i = 0; i < MAX_BRANDS; i++) { if (elf_brand_list[i] == entry) { elf_brand_list[i] = NULL; break; } } if (i == MAX_BRANDS) return (-1); return (0); } int __elfN(brand_inuse)(Elf_Brandinfo *entry) { struct proc *p; int rval = FALSE; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { if (p->p_sysent == entry->sysvec) { rval = TRUE; break; } } sx_sunlock(&allproc_lock); return (rval); } static Elf_Brandinfo * __elfN(get_brandinfo)(struct image_params *imgp, const char *interp, int interp_name_len, int32_t *osrel) { const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header; Elf_Brandinfo *bi; boolean_t ret; int i; /* * We support four types of branding -- (1) the ELF EI_OSABI field * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string * branding w/in the ELF header, (3) path of the `interp_path' * field, and (4) the ".note.ABI-tag" ELF section. */ /* Look for an ".note.ABI-tag" ELF section */ for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi == NULL) continue; if (hdr->e_machine == bi->machine && (bi->flags & (BI_BRAND_NOTE|BI_BRAND_NOTE_MANDATORY)) != 0) { ret = __elfN(check_note)(imgp, bi->brand_note, osrel); if (ret) return (bi); } } /* If the executable has a brand, search for it in the brand list. */ for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY) continue; if (hdr->e_machine == bi->machine && (hdr->e_ident[EI_OSABI] == bi->brand || strncmp((const char *)&hdr->e_ident[OLD_EI_BRAND], bi->compat_3_brand, strlen(bi->compat_3_brand)) == 0)) return (bi); } /* Lacking a known brand, search for a recognized interpreter. */ if (interp != NULL) { for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY) continue; if (hdr->e_machine == bi->machine && /* ELF image p_filesz includes terminating zero */ strlen(bi->interp_path) + 1 == interp_name_len && strncmp(interp, bi->interp_path, interp_name_len) == 0) return (bi); } } /* Lacking a recognized interpreter, try the default brand */ for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY) continue; if (hdr->e_machine == bi->machine && __elfN(fallback_brand) == bi->brand) return (bi); } return (NULL); } static int __elfN(check_header)(const Elf_Ehdr *hdr) { Elf_Brandinfo *bi; int i; if (!IS_ELF(*hdr) || hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || hdr->e_ident[EI_DATA] != ELF_TARG_DATA || hdr->e_ident[EI_VERSION] != EV_CURRENT || hdr->e_phentsize != sizeof(Elf_Phdr) || hdr->e_version != ELF_TARG_VER) return (ENOEXEC); /* * Make sure we have at least one brand for this machine. */ for (i = 0; i < MAX_BRANDS; i++) { bi = elf_brand_list[i]; if (bi != NULL && bi->machine == hdr->e_machine) break; } if (i == MAX_BRANDS) return (ENOEXEC); return (0); } static int __elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_offset_t start, vm_offset_t end, vm_prot_t prot) { struct sf_buf *sf; int error; vm_offset_t off; /* * Create the page if it doesn't exist yet. Ignore errors. */ vm_map_lock(map); vm_map_insert(map, NULL, 0, trunc_page(start), round_page(end), VM_PROT_ALL, VM_PROT_ALL, 0); vm_map_unlock(map); /* * Find the page from the underlying object. */ if (object) { sf = vm_imgact_map_page(object, offset); if (sf == NULL) return (KERN_FAILURE); off = offset - trunc_page(offset); error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start, end - start); vm_imgact_unmap_page(sf); if (error) { return (KERN_FAILURE); } } return (KERN_SUCCESS); } static int __elfN(map_insert)(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_offset_t start, vm_offset_t end, vm_prot_t prot, int cow) { struct sf_buf *sf; vm_offset_t off; vm_size_t sz; int error, rv; if (start != trunc_page(start)) { rv = __elfN(map_partial)(map, object, offset, start, round_page(start), prot); if (rv) return (rv); offset += round_page(start) - start; start = round_page(start); } if (end != round_page(end)) { rv = __elfN(map_partial)(map, object, offset + trunc_page(end) - start, trunc_page(end), end, prot); if (rv) return (rv); end = trunc_page(end); } if (end > start) { if (offset & PAGE_MASK) { /* * The mapping is not page aligned. This means we have * to copy the data. Sigh. */ rv = vm_map_find(map, NULL, 0, &start, end - start, 0, VMFS_NO_SPACE, prot | VM_PROT_WRITE, VM_PROT_ALL, 0); if (rv) return (rv); if (object == NULL) return (KERN_SUCCESS); for (; start < end; start += sz) { sf = vm_imgact_map_page(object, offset); if (sf == NULL) return (KERN_FAILURE); off = offset - trunc_page(offset); sz = end - start; if (sz > PAGE_SIZE - off) sz = PAGE_SIZE - off; error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start, sz); vm_imgact_unmap_page(sf); if (error) { return (KERN_FAILURE); } offset += sz; } rv = KERN_SUCCESS; } else { vm_object_reference(object); vm_map_lock(map); rv = vm_map_insert(map, object, offset, start, end, prot, VM_PROT_ALL, cow); vm_map_unlock(map); if (rv != KERN_SUCCESS) vm_object_deallocate(object); } return (rv); } else { return (KERN_SUCCESS); } } static int __elfN(load_section)(struct image_params *imgp, vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot, size_t pagesize) { struct sf_buf *sf; size_t map_len; vm_map_t map; vm_object_t object; vm_offset_t map_addr; int error, rv, cow; size_t copy_len; vm_offset_t file_addr; /* * It's necessary to fail if the filsz + offset taken from the * header is greater than the actual file pager object's size. * If we were to allow this, then the vm_map_find() below would * walk right off the end of the file object and into the ether. * * While I'm here, might as well check for something else that * is invalid: filsz cannot be greater than memsz. */ if ((off_t)filsz + offset > imgp->attr->va_size || filsz > memsz) { uprintf("elf_load_section: truncated ELF file\n"); return (ENOEXEC); } object = imgp->object; map = &imgp->proc->p_vmspace->vm_map; map_addr = trunc_page_ps((vm_offset_t)vmaddr, pagesize); file_addr = trunc_page_ps(offset, pagesize); /* * We have two choices. We can either clear the data in the last page * of an oversized mapping, or we can start the anon mapping a page * early and copy the initialized data into that first page. We * choose the second.. */ if (memsz > filsz) map_len = trunc_page_ps(offset + filsz, pagesize) - file_addr; else map_len = round_page_ps(offset + filsz, pagesize) - file_addr; if (map_len != 0) { /* cow flags: don't dump readonly sections in core */ cow = MAP_COPY_ON_WRITE | MAP_PREFAULT | (prot & VM_PROT_WRITE ? 0 : MAP_DISABLE_COREDUMP); rv = __elfN(map_insert)(map, object, file_addr, /* file offset */ map_addr, /* virtual start */ map_addr + map_len,/* virtual end */ prot, cow); if (rv != KERN_SUCCESS) return (EINVAL); /* we can stop now if we've covered it all */ if (memsz == filsz) { return (0); } } /* * We have to get the remaining bit of the file into the first part * of the oversized map segment. This is normally because the .data * segment in the file is extended to provide bss. It's a neat idea * to try and save a page, but it's a pain in the behind to implement. */ copy_len = (offset + filsz) - trunc_page_ps(offset + filsz, pagesize); map_addr = trunc_page_ps((vm_offset_t)vmaddr + filsz, pagesize); map_len = round_page_ps((vm_offset_t)vmaddr + memsz, pagesize) - map_addr; /* This had damn well better be true! */ if (map_len != 0) { rv = __elfN(map_insert)(map, NULL, 0, map_addr, map_addr + map_len, VM_PROT_ALL, 0); if (rv != KERN_SUCCESS) { return (EINVAL); } } if (copy_len != 0) { vm_offset_t off; sf = vm_imgact_map_page(object, offset + filsz); if (sf == NULL) return (EIO); /* send the page fragment to user space */ off = trunc_page_ps(offset + filsz, pagesize) - trunc_page(offset + filsz); error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)map_addr, copy_len); vm_imgact_unmap_page(sf); if (error) { return (error); } } /* * set it to the specified protection. * XXX had better undo the damage from pasting over the cracks here! */ vm_map_protect(map, trunc_page(map_addr), round_page(map_addr + map_len), prot, FALSE); return (0); } /* * Load the file "file" into memory. It may be either a shared object * or an executable. * * The "addr" reference parameter is in/out. On entry, it specifies * the address where a shared object should be loaded. If the file is * an executable, this value is ignored. On exit, "addr" specifies * where the file was actually loaded. * * The "entry" reference parameter is out only. On exit, it specifies * the entry point for the loaded file. */ static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr, u_long *entry, size_t pagesize) { struct { struct nameidata nd; struct vattr attr; struct image_params image_params; } *tempdata; const Elf_Ehdr *hdr = NULL; const Elf_Phdr *phdr = NULL; struct nameidata *nd; struct vattr *attr; struct image_params *imgp; vm_prot_t prot; u_long rbase; u_long base_addr = 0; int error, i, numsegs; #ifdef CAPABILITY_MODE /* * XXXJA: This check can go away once we are sufficiently confident * that the checks in namei() are correct. */ if (IN_CAPABILITY_MODE(curthread)) return (ECAPMODE); #endif tempdata = malloc(sizeof(*tempdata), M_TEMP, M_WAITOK); nd = &tempdata->nd; attr = &tempdata->attr; imgp = &tempdata->image_params; /* * Initialize part of the common data */ imgp->proc = p; imgp->attr = attr; imgp->firstpage = NULL; imgp->image_header = NULL; imgp->object = NULL; imgp->execlabel = NULL; NDINIT(nd, LOOKUP, LOCKLEAF | FOLLOW, UIO_SYSSPACE, file, curthread); if ((error = namei(nd)) != 0) { nd->ni_vp = NULL; goto fail; } NDFREE(nd, NDF_ONLY_PNBUF); imgp->vp = nd->ni_vp; /* * Check permissions, modes, uid, etc on the file, and "open" it. */ error = exec_check_permissions(imgp); if (error) goto fail; error = exec_map_first_page(imgp); if (error) goto fail; /* * Also make certain that the interpreter stays the same, so set * its VV_TEXT flag, too. */ VOP_SET_TEXT(nd->ni_vp); imgp->object = nd->ni_vp->v_object; hdr = (const Elf_Ehdr *)imgp->image_header; if ((error = __elfN(check_header)(hdr)) != 0) goto fail; if (hdr->e_type == ET_DYN) rbase = *addr; else if (hdr->e_type == ET_EXEC) rbase = 0; else { error = ENOEXEC; goto fail; } /* Only support headers that fit within first page for now */ if ((hdr->e_phoff > PAGE_SIZE) || (u_int)hdr->e_phentsize * hdr->e_phnum > PAGE_SIZE - hdr->e_phoff) { error = ENOEXEC; goto fail; } phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); if (!aligned(phdr, Elf_Addr)) { error = ENOEXEC; goto fail; } for (i = 0, numsegs = 0; i < hdr->e_phnum; i++) { if (phdr[i].p_type == PT_LOAD && phdr[i].p_memsz != 0) { /* Loadable segment */ prot = __elfN(trans_prot)(phdr[i].p_flags); error = __elfN(load_section)(imgp, phdr[i].p_offset, (caddr_t)(uintptr_t)phdr[i].p_vaddr + rbase, phdr[i].p_memsz, phdr[i].p_filesz, prot, pagesize); if (error != 0) goto fail; /* * Establish the base address if this is the * first segment. */ if (numsegs == 0) base_addr = trunc_page(phdr[i].p_vaddr + rbase); numsegs++; } } *addr = base_addr; *entry = (unsigned long)hdr->e_entry + rbase; fail: if (imgp->firstpage) exec_unmap_first_page(imgp); if (nd->ni_vp) vput(nd->ni_vp); free(tempdata, M_TEMP); return (error); } static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) { const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header; const Elf_Phdr *phdr; Elf_Auxargs *elf_auxargs; struct vmspace *vmspace; vm_prot_t prot; u_long text_size = 0, data_size = 0, total_size = 0; u_long text_addr = 0, data_addr = 0; u_long seg_size, seg_addr; u_long addr, baddr, et_dyn_addr, entry = 0, proghdr = 0; int32_t osrel = 0; int error = 0, i, n, interp_name_len = 0; const char *interp = NULL, *newinterp = NULL; Elf_Brandinfo *brand_info; char *path; struct sysentvec *sv; /* * Do we have a valid ELF header ? * * Only allow ET_EXEC & ET_DYN here, reject ET_DYN later * if particular brand doesn't support it. */ if (__elfN(check_header)(hdr) != 0 || (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN)) return (-1); /* * From here on down, we return an errno, not -1, as we've * detected an ELF file. */ if ((hdr->e_phoff > PAGE_SIZE) || (u_int)hdr->e_phentsize * hdr->e_phnum > PAGE_SIZE - hdr->e_phoff) { /* Only support headers in first page for now */ return (ENOEXEC); } phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); if (!aligned(phdr, Elf_Addr)) return (ENOEXEC); n = 0; baddr = 0; for (i = 0; i < hdr->e_phnum; i++) { switch (phdr[i].p_type) { case PT_LOAD: if (n == 0) baddr = phdr[i].p_vaddr; n++; break; case PT_INTERP: /* Path to interpreter */ if (phdr[i].p_filesz > MAXPATHLEN || phdr[i].p_offset > PAGE_SIZE || phdr[i].p_filesz > PAGE_SIZE - phdr[i].p_offset) return (ENOEXEC); interp = imgp->image_header + phdr[i].p_offset; interp_name_len = phdr[i].p_filesz; break; case PT_GNU_STACK: if (__elfN(nxstack)) imgp->stack_prot = __elfN(trans_prot)(phdr[i].p_flags); + imgp->stack_sz = phdr[i].p_memsz; break; } } brand_info = __elfN(get_brandinfo)(imgp, interp, interp_name_len, &osrel); if (brand_info == NULL) { uprintf("ELF binary type \"%u\" not known.\n", hdr->e_ident[EI_OSABI]); return (ENOEXEC); } if (hdr->e_type == ET_DYN) { if ((brand_info->flags & BI_CAN_EXEC_DYN) == 0) return (ENOEXEC); /* * Honour the base load address from the dso if it is * non-zero for some reason. */ if (baddr == 0) et_dyn_addr = ET_DYN_LOAD_ADDR; else et_dyn_addr = 0; } else et_dyn_addr = 0; sv = brand_info->sysvec; if (interp != NULL && brand_info->interp_newpath != NULL) newinterp = brand_info->interp_newpath; /* * Avoid a possible deadlock if the current address space is destroyed * and that address space maps the locked vnode. In the common case, * the locked vnode's v_usecount is decremented but remains greater * than zero. Consequently, the vnode lock is not needed by vrele(). * However, in cases where the vnode lock is external, such as nullfs, * v_usecount may become zero. * * The VV_TEXT flag prevents modifications to the executable while * the vnode is unlocked. */ VOP_UNLOCK(imgp->vp, 0); error = exec_new_vmspace(imgp, sv); imgp->proc->p_sysent = sv; vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); if (error) return (error); for (i = 0; i < hdr->e_phnum; i++) { switch (phdr[i].p_type) { case PT_LOAD: /* Loadable segment */ if (phdr[i].p_memsz == 0) break; prot = __elfN(trans_prot)(phdr[i].p_flags); error = __elfN(load_section)(imgp, phdr[i].p_offset, (caddr_t)(uintptr_t)phdr[i].p_vaddr + et_dyn_addr, phdr[i].p_memsz, phdr[i].p_filesz, prot, sv->sv_pagesize); if (error != 0) return (error); /* * If this segment contains the program headers, * remember their virtual address for the AT_PHDR * aux entry. Static binaries don't usually include * a PT_PHDR entry. */ if (phdr[i].p_offset == 0 && hdr->e_phoff + hdr->e_phnum * hdr->e_phentsize <= phdr[i].p_filesz) proghdr = phdr[i].p_vaddr + hdr->e_phoff + et_dyn_addr; seg_addr = trunc_page(phdr[i].p_vaddr + et_dyn_addr); seg_size = round_page(phdr[i].p_memsz + phdr[i].p_vaddr + et_dyn_addr - seg_addr); /* * Make the largest executable segment the official * text segment and all others data. * * Note that obreak() assumes that data_addr + * data_size == end of data load area, and the ELF * file format expects segments to be sorted by * address. If multiple data segments exist, the * last one will be used. */ if (phdr[i].p_flags & PF_X && text_size < seg_size) { text_size = seg_size; text_addr = seg_addr; } else { data_size = seg_size; data_addr = seg_addr; } total_size += seg_size; break; case PT_PHDR: /* Program header table info */ proghdr = phdr[i].p_vaddr + et_dyn_addr; break; default: break; } } if (data_addr == 0 && data_size == 0) { data_addr = text_addr; data_size = text_size; } entry = (u_long)hdr->e_entry + et_dyn_addr; /* * Check limits. It should be safe to check the * limits after loading the segments since we do * not actually fault in all the segments pages. */ PROC_LOCK(imgp->proc); if (data_size > lim_cur(imgp->proc, RLIMIT_DATA) || text_size > maxtsiz || total_size > lim_cur(imgp->proc, RLIMIT_VMEM) || racct_set(imgp->proc, RACCT_DATA, data_size) != 0 || racct_set(imgp->proc, RACCT_VMEM, total_size) != 0) { PROC_UNLOCK(imgp->proc); return (ENOMEM); } vmspace = imgp->proc->p_vmspace; vmspace->vm_tsize = text_size >> PAGE_SHIFT; vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr; vmspace->vm_dsize = data_size >> PAGE_SHIFT; vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr; /* * We load the dynamic linker where a userland call * to mmap(0, ...) would put it. The rationale behind this * calculation is that it leaves room for the heap to grow to * its maximum allowed size. */ addr = round_page((vm_offset_t)vmspace->vm_daddr + lim_max(imgp->proc, RLIMIT_DATA)); PROC_UNLOCK(imgp->proc); imgp->entry_addr = entry; if (interp != NULL) { int have_interp = FALSE; VOP_UNLOCK(imgp->vp, 0); if (brand_info->emul_path != NULL && brand_info->emul_path[0] != '\0') { path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); snprintf(path, MAXPATHLEN, "%s%s", brand_info->emul_path, interp); error = __elfN(load_file)(imgp->proc, path, &addr, &imgp->entry_addr, sv->sv_pagesize); free(path, M_TEMP); if (error == 0) have_interp = TRUE; } if (!have_interp && newinterp != NULL) { error = __elfN(load_file)(imgp->proc, newinterp, &addr, &imgp->entry_addr, sv->sv_pagesize); if (error == 0) have_interp = TRUE; } if (!have_interp) { error = __elfN(load_file)(imgp->proc, interp, &addr, &imgp->entry_addr, sv->sv_pagesize); } vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); if (error != 0) { uprintf("ELF interpreter %s not found\n", interp); return (error); } } else addr = et_dyn_addr; /* * Construct auxargs table (used by the fixup routine) */ elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK); elf_auxargs->execfd = -1; elf_auxargs->phdr = proghdr; elf_auxargs->phent = hdr->e_phentsize; elf_auxargs->phnum = hdr->e_phnum; elf_auxargs->pagesz = PAGE_SIZE; elf_auxargs->base = addr; elf_auxargs->flags = 0; elf_auxargs->entry = entry; imgp->auxargs = elf_auxargs; imgp->interpreted = 0; imgp->reloc_base = addr; imgp->proc->p_osrel = osrel; return (error); } #define suword __CONCAT(suword, __ELF_WORD_SIZE) int __elfN(freebsd_fixup)(register_t **stack_base, struct image_params *imgp) { Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs; Elf_Addr *base; Elf_Addr *pos; base = (Elf_Addr *)*stack_base; pos = base + (imgp->args->argc + imgp->args->envc + 2); if (args->execfd != -1) AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); AUXARGS_ENTRY(pos, AT_PHENT, args->phent); AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); AUXARGS_ENTRY(pos, AT_BASE, args->base); if (imgp->execpathp != 0) AUXARGS_ENTRY(pos, AT_EXECPATH, imgp->execpathp); AUXARGS_ENTRY(pos, AT_OSRELDATE, imgp->proc->p_ucred->cr_prison->pr_osreldate); if (imgp->canary != 0) { AUXARGS_ENTRY(pos, AT_CANARY, imgp->canary); AUXARGS_ENTRY(pos, AT_CANARYLEN, imgp->canarylen); } AUXARGS_ENTRY(pos, AT_NCPUS, mp_ncpus); if (imgp->pagesizes != 0) { AUXARGS_ENTRY(pos, AT_PAGESIZES, imgp->pagesizes); AUXARGS_ENTRY(pos, AT_PAGESIZESLEN, imgp->pagesizeslen); } if (imgp->sysent->sv_timekeep_base != 0) { AUXARGS_ENTRY(pos, AT_TIMEKEEP, imgp->sysent->sv_timekeep_base); } AUXARGS_ENTRY(pos, AT_STACKPROT, imgp->sysent->sv_shared_page_obj != NULL && imgp->stack_prot != 0 ? imgp->stack_prot : imgp->sysent->sv_stackprot); AUXARGS_ENTRY(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); imgp->auxargs = NULL; base--; suword(base, (long)imgp->args->argc); *stack_base = (register_t *)base; return (0); } /* * Code for generating ELF core dumps. */ typedef void (*segment_callback)(vm_map_entry_t, void *); /* Closure for cb_put_phdr(). */ struct phdr_closure { Elf_Phdr *phdr; /* Program header to fill in */ Elf_Off offset; /* Offset of segment in core file */ }; /* Closure for cb_size_segment(). */ struct sseg_closure { int count; /* Count of writable segments. */ size_t size; /* Total size of all writable segments. */ }; typedef void (*outfunc_t)(void *, struct sbuf *, size_t *); struct note_info { int type; /* Note type. */ outfunc_t outfunc; /* Output function. */ void *outarg; /* Argument for the output function. */ size_t outsize; /* Output size. */ TAILQ_ENTRY(note_info) link; /* Link to the next note info. */ }; TAILQ_HEAD(note_info_list, note_info); static void cb_put_phdr(vm_map_entry_t, void *); static void cb_size_segment(vm_map_entry_t, void *); static void each_writable_segment(struct thread *, segment_callback, void *); static int __elfN(corehdr)(struct thread *, struct vnode *, struct ucred *, int, void *, size_t, struct note_info_list *, size_t, gzFile); static void __elfN(prepare_notes)(struct thread *, struct note_info_list *, size_t *); static void __elfN(puthdr)(struct thread *, void *, size_t, int, size_t); static void __elfN(putnote)(struct note_info *, struct sbuf *); static size_t register_note(struct note_info_list *, int, outfunc_t, void *); static int sbuf_drain_core_output(void *, const char *, int); static int sbuf_drain_count(void *arg, const char *data, int len); static void __elfN(note_fpregset)(void *, struct sbuf *, size_t *); static void __elfN(note_prpsinfo)(void *, struct sbuf *, size_t *); static void __elfN(note_prstatus)(void *, struct sbuf *, size_t *); static void __elfN(note_threadmd)(void *, struct sbuf *, size_t *); static void __elfN(note_thrmisc)(void *, struct sbuf *, size_t *); static void __elfN(note_procstat_auxv)(void *, struct sbuf *, size_t *); static void __elfN(note_procstat_proc)(void *, struct sbuf *, size_t *); static void __elfN(note_procstat_psstrings)(void *, struct sbuf *, size_t *); static void note_procstat_files(void *, struct sbuf *, size_t *); static void note_procstat_groups(void *, struct sbuf *, size_t *); static void note_procstat_osrel(void *, struct sbuf *, size_t *); static void note_procstat_rlimit(void *, struct sbuf *, size_t *); static void note_procstat_umask(void *, struct sbuf *, size_t *); static void note_procstat_vmmap(void *, struct sbuf *, size_t *); #ifdef COMPRESS_USER_CORES extern int compress_user_cores; extern int compress_user_cores_gzlevel; #endif static int core_output(struct vnode *vp, void *base, size_t len, off_t offset, struct ucred *active_cred, struct ucred *file_cred, struct thread *td, char *core_buf, gzFile gzfile) { int error; if (gzfile) { #ifdef COMPRESS_USER_CORES error = compress_core(gzfile, base, core_buf, len, td); #else panic("shouldn't be here"); #endif } else { error = vn_rdwr_inchunks(UIO_WRITE, vp, base, len, offset, UIO_USERSPACE, IO_UNIT | IO_DIRECT, active_cred, file_cred, NULL, td); } return (error); } /* Coredump output parameters for sbuf drain routine. */ struct sbuf_drain_core_params { off_t offset; struct ucred *active_cred; struct ucred *file_cred; struct thread *td; struct vnode *vp; #ifdef COMPRESS_USER_CORES gzFile gzfile; #endif }; /* * Drain into a core file. */ static int sbuf_drain_core_output(void *arg, const char *data, int len) { struct sbuf_drain_core_params *p; int error, locked; p = (struct sbuf_drain_core_params *)arg; /* * Some kern_proc out routines that print to this sbuf may * call us with the process lock held. Draining with the * non-sleepable lock held is unsafe. The lock is needed for * those routines when dumping a live process. In our case we * can safely release the lock before draining and acquire * again after. */ locked = PROC_LOCKED(p->td->td_proc); if (locked) PROC_UNLOCK(p->td->td_proc); #ifdef COMPRESS_USER_CORES if (p->gzfile != Z_NULL) error = compress_core(p->gzfile, NULL, __DECONST(char *, data), len, p->td); else #endif error = vn_rdwr_inchunks(UIO_WRITE, p->vp, __DECONST(void *, data), len, p->offset, UIO_SYSSPACE, IO_UNIT | IO_DIRECT, p->active_cred, p->file_cred, NULL, p->td); if (locked) PROC_LOCK(p->td->td_proc); if (error != 0) return (-error); p->offset += len; return (len); } /* * Drain into a counter. */ static int sbuf_drain_count(void *arg, const char *data __unused, int len) { size_t *sizep; sizep = (size_t *)arg; *sizep += len; return (len); } int __elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags) { struct ucred *cred = td->td_ucred; int error = 0; struct sseg_closure seginfo; struct note_info_list notelst; struct note_info *ninfo; void *hdr; size_t hdrsize, notesz, coresize; gzFile gzfile = Z_NULL; char *core_buf = NULL; #ifdef COMPRESS_USER_CORES char gzopen_flags[8]; char *p; int doing_compress = flags & IMGACT_CORE_COMPRESS; #endif hdr = NULL; TAILQ_INIT(¬elst); #ifdef COMPRESS_USER_CORES if (doing_compress) { p = gzopen_flags; *p++ = 'w'; if (compress_user_cores_gzlevel >= 0 && compress_user_cores_gzlevel <= 9) *p++ = '0' + compress_user_cores_gzlevel; *p = 0; gzfile = gz_open("", gzopen_flags, vp); if (gzfile == Z_NULL) { error = EFAULT; goto done; } core_buf = malloc(CORE_BUF_SIZE, M_TEMP, M_WAITOK | M_ZERO); if (!core_buf) { error = ENOMEM; goto done; } } #endif /* Size the program segments. */ seginfo.count = 0; seginfo.size = 0; each_writable_segment(td, cb_size_segment, &seginfo); /* * Collect info about the core file header area. */ hdrsize = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) * (1 + seginfo.count); __elfN(prepare_notes)(td, ¬elst, ¬esz); coresize = round_page(hdrsize + notesz) + seginfo.size; #ifdef RACCT PROC_LOCK(td->td_proc); error = racct_add(td->td_proc, RACCT_CORE, coresize); PROC_UNLOCK(td->td_proc); if (error != 0) { error = EFAULT; goto done; } #endif if (coresize >= limit) { error = EFAULT; goto done; } /* * Allocate memory for building the header, fill it up, * and write it out following the notes. */ hdr = malloc(hdrsize, M_TEMP, M_WAITOK); if (hdr == NULL) { error = EINVAL; goto done; } error = __elfN(corehdr)(td, vp, cred, seginfo.count, hdr, hdrsize, ¬elst, notesz, gzfile); /* Write the contents of all of the writable segments. */ if (error == 0) { Elf_Phdr *php; off_t offset; int i; php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1; offset = round_page(hdrsize + notesz); for (i = 0; i < seginfo.count; i++) { error = core_output(vp, (caddr_t)(uintptr_t)php->p_vaddr, php->p_filesz, offset, cred, NOCRED, curthread, core_buf, gzfile); if (error != 0) break; offset += php->p_filesz; php++; } } if (error) { log(LOG_WARNING, "Failed to write core file for process %s (error %d)\n", curproc->p_comm, error); } done: #ifdef COMPRESS_USER_CORES if (core_buf) free(core_buf, M_TEMP); if (gzfile) gzclose(gzfile); #endif while ((ninfo = TAILQ_FIRST(¬elst)) != NULL) { TAILQ_REMOVE(¬elst, ninfo, link); free(ninfo, M_TEMP); } if (hdr != NULL) free(hdr, M_TEMP); return (error); } /* * A callback for each_writable_segment() to write out the segment's * program header entry. */ static void cb_put_phdr(entry, closure) vm_map_entry_t entry; void *closure; { struct phdr_closure *phc = (struct phdr_closure *)closure; Elf_Phdr *phdr = phc->phdr; phc->offset = round_page(phc->offset); phdr->p_type = PT_LOAD; phdr->p_offset = phc->offset; phdr->p_vaddr = entry->start; phdr->p_paddr = 0; phdr->p_filesz = phdr->p_memsz = entry->end - entry->start; phdr->p_align = PAGE_SIZE; phdr->p_flags = __elfN(untrans_prot)(entry->protection); phc->offset += phdr->p_filesz; phc->phdr++; } /* * A callback for each_writable_segment() to gather information about * the number of segments and their total size. */ static void cb_size_segment(entry, closure) vm_map_entry_t entry; void *closure; { struct sseg_closure *ssc = (struct sseg_closure *)closure; ssc->count++; ssc->size += entry->end - entry->start; } /* * For each writable segment in the process's memory map, call the given * function with a pointer to the map entry and some arbitrary * caller-supplied data. */ static void each_writable_segment(td, func, closure) struct thread *td; segment_callback func; void *closure; { struct proc *p = td->td_proc; vm_map_t map = &p->p_vmspace->vm_map; vm_map_entry_t entry; vm_object_t backing_object, object; boolean_t ignore_entry; vm_map_lock_read(map); for (entry = map->header.next; entry != &map->header; entry = entry->next) { /* * Don't dump inaccessible mappings, deal with legacy * coredump mode. * * Note that read-only segments related to the elf binary * are marked MAP_ENTRY_NOCOREDUMP now so we no longer * need to arbitrarily ignore such segments. */ if (elf_legacy_coredump) { if ((entry->protection & VM_PROT_RW) != VM_PROT_RW) continue; } else { if ((entry->protection & VM_PROT_ALL) == 0) continue; } /* * Dont include memory segment in the coredump if * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in * madvise(2). Do not dump submaps (i.e. parts of the * kernel map). */ if (entry->eflags & (MAP_ENTRY_NOCOREDUMP|MAP_ENTRY_IS_SUB_MAP)) continue; if ((object = entry->object.vm_object) == NULL) continue; /* Ignore memory-mapped devices and such things. */ VM_OBJECT_RLOCK(object); while ((backing_object = object->backing_object) != NULL) { VM_OBJECT_RLOCK(backing_object); VM_OBJECT_RUNLOCK(object); object = backing_object; } ignore_entry = object->type != OBJT_DEFAULT && object->type != OBJT_SWAP && object->type != OBJT_VNODE && object->type != OBJT_PHYS; VM_OBJECT_RUNLOCK(object); if (ignore_entry) continue; (*func)(entry, closure); } vm_map_unlock_read(map); } /* * Write the core file header to the file, including padding up to * the page boundary. */ static int __elfN(corehdr)(struct thread *td, struct vnode *vp, struct ucred *cred, int numsegs, void *hdr, size_t hdrsize, struct note_info_list *notelst, size_t notesz, gzFile gzfile) { struct sbuf_drain_core_params params; struct note_info *ninfo; struct sbuf *sb; int error; /* Fill in the header. */ bzero(hdr, hdrsize); __elfN(puthdr)(td, hdr, hdrsize, numsegs, notesz); params.offset = 0; params.active_cred = cred; params.file_cred = NOCRED; params.td = td; params.vp = vp; #ifdef COMPRESS_USER_CORES params.gzfile = gzfile; #endif sb = sbuf_new(NULL, NULL, CORE_BUF_SIZE, SBUF_FIXEDLEN); sbuf_set_drain(sb, sbuf_drain_core_output, ¶ms); sbuf_start_section(sb, NULL); sbuf_bcat(sb, hdr, hdrsize); TAILQ_FOREACH(ninfo, notelst, link) __elfN(putnote)(ninfo, sb); /* Align up to a page boundary for the program segments. */ sbuf_end_section(sb, -1, PAGE_SIZE, 0); error = sbuf_finish(sb); sbuf_delete(sb); return (error); } static void __elfN(prepare_notes)(struct thread *td, struct note_info_list *list, size_t *sizep) { struct proc *p; struct thread *thr; size_t size; p = td->td_proc; size = 0; size += register_note(list, NT_PRPSINFO, __elfN(note_prpsinfo), p); /* * To have the debugger select the right thread (LWP) as the initial * thread, we dump the state of the thread passed to us in td first. * This is the thread that causes the core dump and thus likely to * be the right thread one wants to have selected in the debugger. */ thr = td; while (thr != NULL) { size += register_note(list, NT_PRSTATUS, __elfN(note_prstatus), thr); size += register_note(list, NT_FPREGSET, __elfN(note_fpregset), thr); size += register_note(list, NT_THRMISC, __elfN(note_thrmisc), thr); size += register_note(list, -1, __elfN(note_threadmd), thr); thr = (thr == td) ? TAILQ_FIRST(&p->p_threads) : TAILQ_NEXT(thr, td_plist); if (thr == td) thr = TAILQ_NEXT(thr, td_plist); } size += register_note(list, NT_PROCSTAT_PROC, __elfN(note_procstat_proc), p); size += register_note(list, NT_PROCSTAT_FILES, note_procstat_files, p); size += register_note(list, NT_PROCSTAT_VMMAP, note_procstat_vmmap, p); size += register_note(list, NT_PROCSTAT_GROUPS, note_procstat_groups, p); size += register_note(list, NT_PROCSTAT_UMASK, note_procstat_umask, p); size += register_note(list, NT_PROCSTAT_RLIMIT, note_procstat_rlimit, p); size += register_note(list, NT_PROCSTAT_OSREL, note_procstat_osrel, p); size += register_note(list, NT_PROCSTAT_PSSTRINGS, __elfN(note_procstat_psstrings), p); size += register_note(list, NT_PROCSTAT_AUXV, __elfN(note_procstat_auxv), p); *sizep = size; } static void __elfN(puthdr)(struct thread *td, void *hdr, size_t hdrsize, int numsegs, size_t notesz) { Elf_Ehdr *ehdr; Elf_Phdr *phdr; struct phdr_closure phc; ehdr = (Elf_Ehdr *)hdr; phdr = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)); ehdr->e_ident[EI_MAG0] = ELFMAG0; ehdr->e_ident[EI_MAG1] = ELFMAG1; ehdr->e_ident[EI_MAG2] = ELFMAG2; ehdr->e_ident[EI_MAG3] = ELFMAG3; ehdr->e_ident[EI_CLASS] = ELF_CLASS; ehdr->e_ident[EI_DATA] = ELF_DATA; ehdr->e_ident[EI_VERSION] = EV_CURRENT; ehdr->e_ident[EI_OSABI] = ELFOSABI_FREEBSD; ehdr->e_ident[EI_ABIVERSION] = 0; ehdr->e_ident[EI_PAD] = 0; ehdr->e_type = ET_CORE; #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 ehdr->e_machine = ELF_ARCH32; #else ehdr->e_machine = ELF_ARCH; #endif ehdr->e_version = EV_CURRENT; ehdr->e_entry = 0; ehdr->e_phoff = sizeof(Elf_Ehdr); ehdr->e_flags = 0; ehdr->e_ehsize = sizeof(Elf_Ehdr); ehdr->e_phentsize = sizeof(Elf_Phdr); ehdr->e_phnum = numsegs + 1; ehdr->e_shentsize = sizeof(Elf_Shdr); ehdr->e_shnum = 0; ehdr->e_shstrndx = SHN_UNDEF; /* * Fill in the program header entries. */ /* The note segement. */ phdr->p_type = PT_NOTE; phdr->p_offset = hdrsize; phdr->p_vaddr = 0; phdr->p_paddr = 0; phdr->p_filesz = notesz; phdr->p_memsz = 0; phdr->p_flags = PF_R; phdr->p_align = ELF_NOTE_ROUNDSIZE; phdr++; /* All the writable segments from the program. */ phc.phdr = phdr; phc.offset = round_page(hdrsize + notesz); each_writable_segment(td, cb_put_phdr, &phc); } static size_t register_note(struct note_info_list *list, int type, outfunc_t out, void *arg) { struct note_info *ninfo; size_t size, notesize; size = 0; out(arg, NULL, &size); ninfo = malloc(sizeof(*ninfo), M_TEMP, M_ZERO | M_WAITOK); ninfo->type = type; ninfo->outfunc = out; ninfo->outarg = arg; ninfo->outsize = size; TAILQ_INSERT_TAIL(list, ninfo, link); if (type == -1) return (size); notesize = sizeof(Elf_Note) + /* note header */ roundup2(sizeof(FREEBSD_ABI_VENDOR), ELF_NOTE_ROUNDSIZE) + /* note name */ roundup2(size, ELF_NOTE_ROUNDSIZE); /* note description */ return (notesize); } static size_t append_note_data(const void *src, void *dst, size_t len) { size_t padded_len; padded_len = roundup2(len, ELF_NOTE_ROUNDSIZE); if (dst != NULL) { bcopy(src, dst, len); bzero((char *)dst + len, padded_len - len); } return (padded_len); } size_t __elfN(populate_note)(int type, void *src, void *dst, size_t size, void **descp) { Elf_Note *note; char *buf; size_t notesize; buf = dst; if (buf != NULL) { note = (Elf_Note *)buf; note->n_namesz = sizeof(FREEBSD_ABI_VENDOR); note->n_descsz = size; note->n_type = type; buf += sizeof(*note); buf += append_note_data(FREEBSD_ABI_VENDOR, buf, sizeof(FREEBSD_ABI_VENDOR)); append_note_data(src, buf, size); if (descp != NULL) *descp = buf; } notesize = sizeof(Elf_Note) + /* note header */ roundup2(sizeof(FREEBSD_ABI_VENDOR), ELF_NOTE_ROUNDSIZE) + /* note name */ roundup2(size, ELF_NOTE_ROUNDSIZE); /* note description */ return (notesize); } static void __elfN(putnote)(struct note_info *ninfo, struct sbuf *sb) { Elf_Note note; ssize_t old_len; if (ninfo->type == -1) { ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize); return; } note.n_namesz = sizeof(FREEBSD_ABI_VENDOR); note.n_descsz = ninfo->outsize; note.n_type = ninfo->type; sbuf_bcat(sb, ¬e, sizeof(note)); sbuf_start_section(sb, &old_len); sbuf_bcat(sb, FREEBSD_ABI_VENDOR, sizeof(FREEBSD_ABI_VENDOR)); sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0); if (note.n_descsz == 0) return; sbuf_start_section(sb, &old_len); ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize); sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0); } /* * Miscellaneous note out functions. */ #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 #include typedef struct prstatus32 elf_prstatus_t; typedef struct prpsinfo32 elf_prpsinfo_t; typedef struct fpreg32 elf_prfpregset_t; typedef struct fpreg32 elf_fpregset_t; typedef struct reg32 elf_gregset_t; typedef struct thrmisc32 elf_thrmisc_t; #define ELF_KERN_PROC_MASK KERN_PROC_MASK32 typedef struct kinfo_proc32 elf_kinfo_proc_t; typedef uint32_t elf_ps_strings_t; #else typedef prstatus_t elf_prstatus_t; typedef prpsinfo_t elf_prpsinfo_t; typedef prfpregset_t elf_prfpregset_t; typedef prfpregset_t elf_fpregset_t; typedef gregset_t elf_gregset_t; typedef thrmisc_t elf_thrmisc_t; #define ELF_KERN_PROC_MASK 0 typedef struct kinfo_proc elf_kinfo_proc_t; typedef vm_offset_t elf_ps_strings_t; #endif static void __elfN(note_prpsinfo)(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; elf_prpsinfo_t *psinfo; p = (struct proc *)arg; if (sb != NULL) { KASSERT(*sizep == sizeof(*psinfo), ("invalid size")); psinfo = malloc(sizeof(*psinfo), M_TEMP, M_ZERO | M_WAITOK); psinfo->pr_version = PRPSINFO_VERSION; psinfo->pr_psinfosz = sizeof(elf_prpsinfo_t); strlcpy(psinfo->pr_fname, p->p_comm, sizeof(psinfo->pr_fname)); /* * XXX - We don't fill in the command line arguments properly * yet. */ strlcpy(psinfo->pr_psargs, p->p_comm, sizeof(psinfo->pr_psargs)); sbuf_bcat(sb, psinfo, sizeof(*psinfo)); free(psinfo, M_TEMP); } *sizep = sizeof(*psinfo); } static void __elfN(note_prstatus)(void *arg, struct sbuf *sb, size_t *sizep) { struct thread *td; elf_prstatus_t *status; td = (struct thread *)arg; if (sb != NULL) { KASSERT(*sizep == sizeof(*status), ("invalid size")); status = malloc(sizeof(*status), M_TEMP, M_ZERO | M_WAITOK); status->pr_version = PRSTATUS_VERSION; status->pr_statussz = sizeof(elf_prstatus_t); status->pr_gregsetsz = sizeof(elf_gregset_t); status->pr_fpregsetsz = sizeof(elf_fpregset_t); status->pr_osreldate = osreldate; status->pr_cursig = td->td_proc->p_sig; status->pr_pid = td->td_tid; #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 fill_regs32(td, &status->pr_reg); #else fill_regs(td, &status->pr_reg); #endif sbuf_bcat(sb, status, sizeof(*status)); free(status, M_TEMP); } *sizep = sizeof(*status); } static void __elfN(note_fpregset)(void *arg, struct sbuf *sb, size_t *sizep) { struct thread *td; elf_prfpregset_t *fpregset; td = (struct thread *)arg; if (sb != NULL) { KASSERT(*sizep == sizeof(*fpregset), ("invalid size")); fpregset = malloc(sizeof(*fpregset), M_TEMP, M_ZERO | M_WAITOK); #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 fill_fpregs32(td, fpregset); #else fill_fpregs(td, fpregset); #endif sbuf_bcat(sb, fpregset, sizeof(*fpregset)); free(fpregset, M_TEMP); } *sizep = sizeof(*fpregset); } static void __elfN(note_thrmisc)(void *arg, struct sbuf *sb, size_t *sizep) { struct thread *td; elf_thrmisc_t thrmisc; td = (struct thread *)arg; if (sb != NULL) { KASSERT(*sizep == sizeof(thrmisc), ("invalid size")); bzero(&thrmisc._pad, sizeof(thrmisc._pad)); strcpy(thrmisc.pr_tname, td->td_name); sbuf_bcat(sb, &thrmisc, sizeof(thrmisc)); } *sizep = sizeof(thrmisc); } /* * Allow for MD specific notes, as well as any MD * specific preparations for writing MI notes. */ static void __elfN(note_threadmd)(void *arg, struct sbuf *sb, size_t *sizep) { struct thread *td; void *buf; size_t size; td = (struct thread *)arg; size = *sizep; if (size != 0 && sb != NULL) buf = malloc(size, M_TEMP, M_ZERO | M_WAITOK); else buf = NULL; size = 0; __elfN(dump_thread)(td, buf, &size); KASSERT(sb == NULL || *sizep == size, ("invalid size")); if (size != 0 && sb != NULL) sbuf_bcat(sb, buf, size); free(buf, M_TEMP); *sizep = size; } #ifdef KINFO_PROC_SIZE CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE); #endif static void __elfN(note_procstat_proc)(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize; p = (struct proc *)arg; size = sizeof(structsize) + p->p_numthreads * sizeof(elf_kinfo_proc_t); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(elf_kinfo_proc_t); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); kern_proc_out(p, sb, ELF_KERN_PROC_MASK); } *sizep = size; } #ifdef KINFO_FILE_SIZE CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE); #endif static void note_procstat_files(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize; p = (struct proc *)arg; if (sb == NULL) { size = 0; sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN); sbuf_set_drain(sb, sbuf_drain_count, &size); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); kern_proc_filedesc_out(p, sb, -1); sbuf_finish(sb); sbuf_delete(sb); *sizep = size; } else { structsize = sizeof(struct kinfo_file); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); kern_proc_filedesc_out(p, sb, -1); } } #ifdef KINFO_VMENTRY_SIZE CTASSERT(sizeof(struct kinfo_vmentry) == KINFO_VMENTRY_SIZE); #endif static void note_procstat_vmmap(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize; p = (struct proc *)arg; if (sb == NULL) { size = 0; sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN); sbuf_set_drain(sb, sbuf_drain_count, &size); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); kern_proc_vmmap_out(p, sb); sbuf_finish(sb); sbuf_delete(sb); *sizep = size; } else { structsize = sizeof(struct kinfo_vmentry); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); kern_proc_vmmap_out(p, sb); } } static void note_procstat_groups(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize; p = (struct proc *)arg; size = sizeof(structsize) + p->p_ucred->cr_ngroups * sizeof(gid_t); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(gid_t); sbuf_bcat(sb, &structsize, sizeof(structsize)); sbuf_bcat(sb, p->p_ucred->cr_groups, p->p_ucred->cr_ngroups * sizeof(gid_t)); } *sizep = size; } static void note_procstat_umask(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize; p = (struct proc *)arg; size = sizeof(structsize) + sizeof(p->p_fd->fd_cmask); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(p->p_fd->fd_cmask); sbuf_bcat(sb, &structsize, sizeof(structsize)); sbuf_bcat(sb, &p->p_fd->fd_cmask, sizeof(p->p_fd->fd_cmask)); } *sizep = size; } static void note_procstat_rlimit(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; struct rlimit rlim[RLIM_NLIMITS]; size_t size; int structsize, i; p = (struct proc *)arg; size = sizeof(structsize) + sizeof(rlim); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(rlim); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); for (i = 0; i < RLIM_NLIMITS; i++) lim_rlimit(p, i, &rlim[i]); PROC_UNLOCK(p); sbuf_bcat(sb, rlim, sizeof(rlim)); } *sizep = size; } static void note_procstat_osrel(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize; p = (struct proc *)arg; size = sizeof(structsize) + sizeof(p->p_osrel); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(p->p_osrel); sbuf_bcat(sb, &structsize, sizeof(structsize)); sbuf_bcat(sb, &p->p_osrel, sizeof(p->p_osrel)); } *sizep = size; } static void __elfN(note_procstat_psstrings)(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; elf_ps_strings_t ps_strings; size_t size; int structsize; p = (struct proc *)arg; size = sizeof(structsize) + sizeof(ps_strings); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); structsize = sizeof(ps_strings); #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 ps_strings = PTROUT(p->p_sysent->sv_psstrings); #else ps_strings = p->p_sysent->sv_psstrings; #endif sbuf_bcat(sb, &structsize, sizeof(structsize)); sbuf_bcat(sb, &ps_strings, sizeof(ps_strings)); } *sizep = size; } static void __elfN(note_procstat_auxv)(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; int structsize; p = (struct proc *)arg; if (sb == NULL) { size = 0; sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN); sbuf_set_drain(sb, sbuf_drain_count, &size); sbuf_bcat(sb, &structsize, sizeof(structsize)); PHOLD(p); proc_getauxv(curthread, p, sb); PRELE(p); sbuf_finish(sb); sbuf_delete(sb); *sizep = size; } else { structsize = sizeof(Elf_Auxinfo); sbuf_bcat(sb, &structsize, sizeof(structsize)); PHOLD(p); proc_getauxv(curthread, p, sb); PRELE(p); } } static boolean_t __elfN(parse_notes)(struct image_params *imgp, Elf_Brandnote *checknote, int32_t *osrel, const Elf_Phdr *pnote) { const Elf_Note *note, *note0, *note_end; const char *note_name; int i; if (pnote == NULL || pnote->p_offset > PAGE_SIZE || pnote->p_filesz > PAGE_SIZE - pnote->p_offset) return (FALSE); note = note0 = (const Elf_Note *)(imgp->image_header + pnote->p_offset); note_end = (const Elf_Note *)(imgp->image_header + pnote->p_offset + pnote->p_filesz); for (i = 0; i < 100 && note >= note0 && note < note_end; i++) { if (!aligned(note, Elf32_Addr) || (const char *)note_end - (const char *)note < sizeof(Elf_Note)) return (FALSE); if (note->n_namesz != checknote->hdr.n_namesz || note->n_descsz != checknote->hdr.n_descsz || note->n_type != checknote->hdr.n_type) goto nextnote; note_name = (const char *)(note + 1); if (note_name + checknote->hdr.n_namesz >= (const char *)note_end || strncmp(checknote->vendor, note_name, checknote->hdr.n_namesz) != 0) goto nextnote; /* * Fetch the osreldate for binary * from the ELF OSABI-note if necessary. */ if ((checknote->flags & BN_TRANSLATE_OSREL) != 0 && checknote->trans_osrel != NULL) return (checknote->trans_osrel(note, osrel)); return (TRUE); nextnote: note = (const Elf_Note *)((const char *)(note + 1) + roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE) + roundup2(note->n_descsz, ELF_NOTE_ROUNDSIZE)); } return (FALSE); } /* * Try to find the appropriate ABI-note section for checknote, * fetch the osreldate for binary from the ELF OSABI-note. Only the * first page of the image is searched, the same as for headers. */ static boolean_t __elfN(check_note)(struct image_params *imgp, Elf_Brandnote *checknote, int32_t *osrel) { const Elf_Phdr *phdr; const Elf_Ehdr *hdr; int i; hdr = (const Elf_Ehdr *)imgp->image_header; phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); for (i = 0; i < hdr->e_phnum; i++) { if (phdr[i].p_type == PT_NOTE && __elfN(parse_notes)(imgp, checknote, osrel, &phdr[i])) return (TRUE); } return (FALSE); } /* * Tell kern_execve.c about it, with a little help from the linker. */ static struct execsw __elfN(execsw) = { __CONCAT(exec_, __elfN(imgact)), __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) }; EXEC_SET(__CONCAT(elf, __ELF_WORD_SIZE), __elfN(execsw)); #ifdef COMPRESS_USER_CORES /* * Compress and write out a core segment for a user process. * * 'inbuf' is the starting address of a VM segment in the process' address * space that is to be compressed and written out to the core file. 'dest_buf' * is a buffer in the kernel's address space. The segment is copied from * 'inbuf' to 'dest_buf' first before being processed by the compression * routine gzwrite(). This copying is necessary because the content of the VM * segment may change between the compression pass and the crc-computation pass * in gzwrite(). This is because realtime threads may preempt the UNIX kernel. * * If inbuf is NULL it is assumed that data is already copied to 'dest_buf'. */ static int compress_core (gzFile file, char *inbuf, char *dest_buf, unsigned int len, struct thread *td) { int len_compressed; int error = 0; unsigned int chunk_len; while (len) { if (inbuf != NULL) { chunk_len = (len > CORE_BUF_SIZE) ? CORE_BUF_SIZE : len; copyin(inbuf, dest_buf, chunk_len); inbuf += chunk_len; } else { chunk_len = len; } len_compressed = gzwrite(file, dest_buf, chunk_len); EVENTHANDLER_INVOKE(app_coredump_progress, td, len_compressed); if ((unsigned int)len_compressed != chunk_len) { log(LOG_WARNING, "compress_core: length mismatch (0x%x returned, " "0x%x expected)\n", len_compressed, chunk_len); EVENTHANDLER_INVOKE(app_coredump_error, td, "compress_core: length mismatch %x -> %x", chunk_len, len_compressed); error = EFAULT; break; } len -= chunk_len; maybe_yield(); } return (error); } #endif /* COMPRESS_USER_CORES */ static vm_prot_t __elfN(trans_prot)(Elf_Word flags) { vm_prot_t prot; prot = 0; if (flags & PF_X) prot |= VM_PROT_EXECUTE; if (flags & PF_W) prot |= VM_PROT_WRITE; if (flags & PF_R) prot |= VM_PROT_READ; #if __ELF_WORD_SIZE == 32 #if defined(__amd64__) || defined(__ia64__) if (i386_read_exec && (flags & PF_R)) prot |= VM_PROT_EXECUTE; #endif #endif return (prot); } static Elf_Word __elfN(untrans_prot)(vm_prot_t prot) { Elf_Word flags; flags = 0; if (prot & VM_PROT_EXECUTE) flags |= PF_X; if (prot & VM_PROT_READ) flags |= PF_R; if (prot & VM_PROT_WRITE) flags |= PF_W; return (flags); } Index: stable/10/sys/kern/kern_exec.c =================================================================== --- stable/10/sys/kern/kern_exec.c (revision 281847) +++ stable/10/sys/kern/kern_exec.c (revision 281848) @@ -1,1536 +1,1549 @@ /*- * Copyright (c) 1993, David Greenman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_hwpmc_hooks.h" #include "opt_kdtrace.h" #include "opt_ktrace.h" #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include #endif #include #include #include #ifdef KDTRACE_HOOKS #include dtrace_execexit_func_t dtrace_fasttrap_exec; #endif SDT_PROVIDER_DECLARE(proc); SDT_PROBE_DEFINE1(proc, kernel, , exec, "char *"); SDT_PROBE_DEFINE1(proc, kernel, , exec__failure, "int"); SDT_PROBE_DEFINE1(proc, kernel, , exec__success, "char *"); MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments"); static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS); static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS); static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS); static int do_execve(struct thread *td, struct image_args *args, struct mac *mac_p); /* XXX This should be vm_size_t. */ SYSCTL_PROC(_kern, KERN_PS_STRINGS, ps_strings, CTLTYPE_ULONG|CTLFLAG_RD, NULL, 0, sysctl_kern_ps_strings, "LU", ""); /* XXX This should be vm_size_t. */ SYSCTL_PROC(_kern, KERN_USRSTACK, usrstack, CTLTYPE_ULONG|CTLFLAG_RD| CTLFLAG_CAPRD, NULL, 0, sysctl_kern_usrstack, "LU", ""); SYSCTL_PROC(_kern, OID_AUTO, stackprot, CTLTYPE_INT|CTLFLAG_RD, NULL, 0, sysctl_kern_stackprot, "I", ""); u_long ps_arg_cache_limit = PAGE_SIZE / 16; SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW, &ps_arg_cache_limit, 0, ""); static int disallow_high_osrel; SYSCTL_INT(_kern, OID_AUTO, disallow_high_osrel, CTLFLAG_RW, &disallow_high_osrel, 0, "Disallow execution of binaries built for higher version of the world"); static int map_at_zero = 0; TUNABLE_INT("security.bsd.map_at_zero", &map_at_zero); SYSCTL_INT(_security_bsd, OID_AUTO, map_at_zero, CTLFLAG_RW, &map_at_zero, 0, "Permit processes to map an object at virtual address 0."); static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS) { struct proc *p; int error; p = curproc; #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { unsigned int val; val = (unsigned int)p->p_sysent->sv_psstrings; error = SYSCTL_OUT(req, &val, sizeof(val)); } else #endif error = SYSCTL_OUT(req, &p->p_sysent->sv_psstrings, sizeof(p->p_sysent->sv_psstrings)); return error; } static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS) { struct proc *p; int error; p = curproc; #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { unsigned int val; val = (unsigned int)p->p_sysent->sv_usrstack; error = SYSCTL_OUT(req, &val, sizeof(val)); } else #endif error = SYSCTL_OUT(req, &p->p_sysent->sv_usrstack, sizeof(p->p_sysent->sv_usrstack)); return error; } static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS) { struct proc *p; p = curproc; return (SYSCTL_OUT(req, &p->p_sysent->sv_stackprot, sizeof(p->p_sysent->sv_stackprot))); } /* * Each of the items is a pointer to a `const struct execsw', hence the * double pointer here. */ static const struct execsw **execsw; #ifndef _SYS_SYSPROTO_H_ struct execve_args { char *fname; char **argv; char **envv; }; #endif int sys_execve(td, uap) struct thread *td; struct execve_args /* { char *fname; char **argv; char **envv; } */ *uap; { int error; struct image_args args; error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &args, NULL); return (error); } #ifndef _SYS_SYSPROTO_H_ struct fexecve_args { int fd; char **argv; char **envv; } #endif int sys_fexecve(struct thread *td, struct fexecve_args *uap) { int error; struct image_args args; error = exec_copyin_args(&args, NULL, UIO_SYSSPACE, uap->argv, uap->envv); if (error == 0) { args.fd = uap->fd; error = kern_execve(td, &args, NULL); } return (error); } #ifndef _SYS_SYSPROTO_H_ struct __mac_execve_args { char *fname; char **argv; char **envv; struct mac *mac_p; }; #endif int sys___mac_execve(td, uap) struct thread *td; struct __mac_execve_args /* { char *fname; char **argv; char **envv; struct mac *mac_p; } */ *uap; { #ifdef MAC int error; struct image_args args; error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &args, uap->mac_p); return (error); #else return (ENOSYS); #endif } /* * XXX: kern_execve has the astonishing property of not always returning to * the caller. If sufficiently bad things happen during the call to * do_execve(), it can end up calling exit1(); as a result, callers must * avoid doing anything which they might need to undo (e.g., allocating * memory). */ int kern_execve(td, args, mac_p) struct thread *td; struct image_args *args; struct mac *mac_p; { struct proc *p = td->td_proc; struct vmspace *oldvmspace; int error; AUDIT_ARG_ARGV(args->begin_argv, args->argc, args->begin_envv - args->begin_argv); AUDIT_ARG_ENVV(args->begin_envv, args->envc, args->endp - args->begin_envv); if (p->p_flag & P_HADTHREADS) { PROC_LOCK(p); if (thread_single(p, SINGLE_BOUNDARY)) { PROC_UNLOCK(p); exec_free_args(args); return (ERESTART); /* Try again later. */ } PROC_UNLOCK(p); } KASSERT((td->td_pflags & TDP_EXECVMSPC) == 0, ("nested execve")); oldvmspace = td->td_proc->p_vmspace; error = do_execve(td, args, mac_p); if (p->p_flag & P_HADTHREADS) { PROC_LOCK(p); /* * If success, we upgrade to SINGLE_EXIT state to * force other threads to suicide. */ if (error == 0) thread_single(p, SINGLE_EXIT); else thread_single_end(p, SINGLE_BOUNDARY); PROC_UNLOCK(p); } if ((td->td_pflags & TDP_EXECVMSPC) != 0) { KASSERT(td->td_proc->p_vmspace != oldvmspace, ("oldvmspace still used")); vmspace_free(oldvmspace); td->td_pflags &= ~TDP_EXECVMSPC; } return (error); } /* * In-kernel implementation of execve(). All arguments are assumed to be * userspace pointers from the passed thread. */ static int do_execve(td, args, mac_p) struct thread *td; struct image_args *args; struct mac *mac_p; { struct proc *p = td->td_proc; struct nameidata nd; struct ucred *newcred = NULL, *oldcred; struct uidinfo *euip = NULL; register_t *stack_base; int error, i; struct image_params image_params, *imgp; struct vattr attr; int (*img_first)(struct image_params *); struct pargs *oldargs = NULL, *newargs = NULL; struct sigacts *oldsigacts, *newsigacts; #ifdef KTRACE struct vnode *tracevp = NULL; struct ucred *tracecred = NULL; #endif struct vnode *textvp = NULL, *binvp = NULL; cap_rights_t rights; int credential_changing; int textset; #ifdef MAC struct label *interpvplabel = NULL; int will_transition; #endif #ifdef HWPMC_HOOKS struct pmckern_procexec pe; #endif static const char fexecv_proc_title[] = "(fexecv)"; imgp = &image_params; /* * Lock the process and set the P_INEXEC flag to indicate that * it should be left alone until we're done here. This is * necessary to avoid race conditions - e.g. in ptrace() - * that might allow a local user to illicitly obtain elevated * privileges. */ PROC_LOCK(p); KASSERT((p->p_flag & P_INEXEC) == 0, ("%s(): process already has P_INEXEC flag", __func__)); p->p_flag |= P_INEXEC; PROC_UNLOCK(p); /* * Initialize part of the common data */ imgp->proc = p; imgp->execlabel = NULL; imgp->attr = &attr; imgp->entry_addr = 0; imgp->reloc_base = 0; imgp->vmspace_destroyed = 0; imgp->interpreted = 0; imgp->opened = 0; imgp->interpreter_name = NULL; imgp->auxargs = NULL; imgp->vp = NULL; imgp->object = NULL; imgp->firstpage = NULL; imgp->ps_strings = 0; imgp->auxarg_size = 0; imgp->args = args; imgp->execpath = imgp->freepath = NULL; imgp->execpathp = 0; imgp->canary = 0; imgp->canarylen = 0; imgp->pagesizes = 0; imgp->pagesizeslen = 0; imgp->stack_prot = 0; #ifdef MAC error = mac_execve_enter(imgp, mac_p); if (error) goto exec_fail; #endif imgp->image_header = NULL; /* * Translate the file name. namei() returns a vnode pointer * in ni_vp amoung other things. * * XXXAUDIT: It would be desirable to also audit the name of the * interpreter if this is an interpreted binary. */ if (args->fname != NULL) { NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | FOLLOW | SAVENAME | AUDITVNODE1, UIO_SYSSPACE, args->fname, td); } SDT_PROBE(proc, kernel, , exec, args->fname, 0, 0, 0, 0 ); interpret: if (args->fname != NULL) { #ifdef CAPABILITY_MODE /* * While capability mode can't reach this point via direct * path arguments to execve(), we also don't allow * interpreters to be used in capability mode (for now). * Catch indirect lookups and return a permissions error. */ if (IN_CAPABILITY_MODE(td)) { error = ECAPMODE; goto exec_fail; } #endif error = namei(&nd); if (error) goto exec_fail; binvp = nd.ni_vp; imgp->vp = binvp; } else { AUDIT_ARG_FD(args->fd); /* * Descriptors opened only with O_EXEC or O_RDONLY are allowed. */ error = fgetvp_exec(td, args->fd, cap_rights_init(&rights, CAP_FEXECVE), &binvp); if (error) goto exec_fail; vn_lock(binvp, LK_EXCLUSIVE | LK_RETRY); AUDIT_ARG_VNODE1(binvp); imgp->vp = binvp; } /* * Check file permissions (also 'opens' file) */ error = exec_check_permissions(imgp); if (error) goto exec_fail_dealloc; imgp->object = imgp->vp->v_object; if (imgp->object != NULL) vm_object_reference(imgp->object); /* * Set VV_TEXT now so no one can write to the executable while we're * activating it. * * Remember if this was set before and unset it in case this is not * actually an executable image. */ textset = VOP_IS_TEXT(imgp->vp); VOP_SET_TEXT(imgp->vp); error = exec_map_first_page(imgp); if (error) goto exec_fail_dealloc; imgp->proc->p_osrel = 0; /* * If the current process has a special image activator it * wants to try first, call it. For example, emulating shell * scripts differently. */ error = -1; if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL) error = img_first(imgp); /* * Loop through the list of image activators, calling each one. * An activator returns -1 if there is no match, 0 on success, * and an error otherwise. */ for (i = 0; error == -1 && execsw[i]; ++i) { if (execsw[i]->ex_imgact == NULL || execsw[i]->ex_imgact == img_first) { continue; } error = (*execsw[i]->ex_imgact)(imgp); } if (error) { if (error == -1) { if (textset == 0) VOP_UNSET_TEXT(imgp->vp); error = ENOEXEC; } goto exec_fail_dealloc; } /* * Special interpreter operation, cleanup and loop up to try to * activate the interpreter. */ if (imgp->interpreted) { exec_unmap_first_page(imgp); /* * VV_TEXT needs to be unset for scripts. There is a short * period before we determine that something is a script where * VV_TEXT will be set. The vnode lock is held over this * entire period so nothing should illegitimately be blocked. */ VOP_UNSET_TEXT(imgp->vp); /* free name buffer and old vnode */ if (args->fname != NULL) NDFREE(&nd, NDF_ONLY_PNBUF); #ifdef MAC mac_execve_interpreter_enter(binvp, &interpvplabel); #endif if (imgp->opened) { VOP_CLOSE(binvp, FREAD, td->td_ucred, td); imgp->opened = 0; } vput(binvp); vm_object_deallocate(imgp->object); imgp->object = NULL; /* set new name to that of the interpreter */ NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME, UIO_SYSSPACE, imgp->interpreter_name, td); args->fname = imgp->interpreter_name; goto interpret; } /* * NB: We unlock the vnode here because it is believed that none * of the sv_copyout_strings/sv_fixup operations require the vnode. */ VOP_UNLOCK(imgp->vp, 0); /* * Do the best to calculate the full path to the image file. */ if (imgp->auxargs != NULL && ((args->fname != NULL && args->fname[0] == '/') || vn_fullpath(td, imgp->vp, &imgp->execpath, &imgp->freepath) != 0)) imgp->execpath = args->fname; if (disallow_high_osrel && P_OSREL_MAJOR(p->p_osrel) > P_OSREL_MAJOR(__FreeBSD_version)) { error = ENOEXEC; uprintf("Osrel %d for image %s too high\n", p->p_osrel, imgp->execpath != NULL ? imgp->execpath : ""); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); goto exec_fail_dealloc; } /* * Copy out strings (args and env) and initialize stack base */ if (p->p_sysent->sv_copyout_strings) stack_base = (*p->p_sysent->sv_copyout_strings)(imgp); else stack_base = exec_copyout_strings(imgp); /* * If custom stack fixup routine present for this process * let it do the stack setup. * Else stuff argument count as first item on stack */ if (p->p_sysent->sv_fixup != NULL) (*p->p_sysent->sv_fixup)(&stack_base, imgp); else suword(--stack_base, imgp->args->argc); /* * For security and other reasons, the file descriptor table cannot * be shared after an exec. */ fdunshare(td); /* close files on exec */ fdcloseexec(td); /* * Malloc things before we need locks. */ i = imgp->args->begin_envv - imgp->args->begin_argv; /* Cache arguments if they fit inside our allowance */ if (ps_arg_cache_limit >= i + sizeof(struct pargs)) { newargs = pargs_alloc(i); bcopy(imgp->args->begin_argv, newargs->ar_args, i); } vn_lock(imgp->vp, LK_SHARED | LK_RETRY); /* Get a reference to the vnode prior to locking the proc */ VREF(binvp); /* * For security and other reasons, signal handlers cannot * be shared after an exec. The new process gets a copy of the old * handlers. In execsigs(), the new process will have its signals * reset. */ if (sigacts_shared(p->p_sigacts)) { oldsigacts = p->p_sigacts; newsigacts = sigacts_alloc(); sigacts_copy(newsigacts, oldsigacts); } else { oldsigacts = NULL; newsigacts = NULL; /* satisfy gcc */ } PROC_LOCK(p); if (oldsigacts) p->p_sigacts = newsigacts; oldcred = p->p_ucred; /* Stop profiling */ stopprofclock(p); /* reset caught signals */ execsigs(p); /* name this process - nameiexec(p, ndp) */ bzero(p->p_comm, sizeof(p->p_comm)); if (args->fname) bcopy(nd.ni_cnd.cn_nameptr, p->p_comm, min(nd.ni_cnd.cn_namelen, MAXCOMLEN)); else if (vn_commname(binvp, p->p_comm, sizeof(p->p_comm)) != 0) bcopy(fexecv_proc_title, p->p_comm, sizeof(fexecv_proc_title)); bcopy(p->p_comm, td->td_name, sizeof(td->td_name)); #ifdef KTR sched_clear_tdname(td); #endif /* * mark as execed, wakeup the process that vforked (if any) and tell * it that it now has its own resources back */ p->p_flag |= P_EXEC; if ((p->p_flag2 & P2_NOTRACE_EXEC) == 0) p->p_flag2 &= ~P2_NOTRACE; if (p->p_flag & P_PPWAIT) { p->p_flag &= ~(P_PPWAIT | P_PPTRACE); cv_broadcast(&p->p_pwait); } /* * Implement image setuid/setgid. * * Don't honor setuid/setgid if the filesystem prohibits it or if * the process is being traced. * * We disable setuid/setgid/etc in compatibility mode on the basis * that most setugid applications are not written with that * environment in mind, and will therefore almost certainly operate * incorrectly. In principle there's no reason that setugid * applications might not be useful in capability mode, so we may want * to reconsider this conservative design choice in the future. * * XXXMAC: For the time being, use NOSUID to also prohibit * transitions on the file system. */ credential_changing = 0; credential_changing |= (attr.va_mode & S_ISUID) && oldcred->cr_uid != attr.va_uid; credential_changing |= (attr.va_mode & S_ISGID) && oldcred->cr_gid != attr.va_gid; #ifdef MAC will_transition = mac_vnode_execve_will_transition(oldcred, imgp->vp, interpvplabel, imgp); credential_changing |= will_transition; #endif if (credential_changing && #ifdef CAPABILITY_MODE ((oldcred->cr_flags & CRED_FLAG_CAPMODE) == 0) && #endif (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 && (p->p_flag & P_TRACED) == 0) { /* * Turn off syscall tracing for set-id programs, except for * root. Record any set-id flags first to make sure that * we do not regain any tracing during a possible block. */ setsugid(p); #ifdef KTRACE if (p->p_tracecred != NULL && priv_check_cred(p->p_tracecred, PRIV_DEBUG_DIFFCRED, 0)) ktrprocexec(p, &tracecred, &tracevp); #endif /* * Close any file descriptors 0..2 that reference procfs, * then make sure file descriptors 0..2 are in use. * * setugidsafety() may call closef() and then pfind() * which may grab the process lock. * fdcheckstd() may call falloc() which may block to * allocate memory, so temporarily drop the process lock. */ PROC_UNLOCK(p); VOP_UNLOCK(imgp->vp, 0); setugidsafety(td); error = fdcheckstd(td); if (error != 0) goto done1; newcred = crdup(oldcred); euip = uifind(attr.va_uid); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); PROC_LOCK(p); /* * Set the new credentials. */ if (attr.va_mode & S_ISUID) change_euid(newcred, euip); if (attr.va_mode & S_ISGID) change_egid(newcred, attr.va_gid); #ifdef MAC if (will_transition) { mac_vnode_execve_transition(oldcred, newcred, imgp->vp, interpvplabel, imgp); } #endif /* * Implement correct POSIX saved-id behavior. * * XXXMAC: Note that the current logic will save the * uid and gid if a MAC domain transition occurs, even * though maybe it shouldn't. */ change_svuid(newcred, newcred->cr_uid); change_svgid(newcred, newcred->cr_gid); p->p_ucred = newcred; } else { if (oldcred->cr_uid == oldcred->cr_ruid && oldcred->cr_gid == oldcred->cr_rgid) p->p_flag &= ~P_SUGID; /* * Implement correct POSIX saved-id behavior. * * XXX: It's not clear that the existing behavior is * POSIX-compliant. A number of sources indicate that the * saved uid/gid should only be updated if the new ruid is * not equal to the old ruid, or the new euid is not equal * to the old euid and the new euid is not equal to the old * ruid. The FreeBSD code always updates the saved uid/gid. * Also, this code uses the new (replaced) euid and egid as * the source, which may or may not be the right ones to use. */ if (oldcred->cr_svuid != oldcred->cr_uid || oldcred->cr_svgid != oldcred->cr_gid) { PROC_UNLOCK(p); VOP_UNLOCK(imgp->vp, 0); newcred = crdup(oldcred); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); PROC_LOCK(p); change_svuid(newcred, newcred->cr_uid); change_svgid(newcred, newcred->cr_gid); p->p_ucred = newcred; } } /* * Store the vp for use in procfs. This vnode was referenced prior * to locking the proc lock. */ textvp = p->p_textvp; p->p_textvp = binvp; #ifdef KDTRACE_HOOKS /* * Tell the DTrace fasttrap provider about the exec if it * has declared an interest. */ if (dtrace_fasttrap_exec) dtrace_fasttrap_exec(p); #endif /* * Notify others that we exec'd, and clear the P_INEXEC flag * as we're now a bona fide freshly-execed process. */ KNOTE_LOCKED(&p->p_klist, NOTE_EXEC); p->p_flag &= ~P_INEXEC; /* clear "fork but no exec" flag, as we _are_ execing */ p->p_acflag &= ~AFORK; /* * Free any previous argument cache and replace it with * the new argument cache, if any. */ oldargs = p->p_args; p->p_args = newargs; newargs = NULL; #ifdef HWPMC_HOOKS /* * Check if system-wide sampling is in effect or if the * current process is using PMCs. If so, do exec() time * processing. This processing needs to happen AFTER the * P_INEXEC flag is cleared. * * The proc lock needs to be released before taking the PMC * SX. */ if (PMC_SYSTEM_SAMPLING_ACTIVE() || PMC_PROC_IS_USING_PMCS(p)) { PROC_UNLOCK(p); VOP_UNLOCK(imgp->vp, 0); pe.pm_credentialschanged = credential_changing; pe.pm_entryaddr = imgp->entry_addr; PMC_CALL_HOOK_X(td, PMC_FN_PROCESS_EXEC, (void *) &pe); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); } else PROC_UNLOCK(p); #else /* !HWPMC_HOOKS */ PROC_UNLOCK(p); #endif /* Set values passed into the program in registers. */ if (p->p_sysent->sv_setregs) (*p->p_sysent->sv_setregs)(td, imgp, (u_long)(uintptr_t)stack_base); else exec_setregs(td, imgp, (u_long)(uintptr_t)stack_base); vfs_mark_atime(imgp->vp, td->td_ucred); SDT_PROBE(proc, kernel, , exec__success, args->fname, 0, 0, 0, 0); VOP_UNLOCK(imgp->vp, 0); done1: /* * Free any resources malloc'd earlier that we didn't use. */ if (euip != NULL) uifree(euip); if (newcred != NULL) crfree(oldcred); /* * Handle deferred decrement of ref counts. */ if (textvp != NULL) vrele(textvp); if (binvp && error != 0) vrele(binvp); #ifdef KTRACE if (tracevp != NULL) vrele(tracevp); if (tracecred != NULL) crfree(tracecred); #endif vn_lock(imgp->vp, LK_SHARED | LK_RETRY); pargs_drop(oldargs); pargs_drop(newargs); if (oldsigacts != NULL) sigacts_free(oldsigacts); exec_fail_dealloc: /* * free various allocated resources */ if (imgp->firstpage != NULL) exec_unmap_first_page(imgp); if (imgp->vp != NULL) { if (args->fname) NDFREE(&nd, NDF_ONLY_PNBUF); if (imgp->opened) VOP_CLOSE(imgp->vp, FREAD, td->td_ucred, td); vput(imgp->vp); } if (imgp->object != NULL) vm_object_deallocate(imgp->object); free(imgp->freepath, M_TEMP); if (error == 0) { PROC_LOCK(p); td->td_dbgflags |= TDB_EXEC; PROC_UNLOCK(p); /* * Stop the process here if its stop event mask has * the S_EXEC bit set. */ STOPEVENT(p, S_EXEC, 0); goto done2; } exec_fail: /* we're done here, clear P_INEXEC */ PROC_LOCK(p); p->p_flag &= ~P_INEXEC; PROC_UNLOCK(p); SDT_PROBE(proc, kernel, , exec__failure, error, 0, 0, 0, 0); done2: #ifdef MAC mac_execve_exit(imgp); mac_execve_interpreter_exit(interpvplabel); #endif exec_free_args(args); if (error && imgp->vmspace_destroyed) { /* sorry, no more process anymore. exit gracefully */ exit1(td, W_EXITCODE(0, SIGABRT)); /* NOT REACHED */ } #ifdef KTRACE if (error == 0) ktrprocctor(p); #endif return (error); } int exec_map_first_page(imgp) struct image_params *imgp; { int rv, i; int initial_pagein; vm_page_t ma[VM_INITIAL_PAGEIN]; vm_object_t object; if (imgp->firstpage != NULL) exec_unmap_first_page(imgp); object = imgp->vp->v_object; if (object == NULL) return (EACCES); VM_OBJECT_WLOCK(object); #if VM_NRESERVLEVEL > 0 if ((object->flags & OBJ_COLORED) == 0) { object->flags |= OBJ_COLORED; object->pg_color = 0; } #endif ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL); if (ma[0]->valid != VM_PAGE_BITS_ALL) { initial_pagein = VM_INITIAL_PAGEIN; if (initial_pagein > object->size) initial_pagein = object->size; for (i = 1; i < initial_pagein; i++) { if ((ma[i] = vm_page_next(ma[i - 1])) != NULL) { if (ma[i]->valid) break; if (vm_page_tryxbusy(ma[i])) break; } else { ma[i] = vm_page_alloc(object, i, VM_ALLOC_NORMAL | VM_ALLOC_IFNOTCACHED); if (ma[i] == NULL) break; } } initial_pagein = i; rv = vm_pager_get_pages(object, ma, initial_pagein, 0); ma[0] = vm_page_lookup(object, 0); if ((rv != VM_PAGER_OK) || (ma[0] == NULL)) { if (ma[0] != NULL) { vm_page_lock(ma[0]); vm_page_free(ma[0]); vm_page_unlock(ma[0]); } VM_OBJECT_WUNLOCK(object); return (EIO); } } vm_page_xunbusy(ma[0]); vm_page_lock(ma[0]); vm_page_hold(ma[0]); vm_page_activate(ma[0]); vm_page_unlock(ma[0]); VM_OBJECT_WUNLOCK(object); imgp->firstpage = sf_buf_alloc(ma[0], 0); imgp->image_header = (char *)sf_buf_kva(imgp->firstpage); return (0); } void exec_unmap_first_page(imgp) struct image_params *imgp; { vm_page_t m; if (imgp->firstpage != NULL) { m = sf_buf_page(imgp->firstpage); sf_buf_free(imgp->firstpage); imgp->firstpage = NULL; vm_page_lock(m); vm_page_unhold(m); vm_page_unlock(m); } } /* * Destroy old address space, and allocate a new stack * The new stack is only SGROWSIZ large because it is grown * automatically in trap.c. */ int exec_new_vmspace(imgp, sv) struct image_params *imgp; struct sysentvec *sv; { int error; struct proc *p = imgp->proc; struct vmspace *vmspace = p->p_vmspace; vm_object_t obj; + struct rlimit rlim_stack; vm_offset_t sv_minuser, stack_addr; vm_map_t map; u_long ssiz; imgp->vmspace_destroyed = 1; imgp->sysent = sv; /* May be called with Giant held */ EVENTHANDLER_INVOKE(process_exec, p, imgp); /* * Blow away entire process VM, if address space not shared, * otherwise, create a new VM space so that other threads are * not disrupted */ map = &vmspace->vm_map; if (map_at_zero) sv_minuser = sv->sv_minuser; else sv_minuser = MAX(sv->sv_minuser, PAGE_SIZE); if (vmspace->vm_refcnt == 1 && vm_map_min(map) == sv_minuser && vm_map_max(map) == sv->sv_maxuser) { shmexit(vmspace); pmap_remove_pages(vmspace_pmap(vmspace)); vm_map_remove(map, vm_map_min(map), vm_map_max(map)); } else { error = vmspace_exec(p, sv_minuser, sv->sv_maxuser); if (error) return (error); vmspace = p->p_vmspace; map = &vmspace->vm_map; } /* Map a shared page */ obj = sv->sv_shared_page_obj; if (obj != NULL) { vm_object_reference(obj); error = vm_map_fixed(map, obj, 0, sv->sv_shared_page_base, sv->sv_shared_page_len, VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_READ | VM_PROT_EXECUTE, MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE); if (error) { vm_object_deallocate(obj); return (error); } } /* Allocate a new stack */ - if (sv->sv_maxssiz != NULL) + if (imgp->stack_sz != 0) { + ssiz = imgp->stack_sz; + PROC_LOCK(p); + lim_rlimit(p, RLIMIT_STACK, &rlim_stack); + PROC_UNLOCK(p); + if (ssiz > rlim_stack.rlim_max) + ssiz = rlim_stack.rlim_max; + if (ssiz > rlim_stack.rlim_cur) { + rlim_stack.rlim_cur = ssiz; + kern_setrlimit(curthread, RLIMIT_STACK, &rlim_stack); + } + } else if (sv->sv_maxssiz != NULL) { ssiz = *sv->sv_maxssiz; - else + } else { ssiz = maxssiz; + } stack_addr = sv->sv_usrstack - ssiz; error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz, obj != NULL && imgp->stack_prot != 0 ? imgp->stack_prot : sv->sv_stackprot, VM_PROT_ALL, MAP_STACK_GROWS_DOWN); if (error) return (error); #ifdef __ia64__ /* Allocate a new register stack */ stack_addr = IA64_BACKINGSTORE; error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz, sv->sv_stackprot, VM_PROT_ALL, MAP_STACK_GROWS_UP); if (error) return (error); #endif /* vm_ssize and vm_maxsaddr are somewhat antiquated concepts in the * VM_STACK case, but they are still used to monitor the size of the * process stack so we can check the stack rlimit. */ vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT; vmspace->vm_maxsaddr = (char *)sv->sv_usrstack - ssiz; return (0); } /* * Copy out argument and environment strings from the old process address * space into the temporary string buffer. */ int exec_copyin_args(struct image_args *args, char *fname, enum uio_seg segflg, char **argv, char **envv) { u_long argp, envp; int error; size_t length; bzero(args, sizeof(*args)); if (argv == NULL) return (EFAULT); /* * Allocate demand-paged memory for the file name, argument, and * environment strings. */ error = exec_alloc_args(args); if (error != 0) return (error); /* * Copy the file name. */ if (fname != NULL) { args->fname = args->buf; error = (segflg == UIO_SYSSPACE) ? copystr(fname, args->fname, PATH_MAX, &length) : copyinstr(fname, args->fname, PATH_MAX, &length); if (error != 0) goto err_exit; } else length = 0; args->begin_argv = args->buf + length; args->endp = args->begin_argv; args->stringspace = ARG_MAX; /* * extract arguments first */ for (;;) { error = fueword(argv++, &argp); if (error == -1) { error = EFAULT; goto err_exit; } if (argp == 0) break; error = copyinstr((void *)(uintptr_t)argp, args->endp, args->stringspace, &length); if (error != 0) { if (error == ENAMETOOLONG) error = E2BIG; goto err_exit; } args->stringspace -= length; args->endp += length; args->argc++; } args->begin_envv = args->endp; /* * extract environment strings */ if (envv) { for (;;) { error = fueword(envv++, &envp); if (error == -1) { error = EFAULT; goto err_exit; } if (envp == 0) break; error = copyinstr((void *)(uintptr_t)envp, args->endp, args->stringspace, &length); if (error != 0) { if (error == ENAMETOOLONG) error = E2BIG; goto err_exit; } args->stringspace -= length; args->endp += length; args->envc++; } } return (0); err_exit: exec_free_args(args); return (error); } /* * Allocate temporary demand-paged, zero-filled memory for the file name, * argument, and environment strings. Returns zero if the allocation succeeds * and ENOMEM otherwise. */ int exec_alloc_args(struct image_args *args) { args->buf = (char *)kmap_alloc_wait(exec_map, PATH_MAX + ARG_MAX); return (args->buf != NULL ? 0 : ENOMEM); } void exec_free_args(struct image_args *args) { if (args->buf != NULL) { kmap_free_wakeup(exec_map, (vm_offset_t)args->buf, PATH_MAX + ARG_MAX); args->buf = NULL; } if (args->fname_buf != NULL) { free(args->fname_buf, M_TEMP); args->fname_buf = NULL; } } /* * Copy strings out to the new process address space, constructing new arg * and env vector tables. Return a pointer to the base so that it can be used * as the initial stack pointer. */ register_t * exec_copyout_strings(imgp) struct image_params *imgp; { int argc, envc; char **vectp; char *stringp; uintptr_t destp; register_t *stack_base; struct ps_strings *arginfo; struct proc *p; size_t execpath_len; int szsigcode, szps; char canary[sizeof(long) * 8]; szps = sizeof(pagesizes[0]) * MAXPAGESIZES; /* * Calculate string base and vector table pointers. * Also deal with signal trampoline code for this exec type. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; p = imgp->proc; szsigcode = 0; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; if (p->p_sysent->sv_sigcode_base == 0) { if (p->p_sysent->sv_szsigcode != NULL) szsigcode = *(p->p_sysent->sv_szsigcode); } destp = (uintptr_t)arginfo; /* * install sigcode */ if (szsigcode != 0) { destp -= szsigcode; destp = rounddown2(destp, sizeof(void *)); copyout(p->p_sysent->sv_sigcode, (void *)destp, szsigcode); } /* * Copy the image path for the rtld. */ if (execpath_len != 0) { destp -= execpath_len; imgp->execpathp = destp; copyout(imgp->execpath, (void *)destp, execpath_len); } /* * Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); destp -= sizeof(canary); imgp->canary = destp; copyout(canary, (void *)destp, sizeof(canary)); imgp->canarylen = sizeof(canary); /* * Prepare the pagesizes array. */ destp -= szps; destp = rounddown2(destp, sizeof(void *)); imgp->pagesizes = destp; copyout(pagesizes, (void *)destp, szps); imgp->pagesizeslen = szps; destp -= ARG_MAX - imgp->args->stringspace; destp = rounddown2(destp, sizeof(void *)); /* * If we have a valid auxargs ptr, prepare some room * on the stack. */ if (imgp->auxargs) { /* * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for * lower compatibility. */ imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : (AT_COUNT * 2); /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets,and imgp->auxarg_size is room * for argument of Runtime loader. */ vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *)); } else { /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets */ vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(char *)); } /* * vectp also becomes our initial stack base */ stack_base = (register_t *)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* * Copy out strings - arguments and environment. */ copyout(stringp, (void *)destp, ARG_MAX - imgp->args->stringspace); /* * Fill in "ps_strings" struct for ps, w, etc. */ suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp); suword32(&arginfo->ps_nargvstr, argc); /* * Fill in argument portion of vector table. */ for (; argc > 0; --argc) { suword(vectp++, (long)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* a null vector table pointer separates the argp's from the envp's */ suword(vectp++, 0); suword(&arginfo->ps_envstr, (long)(intptr_t)vectp); suword32(&arginfo->ps_nenvstr, envc); /* * Fill in environment portion of vector table. */ for (; envc > 0; --envc) { suword(vectp++, (long)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* end of vector table is a null pointer */ suword(vectp, 0); return (stack_base); } /* * Check permissions of file to execute. * Called with imgp->vp locked. * Return 0 for success or error code on failure. */ int exec_check_permissions(imgp) struct image_params *imgp; { struct vnode *vp = imgp->vp; struct vattr *attr = imgp->attr; struct thread *td; int error, writecount; td = curthread; /* Get file attributes */ error = VOP_GETATTR(vp, attr, td->td_ucred); if (error) return (error); #ifdef MAC error = mac_vnode_check_exec(td->td_ucred, imgp->vp, imgp); if (error) return (error); #endif /* * 1) Check if file execution is disabled for the filesystem that * this file resides on. * 2) Ensure that at least one execute bit is on. Otherwise, a * privileged user will always succeed, and we don't want this * to happen unless the file really is executable. * 3) Ensure that the file is a regular file. */ if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || (attr->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0 || (attr->va_type != VREG)) return (EACCES); /* * Zero length files can't be exec'd */ if (attr->va_size == 0) return (ENOEXEC); /* * Check for execute permission to file based on current credentials. */ error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); if (error) return (error); /* * Check number of open-for-writes on the file and deny execution * if there are any. */ error = VOP_GET_WRITECOUNT(vp, &writecount); if (error != 0) return (error); if (writecount != 0) return (ETXTBSY); /* * Call filesystem specific open routine (which does nothing in the * general case). */ error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); if (error == 0) imgp->opened = 1; return (error); } /* * Exec handler registration */ int exec_register(execsw_arg) const struct execsw *execsw_arg; { const struct execsw **es, **xs, **newexecsw; int count = 2; /* New slot and trailing NULL */ if (execsw) for (es = execsw; *es; es++) count++; newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); if (newexecsw == NULL) return (ENOMEM); xs = newexecsw; if (execsw) for (es = execsw; *es; es++) *xs++ = *es; *xs++ = execsw_arg; *xs = NULL; if (execsw) free(execsw, M_TEMP); execsw = newexecsw; return (0); } int exec_unregister(execsw_arg) const struct execsw *execsw_arg; { const struct execsw **es, **xs, **newexecsw; int count = 1; if (execsw == NULL) panic("unregister with no handlers left?\n"); for (es = execsw; *es; es++) { if (*es == execsw_arg) break; } if (*es == NULL) return (ENOENT); for (es = execsw; *es; es++) if (*es != execsw_arg) count++; newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); if (newexecsw == NULL) return (ENOMEM); xs = newexecsw; for (es = execsw; *es; es++) if (*es != execsw_arg) *xs++ = *es; *xs = NULL; if (execsw) free(execsw, M_TEMP); execsw = newexecsw; return (0); } Index: stable/10/sys/kern/kern_resource.c =================================================================== --- stable/10/sys/kern/kern_resource.c (revision 281847) +++ stable/10/sys/kern/kern_resource.c (revision 281848) @@ -1,1454 +1,1459 @@ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_resource.c 8.5 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures"); static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures"); #define UIHASH(uid) (&uihashtbl[(uid) & uihash]) static struct rwlock uihashtbl_lock; static LIST_HEAD(uihashhead, uidinfo) *uihashtbl; static u_long uihash; /* size of hash table - 1 */ static void calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up, struct timeval *sp); static int donice(struct thread *td, struct proc *chgp, int n); static struct uidinfo *uilookup(uid_t uid); static void ruxagg_locked(struct rusage_ext *rux, struct thread *td); static __inline int lim_shared(struct plimit *limp); /* * Resource controls and accounting. */ #ifndef _SYS_SYSPROTO_H_ struct getpriority_args { int which; int who; }; #endif int sys_getpriority(td, uap) struct thread *td; register struct getpriority_args *uap; { struct proc *p; struct pgrp *pg; int error, low; error = 0; low = PRIO_MAX + 1; switch (uap->which) { case PRIO_PROCESS: if (uap->who == 0) low = td->td_proc->p_nice; else { p = pfind(uap->who); if (p == NULL) break; if (p_cansee(td, p) == 0) low = p->p_nice; PROC_UNLOCK(p); } break; case PRIO_PGRP: sx_slock(&proctree_lock); if (uap->who == 0) { pg = td->td_proc->p_pgrp; PGRP_LOCK(pg); } else { pg = pgfind(uap->who); if (pg == NULL) { sx_sunlock(&proctree_lock); break; } } sx_sunlock(&proctree_lock); LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); if (p->p_state == PRS_NORMAL && p_cansee(td, p) == 0) { if (p->p_nice < low) low = p->p_nice; } PROC_UNLOCK(p); } PGRP_UNLOCK(pg); break; case PRIO_USER: if (uap->who == 0) uap->who = td->td_ucred->cr_uid; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NORMAL && p_cansee(td, p) == 0 && p->p_ucred->cr_uid == uap->who) { if (p->p_nice < low) low = p->p_nice; } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); break; default: error = EINVAL; break; } if (low == PRIO_MAX + 1 && error == 0) error = ESRCH; td->td_retval[0] = low; return (error); } #ifndef _SYS_SYSPROTO_H_ struct setpriority_args { int which; int who; int prio; }; #endif int sys_setpriority(td, uap) struct thread *td; struct setpriority_args *uap; { struct proc *curp, *p; struct pgrp *pg; int found = 0, error = 0; curp = td->td_proc; switch (uap->which) { case PRIO_PROCESS: if (uap->who == 0) { PROC_LOCK(curp); error = donice(td, curp, uap->prio); PROC_UNLOCK(curp); } else { p = pfind(uap->who); if (p == NULL) break; error = p_cansee(td, p); if (error == 0) error = donice(td, p, uap->prio); PROC_UNLOCK(p); } found++; break; case PRIO_PGRP: sx_slock(&proctree_lock); if (uap->who == 0) { pg = curp->p_pgrp; PGRP_LOCK(pg); } else { pg = pgfind(uap->who); if (pg == NULL) { sx_sunlock(&proctree_lock); break; } } sx_sunlock(&proctree_lock); LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); if (p->p_state == PRS_NORMAL && p_cansee(td, p) == 0) { error = donice(td, p, uap->prio); found++; } PROC_UNLOCK(p); } PGRP_UNLOCK(pg); break; case PRIO_USER: if (uap->who == 0) uap->who = td->td_ucred->cr_uid; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NORMAL && p->p_ucred->cr_uid == uap->who && p_cansee(td, p) == 0) { error = donice(td, p, uap->prio); found++; } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); break; default: error = EINVAL; break; } if (found == 0 && error == 0) error = ESRCH; return (error); } /* * Set "nice" for a (whole) process. */ static int donice(struct thread *td, struct proc *p, int n) { int error; PROC_LOCK_ASSERT(p, MA_OWNED); if ((error = p_cansched(td, p))) return (error); if (n > PRIO_MAX) n = PRIO_MAX; if (n < PRIO_MIN) n = PRIO_MIN; if (n < p->p_nice && priv_check(td, PRIV_SCHED_SETPRIORITY) != 0) return (EACCES); sched_nice(p, n); return (0); } static int unprivileged_idprio; SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_idprio, CTLFLAG_RW, &unprivileged_idprio, 0, "Allow non-root users to set an idle priority"); /* * Set realtime priority for LWP. */ #ifndef _SYS_SYSPROTO_H_ struct rtprio_thread_args { int function; lwpid_t lwpid; struct rtprio *rtp; }; #endif int sys_rtprio_thread(struct thread *td, struct rtprio_thread_args *uap) { struct proc *p; struct rtprio rtp; struct thread *td1; int cierror, error; /* Perform copyin before acquiring locks if needed. */ if (uap->function == RTP_SET) cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio)); else cierror = 0; if (uap->lwpid == 0 || uap->lwpid == td->td_tid) { p = td->td_proc; td1 = td; PROC_LOCK(p); } else { /* Only look up thread in current process */ td1 = tdfind(uap->lwpid, curproc->p_pid); if (td1 == NULL) return (ESRCH); p = td1->td_proc; } switch (uap->function) { case RTP_LOOKUP: if ((error = p_cansee(td, p))) break; pri_to_rtp(td1, &rtp); PROC_UNLOCK(p); return (copyout(&rtp, uap->rtp, sizeof(struct rtprio))); case RTP_SET: if ((error = p_cansched(td, p)) || (error = cierror)) break; /* Disallow setting rtprio in most cases if not superuser. */ /* * Realtime priority has to be restricted for reasons which * should be obvious. However, for idleprio processes, there is * a potential for system deadlock if an idleprio process gains * a lock on a resource that other processes need (and the * idleprio process can't run due to a CPU-bound normal * process). Fix me! XXX * * This problem is not only related to idleprio process. * A user level program can obtain a file lock and hold it * indefinitely. Additionally, without idleprio processes it is * still conceivable that a program with low priority will never * get to run. In short, allowing this feature might make it * easier to lock a resource indefinitely, but it is not the * only thing that makes it possible. */ if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME || (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_IDLE && unprivileged_idprio == 0)) { error = priv_check(td, PRIV_SCHED_RTPRIO); if (error) break; } error = rtp_to_pri(&rtp, td1); break; default: error = EINVAL; break; } PROC_UNLOCK(p); return (error); } /* * Set realtime priority. */ #ifndef _SYS_SYSPROTO_H_ struct rtprio_args { int function; pid_t pid; struct rtprio *rtp; }; #endif int sys_rtprio(td, uap) struct thread *td; /* curthread */ register struct rtprio_args *uap; { struct proc *p; struct thread *tdp; struct rtprio rtp; int cierror, error; /* Perform copyin before acquiring locks if needed. */ if (uap->function == RTP_SET) cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio)); else cierror = 0; if (uap->pid == 0) { p = td->td_proc; PROC_LOCK(p); } else { p = pfind(uap->pid); if (p == NULL) return (ESRCH); } switch (uap->function) { case RTP_LOOKUP: if ((error = p_cansee(td, p))) break; /* * Return OUR priority if no pid specified, * or if one is, report the highest priority * in the process. There isn't much more you can do as * there is only room to return a single priority. * Note: specifying our own pid is not the same * as leaving it zero. */ if (uap->pid == 0) { pri_to_rtp(td, &rtp); } else { struct rtprio rtp2; rtp.type = RTP_PRIO_IDLE; rtp.prio = RTP_PRIO_MAX; FOREACH_THREAD_IN_PROC(p, tdp) { pri_to_rtp(tdp, &rtp2); if (rtp2.type < rtp.type || (rtp2.type == rtp.type && rtp2.prio < rtp.prio)) { rtp.type = rtp2.type; rtp.prio = rtp2.prio; } } } PROC_UNLOCK(p); return (copyout(&rtp, uap->rtp, sizeof(struct rtprio))); case RTP_SET: if ((error = p_cansched(td, p)) || (error = cierror)) break; /* * Disallow setting rtprio in most cases if not superuser. * See the comment in sys_rtprio_thread about idprio * threads holding a lock. */ if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME || (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_IDLE && !unprivileged_idprio)) { error = priv_check(td, PRIV_SCHED_RTPRIO); if (error) break; } /* * If we are setting our own priority, set just our * thread but if we are doing another process, * do all the threads on that process. If we * specify our own pid we do the latter. */ if (uap->pid == 0) { error = rtp_to_pri(&rtp, td); } else { FOREACH_THREAD_IN_PROC(p, td) { if ((error = rtp_to_pri(&rtp, td)) != 0) break; } } break; default: error = EINVAL; break; } PROC_UNLOCK(p); return (error); } int rtp_to_pri(struct rtprio *rtp, struct thread *td) { u_char newpri, oldclass, oldpri; switch (RTP_PRIO_BASE(rtp->type)) { case RTP_PRIO_REALTIME: if (rtp->prio > RTP_PRIO_MAX) return (EINVAL); newpri = PRI_MIN_REALTIME + rtp->prio; break; case RTP_PRIO_NORMAL: if (rtp->prio > (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE)) return (EINVAL); newpri = PRI_MIN_TIMESHARE + rtp->prio; break; case RTP_PRIO_IDLE: if (rtp->prio > RTP_PRIO_MAX) return (EINVAL); newpri = PRI_MIN_IDLE + rtp->prio; break; default: return (EINVAL); } thread_lock(td); oldclass = td->td_pri_class; sched_class(td, rtp->type); /* XXX fix */ oldpri = td->td_user_pri; sched_user_prio(td, newpri); if (td->td_user_pri != oldpri && (oldclass != RTP_PRIO_NORMAL || td->td_pri_class != RTP_PRIO_NORMAL)) sched_prio(td, td->td_user_pri); if (TD_ON_UPILOCK(td) && oldpri != newpri) { critical_enter(); thread_unlock(td); umtx_pi_adjust(td, oldpri); critical_exit(); } else thread_unlock(td); return (0); } void pri_to_rtp(struct thread *td, struct rtprio *rtp) { thread_lock(td); switch (PRI_BASE(td->td_pri_class)) { case PRI_REALTIME: rtp->prio = td->td_base_user_pri - PRI_MIN_REALTIME; break; case PRI_TIMESHARE: rtp->prio = td->td_base_user_pri - PRI_MIN_TIMESHARE; break; case PRI_IDLE: rtp->prio = td->td_base_user_pri - PRI_MIN_IDLE; break; default: break; } rtp->type = td->td_pri_class; thread_unlock(td); } #if defined(COMPAT_43) #ifndef _SYS_SYSPROTO_H_ struct osetrlimit_args { u_int which; struct orlimit *rlp; }; #endif int osetrlimit(td, uap) struct thread *td; register struct osetrlimit_args *uap; { struct orlimit olim; struct rlimit lim; int error; if ((error = copyin(uap->rlp, &olim, sizeof(struct orlimit)))) return (error); lim.rlim_cur = olim.rlim_cur; lim.rlim_max = olim.rlim_max; error = kern_setrlimit(td, uap->which, &lim); return (error); } #ifndef _SYS_SYSPROTO_H_ struct ogetrlimit_args { u_int which; struct orlimit *rlp; }; #endif int ogetrlimit(td, uap) struct thread *td; register struct ogetrlimit_args *uap; { struct orlimit olim; struct rlimit rl; struct proc *p; int error; if (uap->which >= RLIM_NLIMITS) return (EINVAL); p = td->td_proc; PROC_LOCK(p); lim_rlimit(p, uap->which, &rl); PROC_UNLOCK(p); /* * XXX would be more correct to convert only RLIM_INFINITY to the * old RLIM_INFINITY and fail with EOVERFLOW for other larger * values. Most 64->32 and 32->16 conversions, including not * unimportant ones of uids are even more broken than what we * do here (they blindly truncate). We don't do this correctly * here since we have little experience with EOVERFLOW yet. * Elsewhere, getuid() can't fail... */ olim.rlim_cur = rl.rlim_cur > 0x7fffffff ? 0x7fffffff : rl.rlim_cur; olim.rlim_max = rl.rlim_max > 0x7fffffff ? 0x7fffffff : rl.rlim_max; error = copyout(&olim, uap->rlp, sizeof(olim)); return (error); } #endif /* COMPAT_43 */ #ifndef _SYS_SYSPROTO_H_ struct __setrlimit_args { u_int which; struct rlimit *rlp; }; #endif int sys_setrlimit(td, uap) struct thread *td; register struct __setrlimit_args *uap; { struct rlimit alim; int error; if ((error = copyin(uap->rlp, &alim, sizeof(struct rlimit)))) return (error); error = kern_setrlimit(td, uap->which, &alim); return (error); } static void lim_cb(void *arg) { struct rlimit rlim; struct thread *td; struct proc *p; p = arg; PROC_LOCK_ASSERT(p, MA_OWNED); /* * Check if the process exceeds its cpu resource allocation. If * it reaches the max, arrange to kill the process in ast(). */ if (p->p_cpulimit == RLIM_INFINITY) return; PROC_SLOCK(p); FOREACH_THREAD_IN_PROC(p, td) { ruxagg(p, td); } PROC_SUNLOCK(p); if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) { lim_rlimit(p, RLIMIT_CPU, &rlim); if (p->p_rux.rux_runtime >= rlim.rlim_max * cpu_tickrate()) { killproc(p, "exceeded maximum CPU limit"); } else { if (p->p_cpulimit < rlim.rlim_max) p->p_cpulimit += 5; kern_psignal(p, SIGXCPU); } } if ((p->p_flag & P_WEXIT) == 0) callout_reset_sbt(&p->p_limco, SBT_1S, 0, lim_cb, p, C_PREL(1)); } int kern_setrlimit(struct thread *td, u_int which, struct rlimit *limp) { return (kern_proc_setrlimit(td, td->td_proc, which, limp)); } int kern_proc_setrlimit(struct thread *td, struct proc *p, u_int which, struct rlimit *limp) { struct plimit *newlim, *oldlim; register struct rlimit *alimp; struct rlimit oldssiz; int error; if (which >= RLIM_NLIMITS) return (EINVAL); /* * Preserve historical bugs by treating negative limits as unsigned. */ if (limp->rlim_cur < 0) limp->rlim_cur = RLIM_INFINITY; if (limp->rlim_max < 0) limp->rlim_max = RLIM_INFINITY; oldssiz.rlim_cur = 0; newlim = NULL; PROC_LOCK(p); if (lim_shared(p->p_limit)) { PROC_UNLOCK(p); newlim = lim_alloc(); PROC_LOCK(p); } oldlim = p->p_limit; alimp = &oldlim->pl_rlimit[which]; if (limp->rlim_cur > alimp->rlim_max || limp->rlim_max > alimp->rlim_max) if ((error = priv_check(td, PRIV_PROC_SETRLIMIT))) { PROC_UNLOCK(p); if (newlim != NULL) lim_free(newlim); return (error); } if (limp->rlim_cur > limp->rlim_max) limp->rlim_cur = limp->rlim_max; if (newlim != NULL) { lim_copy(newlim, oldlim); alimp = &newlim->pl_rlimit[which]; } switch (which) { case RLIMIT_CPU: if (limp->rlim_cur != RLIM_INFINITY && p->p_cpulimit == RLIM_INFINITY) callout_reset_sbt(&p->p_limco, SBT_1S, 0, lim_cb, p, C_PREL(1)); p->p_cpulimit = limp->rlim_cur; break; case RLIMIT_DATA: if (limp->rlim_cur > maxdsiz) limp->rlim_cur = maxdsiz; if (limp->rlim_max > maxdsiz) limp->rlim_max = maxdsiz; break; case RLIMIT_STACK: if (limp->rlim_cur > maxssiz) limp->rlim_cur = maxssiz; if (limp->rlim_max > maxssiz) limp->rlim_max = maxssiz; oldssiz = *alimp; if (p->p_sysent->sv_fixlimit != NULL) p->p_sysent->sv_fixlimit(&oldssiz, RLIMIT_STACK); break; case RLIMIT_NOFILE: if (limp->rlim_cur > maxfilesperproc) limp->rlim_cur = maxfilesperproc; if (limp->rlim_max > maxfilesperproc) limp->rlim_max = maxfilesperproc; break; case RLIMIT_NPROC: if (limp->rlim_cur > maxprocperuid) limp->rlim_cur = maxprocperuid; if (limp->rlim_max > maxprocperuid) limp->rlim_max = maxprocperuid; if (limp->rlim_cur < 1) limp->rlim_cur = 1; if (limp->rlim_max < 1) limp->rlim_max = 1; break; } if (p->p_sysent->sv_fixlimit != NULL) p->p_sysent->sv_fixlimit(limp, which); *alimp = *limp; if (newlim != NULL) p->p_limit = newlim; PROC_UNLOCK(p); if (newlim != NULL) lim_free(oldlim); - if (which == RLIMIT_STACK) { + if (which == RLIMIT_STACK && + /* + * Skip calls from exec_new_vmspace(), done when stack is + * not mapped yet. + */ + (td != curthread || (p->p_flag & P_INEXEC) == 0)) { /* * Stack is allocated to the max at exec time with only * "rlim_cur" bytes accessible. If stack limit is going * up make more accessible, if going down make inaccessible. */ if (limp->rlim_cur != oldssiz.rlim_cur) { vm_offset_t addr; vm_size_t size; vm_prot_t prot; if (limp->rlim_cur > oldssiz.rlim_cur) { prot = p->p_sysent->sv_stackprot; size = limp->rlim_cur - oldssiz.rlim_cur; addr = p->p_sysent->sv_usrstack - limp->rlim_cur; } else { prot = VM_PROT_NONE; size = oldssiz.rlim_cur - limp->rlim_cur; addr = p->p_sysent->sv_usrstack - oldssiz.rlim_cur; } addr = trunc_page(addr); size = round_page(size); (void)vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot, FALSE); } } return (0); } #ifndef _SYS_SYSPROTO_H_ struct __getrlimit_args { u_int which; struct rlimit *rlp; }; #endif /* ARGSUSED */ int sys_getrlimit(td, uap) struct thread *td; register struct __getrlimit_args *uap; { struct rlimit rlim; struct proc *p; int error; if (uap->which >= RLIM_NLIMITS) return (EINVAL); p = td->td_proc; PROC_LOCK(p); lim_rlimit(p, uap->which, &rlim); PROC_UNLOCK(p); error = copyout(&rlim, uap->rlp, sizeof(struct rlimit)); return (error); } /* * Transform the running time and tick information for children of proc p * into user and system time usage. */ void calccru(p, up, sp) struct proc *p; struct timeval *up; struct timeval *sp; { PROC_LOCK_ASSERT(p, MA_OWNED); calcru1(p, &p->p_crux, up, sp); } /* * Transform the running time and tick information in proc p into user * and system time usage. If appropriate, include the current time slice * on this CPU. */ void calcru(struct proc *p, struct timeval *up, struct timeval *sp) { struct thread *td; uint64_t runtime, u; PROC_LOCK_ASSERT(p, MA_OWNED); PROC_SLOCK_ASSERT(p, MA_OWNED); /* * If we are getting stats for the current process, then add in the * stats that this thread has accumulated in its current time slice. * We reset the thread and CPU state as if we had performed a context * switch right here. */ td = curthread; if (td->td_proc == p) { u = cpu_ticks(); runtime = u - PCPU_GET(switchtime); td->td_runtime += runtime; td->td_incruntime += runtime; PCPU_SET(switchtime, u); } /* Make sure the per-thread stats are current. */ FOREACH_THREAD_IN_PROC(p, td) { if (td->td_incruntime == 0) continue; ruxagg(p, td); } calcru1(p, &p->p_rux, up, sp); } /* Collect resource usage for a single thread. */ void rufetchtd(struct thread *td, struct rusage *ru) { struct proc *p; uint64_t runtime, u; p = td->td_proc; PROC_SLOCK_ASSERT(p, MA_OWNED); THREAD_LOCK_ASSERT(td, MA_OWNED); /* * If we are getting stats for the current thread, then add in the * stats that this thread has accumulated in its current time slice. * We reset the thread and CPU state as if we had performed a context * switch right here. */ if (td == curthread) { u = cpu_ticks(); runtime = u - PCPU_GET(switchtime); td->td_runtime += runtime; td->td_incruntime += runtime; PCPU_SET(switchtime, u); } ruxagg(p, td); *ru = td->td_ru; calcru1(p, &td->td_rux, &ru->ru_utime, &ru->ru_stime); } static void calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up, struct timeval *sp) { /* {user, system, interrupt, total} {ticks, usec}: */ uint64_t ut, uu, st, su, it, tt, tu; ut = ruxp->rux_uticks; st = ruxp->rux_sticks; it = ruxp->rux_iticks; tt = ut + st + it; if (tt == 0) { /* Avoid divide by zero */ st = 1; tt = 1; } tu = cputick2usec(ruxp->rux_runtime); if ((int64_t)tu < 0) { /* XXX: this should be an assert /phk */ printf("calcru: negative runtime of %jd usec for pid %d (%s)\n", (intmax_t)tu, p->p_pid, p->p_comm); tu = ruxp->rux_tu; } if (tu >= ruxp->rux_tu) { /* * The normal case, time increased. * Enforce monotonicity of bucketed numbers. */ uu = (tu * ut) / tt; if (uu < ruxp->rux_uu) uu = ruxp->rux_uu; su = (tu * st) / tt; if (su < ruxp->rux_su) su = ruxp->rux_su; } else if (tu + 3 > ruxp->rux_tu || 101 * tu > 100 * ruxp->rux_tu) { /* * When we calibrate the cputicker, it is not uncommon to * see the presumably fixed frequency increase slightly over * time as a result of thermal stabilization and NTP * discipline (of the reference clock). We therefore ignore * a bit of backwards slop because we expect to catch up * shortly. We use a 3 microsecond limit to catch low * counts and a 1% limit for high counts. */ uu = ruxp->rux_uu; su = ruxp->rux_su; tu = ruxp->rux_tu; } else { /* tu < ruxp->rux_tu */ /* * What happened here was likely that a laptop, which ran at * a reduced clock frequency at boot, kicked into high gear. * The wisdom of spamming this message in that case is * dubious, but it might also be indicative of something * serious, so lets keep it and hope laptops can be made * more truthful about their CPU speed via ACPI. */ printf("calcru: runtime went backwards from %ju usec " "to %ju usec for pid %d (%s)\n", (uintmax_t)ruxp->rux_tu, (uintmax_t)tu, p->p_pid, p->p_comm); uu = (tu * ut) / tt; su = (tu * st) / tt; } ruxp->rux_uu = uu; ruxp->rux_su = su; ruxp->rux_tu = tu; up->tv_sec = uu / 1000000; up->tv_usec = uu % 1000000; sp->tv_sec = su / 1000000; sp->tv_usec = su % 1000000; } #ifndef _SYS_SYSPROTO_H_ struct getrusage_args { int who; struct rusage *rusage; }; #endif int sys_getrusage(td, uap) register struct thread *td; register struct getrusage_args *uap; { struct rusage ru; int error; error = kern_getrusage(td, uap->who, &ru); if (error == 0) error = copyout(&ru, uap->rusage, sizeof(struct rusage)); return (error); } int kern_getrusage(struct thread *td, int who, struct rusage *rup) { struct proc *p; int error; error = 0; p = td->td_proc; PROC_LOCK(p); switch (who) { case RUSAGE_SELF: rufetchcalc(p, rup, &rup->ru_utime, &rup->ru_stime); break; case RUSAGE_CHILDREN: *rup = p->p_stats->p_cru; calccru(p, &rup->ru_utime, &rup->ru_stime); break; case RUSAGE_THREAD: PROC_SLOCK(p); thread_lock(td); rufetchtd(td, rup); thread_unlock(td); PROC_SUNLOCK(p); break; default: error = EINVAL; } PROC_UNLOCK(p); return (error); } void rucollect(struct rusage *ru, struct rusage *ru2) { long *ip, *ip2; int i; if (ru->ru_maxrss < ru2->ru_maxrss) ru->ru_maxrss = ru2->ru_maxrss; ip = &ru->ru_first; ip2 = &ru2->ru_first; for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--) *ip++ += *ip2++; } void ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2, struct rusage_ext *rux2) { rux->rux_runtime += rux2->rux_runtime; rux->rux_uticks += rux2->rux_uticks; rux->rux_sticks += rux2->rux_sticks; rux->rux_iticks += rux2->rux_iticks; rux->rux_uu += rux2->rux_uu; rux->rux_su += rux2->rux_su; rux->rux_tu += rux2->rux_tu; rucollect(ru, ru2); } /* * Aggregate tick counts into the proc's rusage_ext. */ static void ruxagg_locked(struct rusage_ext *rux, struct thread *td) { THREAD_LOCK_ASSERT(td, MA_OWNED); PROC_SLOCK_ASSERT(td->td_proc, MA_OWNED); rux->rux_runtime += td->td_incruntime; rux->rux_uticks += td->td_uticks; rux->rux_sticks += td->td_sticks; rux->rux_iticks += td->td_iticks; } void ruxagg(struct proc *p, struct thread *td) { thread_lock(td); ruxagg_locked(&p->p_rux, td); ruxagg_locked(&td->td_rux, td); td->td_incruntime = 0; td->td_uticks = 0; td->td_iticks = 0; td->td_sticks = 0; thread_unlock(td); } /* * Update the rusage_ext structure and fetch a valid aggregate rusage * for proc p if storage for one is supplied. */ void rufetch(struct proc *p, struct rusage *ru) { struct thread *td; PROC_SLOCK_ASSERT(p, MA_OWNED); *ru = p->p_ru; if (p->p_numthreads > 0) { FOREACH_THREAD_IN_PROC(p, td) { ruxagg(p, td); rucollect(ru, &td->td_ru); } } } /* * Atomically perform a rufetch and a calcru together. * Consumers, can safely assume the calcru is executed only once * rufetch is completed. */ void rufetchcalc(struct proc *p, struct rusage *ru, struct timeval *up, struct timeval *sp) { PROC_SLOCK(p); rufetch(p, ru); calcru(p, up, sp); PROC_SUNLOCK(p); } /* * Allocate a new resource limits structure and initialize its * reference count and mutex pointer. */ struct plimit * lim_alloc() { struct plimit *limp; limp = malloc(sizeof(struct plimit), M_PLIMIT, M_WAITOK); refcount_init(&limp->pl_refcnt, 1); return (limp); } struct plimit * lim_hold(limp) struct plimit *limp; { refcount_acquire(&limp->pl_refcnt); return (limp); } static __inline int lim_shared(limp) struct plimit *limp; { return (limp->pl_refcnt > 1); } void lim_fork(struct proc *p1, struct proc *p2) { PROC_LOCK_ASSERT(p1, MA_OWNED); PROC_LOCK_ASSERT(p2, MA_OWNED); p2->p_limit = lim_hold(p1->p_limit); callout_init_mtx(&p2->p_limco, &p2->p_mtx, 0); if (p1->p_cpulimit != RLIM_INFINITY) callout_reset_sbt(&p2->p_limco, SBT_1S, 0, lim_cb, p2, C_PREL(1)); } void lim_free(limp) struct plimit *limp; { KASSERT(limp->pl_refcnt > 0, ("plimit refcnt underflow")); if (refcount_release(&limp->pl_refcnt)) free((void *)limp, M_PLIMIT); } /* * Make a copy of the plimit structure. * We share these structures copy-on-write after fork. */ void lim_copy(dst, src) struct plimit *dst, *src; { KASSERT(!lim_shared(dst), ("lim_copy to shared limit")); bcopy(src->pl_rlimit, dst->pl_rlimit, sizeof(src->pl_rlimit)); } /* * Return the hard limit for a particular system resource. The * which parameter specifies the index into the rlimit array. */ rlim_t lim_max(struct proc *p, int which) { struct rlimit rl; lim_rlimit(p, which, &rl); return (rl.rlim_max); } /* * Return the current (soft) limit for a particular system resource. * The which parameter which specifies the index into the rlimit array */ rlim_t lim_cur(struct proc *p, int which) { struct rlimit rl; lim_rlimit(p, which, &rl); return (rl.rlim_cur); } /* * Return a copy of the entire rlimit structure for the system limit * specified by 'which' in the rlimit structure pointed to by 'rlp'. */ void lim_rlimit(struct proc *p, int which, struct rlimit *rlp) { PROC_LOCK_ASSERT(p, MA_OWNED); KASSERT(which >= 0 && which < RLIM_NLIMITS, ("request for invalid resource limit")); *rlp = p->p_limit->pl_rlimit[which]; if (p->p_sysent->sv_fixlimit != NULL) p->p_sysent->sv_fixlimit(rlp, which); } void uihashinit() { uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash); rw_init(&uihashtbl_lock, "uidinfo hash"); } /* * Look up a uidinfo struct for the parameter uid. * uihashtbl_lock must be locked. */ static struct uidinfo * uilookup(uid) uid_t uid; { struct uihashhead *uipp; struct uidinfo *uip; rw_assert(&uihashtbl_lock, RA_LOCKED); uipp = UIHASH(uid); LIST_FOREACH(uip, uipp, ui_hash) if (uip->ui_uid == uid) break; return (uip); } /* * Find or allocate a struct uidinfo for a particular uid. * Increase refcount on uidinfo struct returned. * uifree() should be called on a struct uidinfo when released. */ struct uidinfo * uifind(uid) uid_t uid; { struct uidinfo *old_uip, *uip; rw_rlock(&uihashtbl_lock); uip = uilookup(uid); if (uip == NULL) { rw_runlock(&uihashtbl_lock); uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO); racct_create(&uip->ui_racct); rw_wlock(&uihashtbl_lock); /* * There's a chance someone created our uidinfo while we * were in malloc and not holding the lock, so we have to * make sure we don't insert a duplicate uidinfo. */ if ((old_uip = uilookup(uid)) != NULL) { /* Someone else beat us to it. */ racct_destroy(&uip->ui_racct); free(uip, M_UIDINFO); uip = old_uip; } else { refcount_init(&uip->ui_ref, 0); uip->ui_uid = uid; mtx_init(&uip->ui_vmsize_mtx, "ui_vmsize", NULL, MTX_DEF); LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash); } } uihold(uip); rw_unlock(&uihashtbl_lock); return (uip); } /* * Place another refcount on a uidinfo struct. */ void uihold(uip) struct uidinfo *uip; { refcount_acquire(&uip->ui_ref); } /*- * Since uidinfo structs have a long lifetime, we use an * opportunistic refcounting scheme to avoid locking the lookup hash * for each release. * * If the refcount hits 0, we need to free the structure, * which means we need to lock the hash. * Optimal case: * After locking the struct and lowering the refcount, if we find * that we don't need to free, simply unlock and return. * Suboptimal case: * If refcount lowering results in need to free, bump the count * back up, lose the lock and acquire the locks in the proper * order to try again. */ void uifree(uip) struct uidinfo *uip; { int old; /* Prepare for optimal case. */ old = uip->ui_ref; if (old > 1 && atomic_cmpset_int(&uip->ui_ref, old, old - 1)) return; /* Prepare for suboptimal case. */ rw_wlock(&uihashtbl_lock); if (refcount_release(&uip->ui_ref)) { racct_destroy(&uip->ui_racct); LIST_REMOVE(uip, ui_hash); rw_wunlock(&uihashtbl_lock); if (uip->ui_sbsize != 0) printf("freeing uidinfo: uid = %d, sbsize = %ld\n", uip->ui_uid, uip->ui_sbsize); if (uip->ui_proccnt != 0) printf("freeing uidinfo: uid = %d, proccnt = %ld\n", uip->ui_uid, uip->ui_proccnt); if (uip->ui_vmsize != 0) printf("freeing uidinfo: uid = %d, swapuse = %lld\n", uip->ui_uid, (unsigned long long)uip->ui_vmsize); mtx_destroy(&uip->ui_vmsize_mtx); free(uip, M_UIDINFO); return; } /* * Someone added a reference between atomic_cmpset_int() and * rw_wlock(&uihashtbl_lock). */ rw_wunlock(&uihashtbl_lock); } void ui_racct_foreach(void (*callback)(struct racct *racct, void *arg2, void *arg3), void *arg2, void *arg3) { struct uidinfo *uip; struct uihashhead *uih; rw_rlock(&uihashtbl_lock); for (uih = &uihashtbl[uihash]; uih >= uihashtbl; uih--) { LIST_FOREACH(uip, uih, ui_hash) { (callback)(uip->ui_racct, arg2, arg3); } } rw_runlock(&uihashtbl_lock); } /* * Change the count associated with number of processes * a given user is using. When 'max' is 0, don't enforce a limit */ int chgproccnt(uip, diff, max) struct uidinfo *uip; int diff; rlim_t max; { /* Don't allow them to exceed max, but allow subtraction. */ if (diff > 0 && max != 0) { if (atomic_fetchadd_long(&uip->ui_proccnt, (long)diff) + diff > max) { atomic_subtract_long(&uip->ui_proccnt, (long)diff); return (0); } } else { atomic_add_long(&uip->ui_proccnt, (long)diff); if (uip->ui_proccnt < 0) printf("negative proccnt for uid = %d\n", uip->ui_uid); } return (1); } /* * Change the total socket buffer size a user has used. */ int chgsbsize(uip, hiwat, to, max) struct uidinfo *uip; u_int *hiwat; u_int to; rlim_t max; { int diff; diff = to - *hiwat; if (diff > 0) { if (atomic_fetchadd_long(&uip->ui_sbsize, (long)diff) + diff > max) { atomic_subtract_long(&uip->ui_sbsize, (long)diff); return (0); } } else { atomic_add_long(&uip->ui_sbsize, (long)diff); if (uip->ui_sbsize < 0) printf("negative sbsize for uid = %d\n", uip->ui_uid); } *hiwat = to; return (1); } /* * Change the count associated with number of pseudo-terminals * a given user is using. When 'max' is 0, don't enforce a limit */ int chgptscnt(uip, diff, max) struct uidinfo *uip; int diff; rlim_t max; { /* Don't allow them to exceed max, but allow subtraction. */ if (diff > 0 && max != 0) { if (atomic_fetchadd_long(&uip->ui_ptscnt, (long)diff) + diff > max) { atomic_subtract_long(&uip->ui_ptscnt, (long)diff); return (0); } } else { atomic_add_long(&uip->ui_ptscnt, (long)diff); if (uip->ui_ptscnt < 0) printf("negative ptscnt for uid = %d\n", uip->ui_uid); } return (1); } Index: stable/10/sys/sys/imgact.h =================================================================== --- stable/10/sys/sys/imgact.h (revision 281847) +++ stable/10/sys/sys/imgact.h (revision 281848) @@ -1,102 +1,103 @@ /*- * Copyright (c) 1993, David Greenman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_IMGACT_H_ #define _SYS_IMGACT_H_ #include #include #define MAXSHELLCMDLEN PAGE_SIZE struct image_args { char *buf; /* pointer to string buffer */ char *begin_argv; /* beginning of argv in buf */ char *begin_envv; /* beginning of envv in buf */ char *endp; /* current `end' pointer of arg & env strings */ char *fname; /* pointer to filename of executable (system space) */ char *fname_buf; /* pointer to optional malloc(M_TEMP) buffer */ int stringspace; /* space left in arg & env buffer */ int argc; /* count of argument strings */ int envc; /* count of environment strings */ int fd; /* file descriptor of the executable */ }; struct image_params { struct proc *proc; /* our process struct */ struct label *execlabel; /* optional exec label */ struct vnode *vp; /* pointer to vnode of file to exec */ struct vm_object *object; /* The vm object for this vp */ struct vattr *attr; /* attributes of file */ const char *image_header; /* head of file to exec */ unsigned long entry_addr; /* entry address of target executable */ unsigned long reloc_base; /* load address of image */ char vmspace_destroyed; /* flag - we've blown away original vm space */ #define IMGACT_SHELL 0x1 #define IMGACT_BINMISC 0x2 unsigned char interpreted; /* mask of interpreters that have run */ char opened; /* flag - we have opened executable vnode */ char *interpreter_name; /* name of the interpreter */ void *auxargs; /* ELF Auxinfo structure pointer */ struct sf_buf *firstpage; /* first page that we mapped */ unsigned long ps_strings; /* PS_STRINGS for BSD/OS binaries */ size_t auxarg_size; struct image_args *args; /* system call arguments */ struct sysentvec *sysent; /* system entry vector */ char *execpath; unsigned long execpathp; char *freepath; unsigned long canary; int canarylen; unsigned long pagesizes; int pagesizeslen; vm_prot_t stack_prot; + u_long stack_sz; }; #ifdef _KERNEL struct sysentvec; struct thread; #define IMGACT_CORE_COMPRESS 0x01 int exec_alloc_args(struct image_args *); int exec_check_permissions(struct image_params *); register_t *exec_copyout_strings(struct image_params *); void exec_free_args(struct image_args *); int exec_new_vmspace(struct image_params *, struct sysentvec *); void exec_setregs(struct thread *, struct image_params *, u_long); int exec_shell_imgact(struct image_params *); int exec_copyin_args(struct image_args *, char *, enum uio_seg, char **, char **); #endif #endif /* !_SYS_IMGACT_H_ */ Index: stable/10 =================================================================== --- stable/10 (revision 281847) +++ stable/10 (revision 281848) Property changes on: stable/10 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r281548