Index: head/sys/amd64/linux32/linux32_machdep.c =================================================================== --- head/sys/amd64/linux32/linux32_machdep.c (revision 282707) +++ head/sys/amd64/linux32/linux32_machdep.c (revision 282708) @@ -1,1070 +1,1078 @@ /*- * Copyright (c) 2004 Tim J. Robbins * Copyright (c) 2002 Doug Rabson * Copyright (c) 2000 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct l_old_select_argv { l_int nfds; l_uintptr_t readfds; l_uintptr_t writefds; l_uintptr_t exceptfds; l_uintptr_t timeout; } __packed; int linux_to_bsd_sigaltstack(int lsa) { int bsa = 0; if (lsa & LINUX_SS_DISABLE) bsa |= SS_DISABLE; if (lsa & LINUX_SS_ONSTACK) bsa |= SS_ONSTACK; return (bsa); } static int linux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot, l_int flags, l_int fd, l_loff_t pos); int bsd_to_linux_sigaltstack(int bsa) { int lsa = 0; if (bsa & SS_DISABLE) lsa |= LINUX_SS_DISABLE; if (bsa & SS_ONSTACK) lsa |= LINUX_SS_ONSTACK; return (lsa); } static void bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru) { lru->ru_utime.tv_sec = ru->ru_utime.tv_sec; lru->ru_utime.tv_usec = ru->ru_utime.tv_usec; lru->ru_stime.tv_sec = ru->ru_stime.tv_sec; lru->ru_stime.tv_usec = ru->ru_stime.tv_usec; lru->ru_maxrss = ru->ru_maxrss; lru->ru_ixrss = ru->ru_ixrss; lru->ru_idrss = ru->ru_idrss; lru->ru_isrss = ru->ru_isrss; lru->ru_minflt = ru->ru_minflt; lru->ru_majflt = ru->ru_majflt; lru->ru_nswap = ru->ru_nswap; lru->ru_inblock = ru->ru_inblock; lru->ru_oublock = ru->ru_oublock; lru->ru_msgsnd = ru->ru_msgsnd; lru->ru_msgrcv = ru->ru_msgrcv; lru->ru_nsignals = ru->ru_nsignals; lru->ru_nvcsw = ru->ru_nvcsw; lru->ru_nivcsw = ru->ru_nivcsw; } int linux_execve(struct thread *td, struct linux_execve_args *args) { struct image_args eargs; + struct vmspace *oldvmspace; char *path; int error; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(execve)) printf(ARGS(execve, "%s"), path); #endif + error = pre_execve(td, &oldvmspace); + if (error != 0) { + free(path, M_TEMP); + return (error); + } error = freebsd32_exec_copyin_args(&eargs, path, UIO_SYSSPACE, args->argp, args->envp); free(path, M_TEMP); if (error == 0) error = kern_execve(td, &eargs, NULL); - if (error == 0) + if (error == 0) { /* Linux process can execute FreeBSD one, do not attempt * to create emuldata for such process using * linux_proc_init, this leads to a panic on KASSERT * because such process has p->p_emuldata == NULL. */ if (SV_PROC_ABI(td->td_proc) == SV_ABI_LINUX) error = linux_proc_init(td, 0, 0); + } + post_execve(td, error, oldvmspace); return (error); } CTASSERT(sizeof(struct l_iovec32) == 8); static int linux32_copyinuio(struct l_iovec32 *iovp, l_ulong iovcnt, struct uio **uiop) { struct l_iovec32 iov32; struct iovec *iov; struct uio *uio; uint32_t iovlen; int error, i; *uiop = NULL; if (iovcnt > UIO_MAXIOV) return (EINVAL); iovlen = iovcnt * sizeof(struct iovec); uio = malloc(iovlen + sizeof(*uio), M_IOV, M_WAITOK); iov = (struct iovec *)(uio + 1); for (i = 0; i < iovcnt; i++) { error = copyin(&iovp[i], &iov32, sizeof(struct l_iovec32)); if (error) { free(uio, M_IOV); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } uio->uio_iov = iov; uio->uio_iovcnt = iovcnt; uio->uio_segflg = UIO_USERSPACE; uio->uio_offset = -1; uio->uio_resid = 0; for (i = 0; i < iovcnt; i++) { if (iov->iov_len > INT_MAX - uio->uio_resid) { free(uio, M_IOV); return (EINVAL); } uio->uio_resid += iov->iov_len; iov++; } *uiop = uio; return (0); } int linux32_copyiniov(struct l_iovec32 *iovp32, l_ulong iovcnt, struct iovec **iovp, int error) { struct l_iovec32 iov32; struct iovec *iov; uint32_t iovlen; int i; *iovp = NULL; if (iovcnt > UIO_MAXIOV) return (error); iovlen = iovcnt * sizeof(struct iovec); iov = malloc(iovlen, M_IOV, M_WAITOK); for (i = 0; i < iovcnt; i++) { error = copyin(&iovp32[i], &iov32, sizeof(struct l_iovec32)); if (error) { free(iov, M_IOV); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } *iovp = iov; return(0); } int linux_readv(struct thread *td, struct linux_readv_args *uap) { struct uio *auio; int error; error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_readv(td, uap->fd, auio); free(auio, M_IOV); return (error); } int linux_writev(struct thread *td, struct linux_writev_args *uap) { struct uio *auio; int error; error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_writev(td, uap->fd, auio); free(auio, M_IOV); return (error); } struct l_ipc_kludge { l_uintptr_t msgp; l_long msgtyp; } __packed; int linux_ipc(struct thread *td, struct linux_ipc_args *args) { switch (args->what & 0xFFFF) { case LINUX_SEMOP: { struct linux_semop_args a; a.semid = args->arg1; a.tsops = args->ptr; a.nsops = args->arg2; return (linux_semop(td, &a)); } case LINUX_SEMGET: { struct linux_semget_args a; a.key = args->arg1; a.nsems = args->arg2; a.semflg = args->arg3; return (linux_semget(td, &a)); } case LINUX_SEMCTL: { struct linux_semctl_args a; int error; a.semid = args->arg1; a.semnum = args->arg2; a.cmd = args->arg3; error = copyin(args->ptr, &a.arg, sizeof(a.arg)); if (error) return (error); return (linux_semctl(td, &a)); } case LINUX_MSGSND: { struct linux_msgsnd_args a; a.msqid = args->arg1; a.msgp = args->ptr; a.msgsz = args->arg2; a.msgflg = args->arg3; return (linux_msgsnd(td, &a)); } case LINUX_MSGRCV: { struct linux_msgrcv_args a; a.msqid = args->arg1; a.msgsz = args->arg2; a.msgflg = args->arg3; if ((args->what >> 16) == 0) { struct l_ipc_kludge tmp; int error; if (args->ptr == 0) return (EINVAL); error = copyin(args->ptr, &tmp, sizeof(tmp)); if (error) return (error); a.msgp = PTRIN(tmp.msgp); a.msgtyp = tmp.msgtyp; } else { a.msgp = args->ptr; a.msgtyp = args->arg5; } return (linux_msgrcv(td, &a)); } case LINUX_MSGGET: { struct linux_msgget_args a; a.key = args->arg1; a.msgflg = args->arg2; return (linux_msgget(td, &a)); } case LINUX_MSGCTL: { struct linux_msgctl_args a; a.msqid = args->arg1; a.cmd = args->arg2; a.buf = args->ptr; return (linux_msgctl(td, &a)); } case LINUX_SHMAT: { struct linux_shmat_args a; a.shmid = args->arg1; a.shmaddr = args->ptr; a.shmflg = args->arg2; a.raddr = PTRIN((l_uint)args->arg3); return (linux_shmat(td, &a)); } case LINUX_SHMDT: { struct linux_shmdt_args a; a.shmaddr = args->ptr; return (linux_shmdt(td, &a)); } case LINUX_SHMGET: { struct linux_shmget_args a; a.key = args->arg1; a.size = args->arg2; a.shmflg = args->arg3; return (linux_shmget(td, &a)); } case LINUX_SHMCTL: { struct linux_shmctl_args a; a.shmid = args->arg1; a.cmd = args->arg2; a.buf = args->ptr; return (linux_shmctl(td, &a)); } default: break; } return (EINVAL); } int linux_old_select(struct thread *td, struct linux_old_select_args *args) { struct l_old_select_argv linux_args; struct linux_select_args newsel; int error; #ifdef DEBUG if (ldebug(old_select)) printf(ARGS(old_select, "%p"), args->ptr); #endif error = copyin(args->ptr, &linux_args, sizeof(linux_args)); if (error) return (error); newsel.nfds = linux_args.nfds; newsel.readfds = PTRIN(linux_args.readfds); newsel.writefds = PTRIN(linux_args.writefds); newsel.exceptfds = PTRIN(linux_args.exceptfds); newsel.timeout = PTRIN(linux_args.timeout); return (linux_select(td, &newsel)); } int linux_set_cloned_tls(struct thread *td, void *desc) { struct user_segment_descriptor sd; struct l_user_desc info; struct pcb *pcb; int error; int a[2]; error = copyin(desc, &info, sizeof(struct l_user_desc)); if (error) { printf(LMSG("copyin failed!")); } else { /* We might copy out the entry_number as GUGS32_SEL. */ info.entry_number = GUGS32_SEL; error = copyout(&info, desc, sizeof(struct l_user_desc)); if (error) printf(LMSG("copyout failed!")); a[0] = LINUX_LDT_entry_a(&info); a[1] = LINUX_LDT_entry_b(&info); memcpy(&sd, &a, sizeof(a)); #ifdef DEBUG if (ldebug(clone)) printf("Segment created in clone with " "CLONE_SETTLS: lobase: %x, hibase: %x, " "lolimit: %x, hilimit: %x, type: %i, " "dpl: %i, p: %i, xx: %i, long: %i, " "def32: %i, gran: %i\n", sd.sd_lobase, sd.sd_hibase, sd.sd_lolimit, sd.sd_hilimit, sd.sd_type, sd.sd_dpl, sd.sd_p, sd.sd_xx, sd.sd_long, sd.sd_def32, sd.sd_gran); #endif pcb = td->td_pcb; pcb->pcb_gsbase = (register_t)info.base_addr; /* XXXKIB pcb->pcb_gs32sd = sd; */ td->td_frame->tf_gs = GSEL(GUGS32_SEL, SEL_UPL); set_pcb_flags(pcb, PCB_32BIT); } return (error); } int linux_set_upcall_kse(struct thread *td, register_t stack) { td->td_frame->tf_rsp = stack; return (0); } #define STACK_SIZE (2 * 1024 * 1024) #define GUARD_SIZE (4 * PAGE_SIZE) int linux_mmap2(struct thread *td, struct linux_mmap2_args *args) { #ifdef DEBUG if (ldebug(mmap2)) printf(ARGS(mmap2, "0x%08x, %d, %d, 0x%08x, %d, %d"), args->addr, args->len, args->prot, args->flags, args->fd, args->pgoff); #endif return (linux_mmap_common(td, PTROUT(args->addr), args->len, args->prot, args->flags, args->fd, (uint64_t)(uint32_t)args->pgoff * PAGE_SIZE)); } int linux_mmap(struct thread *td, struct linux_mmap_args *args) { int error; struct l_mmap_argv linux_args; error = copyin(args->ptr, &linux_args, sizeof(linux_args)); if (error) return (error); #ifdef DEBUG if (ldebug(mmap)) printf(ARGS(mmap, "0x%08x, %d, %d, 0x%08x, %d, %d"), linux_args.addr, linux_args.len, linux_args.prot, linux_args.flags, linux_args.fd, linux_args.pgoff); #endif return (linux_mmap_common(td, linux_args.addr, linux_args.len, linux_args.prot, linux_args.flags, linux_args.fd, (uint32_t)linux_args.pgoff)); } static int linux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot, l_int flags, l_int fd, l_loff_t pos) { struct proc *p = td->td_proc; struct mmap_args /* { caddr_t addr; size_t len; int prot; int flags; int fd; long pad; off_t pos; } */ bsd_args; int error; struct file *fp; cap_rights_t rights; error = 0; bsd_args.flags = 0; fp = NULL; /* * Linux mmap(2): * You must specify exactly one of MAP_SHARED and MAP_PRIVATE */ if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE))) return (EINVAL); if (flags & LINUX_MAP_SHARED) bsd_args.flags |= MAP_SHARED; if (flags & LINUX_MAP_PRIVATE) bsd_args.flags |= MAP_PRIVATE; if (flags & LINUX_MAP_FIXED) bsd_args.flags |= MAP_FIXED; if (flags & LINUX_MAP_ANON) { /* Enforce pos to be on page boundary, then ignore. */ if ((pos & PAGE_MASK) != 0) return (EINVAL); pos = 0; bsd_args.flags |= MAP_ANON; } else bsd_args.flags |= MAP_NOSYNC; if (flags & LINUX_MAP_GROWSDOWN) bsd_args.flags |= MAP_STACK; /* * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC * on Linux/i386. We do this to ensure maximum compatibility. * Linux/ia64 does the same in i386 emulation mode. */ bsd_args.prot = prot; if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) bsd_args.prot |= PROT_READ | PROT_EXEC; /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */ bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd; if (bsd_args.fd != -1) { /* * Linux follows Solaris mmap(2) description: * The file descriptor fildes is opened with * read permission, regardless of the * protection options specified. */ error = fget(td, bsd_args.fd, cap_rights_init(&rights, CAP_MMAP), &fp); if (error != 0) return (error); if (fp->f_type != DTYPE_VNODE) { fdrop(fp, td); return (EINVAL); } /* Linux mmap() just fails for O_WRONLY files */ if (!(fp->f_flag & FREAD)) { fdrop(fp, td); return (EACCES); } fdrop(fp, td); } if (flags & LINUX_MAP_GROWSDOWN) { /* * The Linux MAP_GROWSDOWN option does not limit auto * growth of the region. Linux mmap with this option * takes as addr the inital BOS, and as len, the initial * region size. It can then grow down from addr without * limit. However, Linux threads has an implicit internal * limit to stack size of STACK_SIZE. Its just not * enforced explicitly in Linux. But, here we impose * a limit of (STACK_SIZE - GUARD_SIZE) on the stack * region, since we can do this with our mmap. * * Our mmap with MAP_STACK takes addr as the maximum * downsize limit on BOS, and as len the max size of * the region. It then maps the top SGROWSIZ bytes, * and auto grows the region down, up to the limit * in addr. * * If we don't use the MAP_STACK option, the effect * of this code is to allocate a stack region of a * fixed size of (STACK_SIZE - GUARD_SIZE). */ if ((caddr_t)PTRIN(addr) + len > p->p_vmspace->vm_maxsaddr) { /* * Some Linux apps will attempt to mmap * thread stacks near the top of their * address space. If their TOS is greater * than vm_maxsaddr, vm_map_growstack() * will confuse the thread stack with the * process stack and deliver a SEGV if they * attempt to grow the thread stack past their * current stacksize rlimit. To avoid this, * adjust vm_maxsaddr upwards to reflect * the current stacksize rlimit rather * than the maximum possible stacksize. * It would be better to adjust the * mmap'ed region, but some apps do not check * mmap's return value. */ PROC_LOCK(p); p->p_vmspace->vm_maxsaddr = (char *)LINUX32_USRSTACK - lim_cur(p, RLIMIT_STACK); PROC_UNLOCK(p); } /* * This gives us our maximum stack size and a new BOS. * If we're using VM_STACK, then mmap will just map * the top SGROWSIZ bytes, and let the stack grow down * to the limit at BOS. If we're not using VM_STACK * we map the full stack, since we don't have a way * to autogrow it. */ if (len > STACK_SIZE - GUARD_SIZE) { bsd_args.addr = (caddr_t)PTRIN(addr); bsd_args.len = len; } else { bsd_args.addr = (caddr_t)PTRIN(addr) - (STACK_SIZE - GUARD_SIZE - len); bsd_args.len = STACK_SIZE - GUARD_SIZE; } } else { bsd_args.addr = (caddr_t)PTRIN(addr); bsd_args.len = len; } bsd_args.pos = pos; #ifdef DEBUG if (ldebug(mmap)) printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n", __func__, (void *)bsd_args.addr, (int)bsd_args.len, bsd_args.prot, bsd_args.flags, bsd_args.fd, (int)bsd_args.pos); #endif error = sys_mmap(td, &bsd_args); #ifdef DEBUG if (ldebug(mmap)) printf("-> %s() return: 0x%x (0x%08x)\n", __func__, error, (u_int)td->td_retval[0]); #endif return (error); } int linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) { struct mprotect_args bsd_args; bsd_args.addr = uap->addr; bsd_args.len = uap->len; bsd_args.prot = uap->prot; if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) bsd_args.prot |= PROT_READ | PROT_EXEC; return (sys_mprotect(td, &bsd_args)); } int linux_iopl(struct thread *td, struct linux_iopl_args *args) { int error; if (args->level < 0 || args->level > 3) return (EINVAL); if ((error = priv_check(td, PRIV_IO)) != 0) return (error); if ((error = securelevel_gt(td->td_ucred, 0)) != 0) return (error); td->td_frame->tf_rflags = (td->td_frame->tf_rflags & ~PSL_IOPL) | (args->level * (PSL_IOPL / 3)); return (0); } int linux_sigaction(struct thread *td, struct linux_sigaction_args *args) { l_osigaction_t osa; l_sigaction_t act, oact; int error; #ifdef DEBUG if (ldebug(sigaction)) printf(ARGS(sigaction, "%d, %p, %p"), args->sig, (void *)args->nsa, (void *)args->osa); #endif if (args->nsa != NULL) { error = copyin(args->nsa, &osa, sizeof(l_osigaction_t)); if (error) return (error); act.lsa_handler = osa.lsa_handler; act.lsa_flags = osa.lsa_flags; act.lsa_restorer = osa.lsa_restorer; LINUX_SIGEMPTYSET(act.lsa_mask); act.lsa_mask.__bits[0] = osa.lsa_mask; } error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL, args->osa ? &oact : NULL); if (args->osa != NULL && !error) { osa.lsa_handler = oact.lsa_handler; osa.lsa_flags = oact.lsa_flags; osa.lsa_restorer = oact.lsa_restorer; osa.lsa_mask = oact.lsa_mask.__bits[0]; error = copyout(&osa, args->osa, sizeof(l_osigaction_t)); } return (error); } /* * Linux has two extra args, restart and oldmask. We don't use these, * but it seems that "restart" is actually a context pointer that * enables the signal to happen with a different register set. */ int linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args) { sigset_t sigmask; l_sigset_t mask; #ifdef DEBUG if (ldebug(sigsuspend)) printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask); #endif LINUX_SIGEMPTYSET(mask); mask.__bits[0] = args->mask; linux_to_bsd_sigset(&mask, &sigmask); return (kern_sigsuspend(td, sigmask)); } int linux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap) { l_sigset_t lmask; sigset_t sigmask; int error; #ifdef DEBUG if (ldebug(rt_sigsuspend)) printf(ARGS(rt_sigsuspend, "%p, %d"), (void *)uap->newset, uap->sigsetsize); #endif if (uap->sigsetsize != sizeof(l_sigset_t)) return (EINVAL); error = copyin(uap->newset, &lmask, sizeof(l_sigset_t)); if (error) return (error); linux_to_bsd_sigset(&lmask, &sigmask); return (kern_sigsuspend(td, sigmask)); } int linux_pause(struct thread *td, struct linux_pause_args *args) { struct proc *p = td->td_proc; sigset_t sigmask; #ifdef DEBUG if (ldebug(pause)) printf(ARGS(pause, "")); #endif PROC_LOCK(p); sigmask = td->td_sigmask; PROC_UNLOCK(p); return (kern_sigsuspend(td, sigmask)); } int linux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap) { stack_t ss, oss; l_stack_t lss; int error; #ifdef DEBUG if (ldebug(sigaltstack)) printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss); #endif if (uap->uss != NULL) { error = copyin(uap->uss, &lss, sizeof(l_stack_t)); if (error) return (error); ss.ss_sp = PTRIN(lss.ss_sp); ss.ss_size = lss.ss_size; ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags); } error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL, (uap->uoss != NULL) ? &oss : NULL); if (!error && uap->uoss != NULL) { lss.ss_sp = PTROUT(oss.ss_sp); lss.ss_size = oss.ss_size; lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags); error = copyout(&lss, uap->uoss, sizeof(l_stack_t)); } return (error); } int linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args) { struct ftruncate_args sa; #ifdef DEBUG if (ldebug(ftruncate64)) printf(ARGS(ftruncate64, "%u, %jd"), args->fd, (intmax_t)args->length); #endif sa.fd = args->fd; sa.length = args->length; return sys_ftruncate(td, &sa); } int linux_gettimeofday(struct thread *td, struct linux_gettimeofday_args *uap) { struct timeval atv; l_timeval atv32; struct timezone rtz; int error = 0; if (uap->tp) { microtime(&atv); atv32.tv_sec = atv.tv_sec; atv32.tv_usec = atv.tv_usec; error = copyout(&atv32, uap->tp, sizeof(atv32)); } if (error == 0 && uap->tzp != NULL) { rtz.tz_minuteswest = tz_minuteswest; rtz.tz_dsttime = tz_dsttime; error = copyout(&rtz, uap->tzp, sizeof(rtz)); } return (error); } int linux_settimeofday(struct thread *td, struct linux_settimeofday_args *uap) { l_timeval atv32; struct timeval atv, *tvp; struct timezone atz, *tzp; int error; if (uap->tp) { error = copyin(uap->tp, &atv32, sizeof(atv32)); if (error) return (error); atv.tv_sec = atv32.tv_sec; atv.tv_usec = atv32.tv_usec; tvp = &atv; } else tvp = NULL; if (uap->tzp) { error = copyin(uap->tzp, &atz, sizeof(atz)); if (error) return (error); tzp = &atz; } else tzp = NULL; return (kern_settimeofday(td, tvp, tzp)); } int linux_getrusage(struct thread *td, struct linux_getrusage_args *uap) { struct l_rusage s32; struct rusage s; int error; error = kern_getrusage(td, uap->who, &s); if (error != 0) return (error); if (uap->rusage != NULL) { bsd_to_linux_rusage(&s, &s32); error = copyout(&s32, uap->rusage, sizeof(s32)); } return (error); } int linux_sched_rr_get_interval(struct thread *td, struct linux_sched_rr_get_interval_args *uap) { struct timespec ts; struct l_timespec ts32; int error; error = kern_sched_rr_get_interval(td, uap->pid, &ts); if (error != 0) return (error); ts32.tv_sec = ts.tv_sec; ts32.tv_nsec = ts.tv_nsec; return (copyout(&ts32, uap->interval, sizeof(ts32))); } int linux_set_thread_area(struct thread *td, struct linux_set_thread_area_args *args) { struct l_user_desc info; struct user_segment_descriptor sd; struct pcb *pcb; int a[2]; int error; error = copyin(args->desc, &info, sizeof(struct l_user_desc)); if (error) return (error); #ifdef DEBUG if (ldebug(set_thread_area)) printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, " "%i, %i, %i"), info.entry_number, info.base_addr, info.limit, info.seg_32bit, info.contents, info.read_exec_only, info.limit_in_pages, info.seg_not_present, info.useable); #endif /* * Semantics of Linux version: every thread in the system has array * of three TLS descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. * This syscall loads one of the selected TLS decriptors with a value * and also loads GDT descriptors 6, 7 and 8 with the content of * the per-thread descriptors. * * Semantics of FreeBSD version: I think we can ignore that Linux has * three per-thread descriptors and use just the first one. * The tls_array[] is used only in [gs]et_thread_area() syscalls and * for loading the GDT descriptors. We use just one GDT descriptor * for TLS, so we will load just one. * * XXX: This doesn't work when a user space process tries to use more * than one TLS segment. Comment in the Linux source says wine might * do this. */ /* * GLIBC reads current %gs and call set_thread_area() with it. * We should let GUDATA_SEL and GUGS32_SEL proceed as well because * we use these segments. */ switch (info.entry_number) { case GUGS32_SEL: case GUDATA_SEL: case 6: case -1: info.entry_number = GUGS32_SEL; break; default: return (EINVAL); } /* * We have to copy out the GDT entry we use. * * XXX: What if a user space program does not check the return value * and tries to use 6, 7 or 8? */ error = copyout(&info, args->desc, sizeof(struct l_user_desc)); if (error) return (error); if (LINUX_LDT_empty(&info)) { a[0] = 0; a[1] = 0; } else { a[0] = LINUX_LDT_entry_a(&info); a[1] = LINUX_LDT_entry_b(&info); } memcpy(&sd, &a, sizeof(a)); #ifdef DEBUG if (ldebug(set_thread_area)) printf("Segment created in set_thread_area: " "lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, " "type: %i, dpl: %i, p: %i, xx: %i, long: %i, " "def32: %i, gran: %i\n", sd.sd_lobase, sd.sd_hibase, sd.sd_lolimit, sd.sd_hilimit, sd.sd_type, sd.sd_dpl, sd.sd_p, sd.sd_xx, sd.sd_long, sd.sd_def32, sd.sd_gran); #endif pcb = td->td_pcb; pcb->pcb_gsbase = (register_t)info.base_addr; set_pcb_flags(pcb, PCB_32BIT); update_gdt_gsbase(td, info.base_addr); return (0); } int linux_wait4(struct thread *td, struct linux_wait4_args *args) { int error, options; struct rusage ru, *rup; struct l_rusage lru; #ifdef DEBUG if (ldebug(wait4)) printf(ARGS(wait4, "%d, %p, %d, %p"), args->pid, (void *)args->status, args->options, (void *)args->rusage); #endif options = (args->options & (WNOHANG | WUNTRACED)); /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ if (args->options & __WCLONE) options |= WLINUXCLONE; if (args->rusage != NULL) rup = &ru; else rup = NULL; error = linux_common_wait(td, args->pid, args->status, options, rup); if (error) return (error); if (args->rusage != NULL) { bsd_to_linux_rusage(rup, &lru); error = copyout(&lru, args->rusage, sizeof(lru)); } return (error); } Index: head/sys/compat/freebsd32/freebsd32_misc.c =================================================================== --- head/sys/compat/freebsd32/freebsd32_misc.c (revision 282707) +++ head/sys/compat/freebsd32/freebsd32_misc.c (revision 282708) @@ -1,3126 +1,3136 @@ /*- * Copyright (c) 2002 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_inet.h" #include "opt_inet6.h" #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Must come after sys/malloc.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Must come after sys/selinfo.h */ #include /* Must come after sys/selinfo.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(compat_freebsd_32bit, "Compatible with 32-bit FreeBSD"); #ifndef __mips__ CTASSERT(sizeof(struct timeval32) == 8); CTASSERT(sizeof(struct timespec32) == 8); CTASSERT(sizeof(struct itimerval32) == 16); #endif CTASSERT(sizeof(struct statfs32) == 256); #ifndef __mips__ CTASSERT(sizeof(struct rusage32) == 72); #endif CTASSERT(sizeof(struct sigaltstack32) == 12); CTASSERT(sizeof(struct kevent32) == 20); CTASSERT(sizeof(struct iovec32) == 8); CTASSERT(sizeof(struct msghdr32) == 28); #ifndef __mips__ CTASSERT(sizeof(struct stat32) == 96); #endif CTASSERT(sizeof(struct sigaction32) == 24); static int freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count); static int freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count); void freebsd32_rusage_out(const struct rusage *s, struct rusage32 *s32) { TV_CP(*s, *s32, ru_utime); TV_CP(*s, *s32, ru_stime); CP(*s, *s32, ru_maxrss); CP(*s, *s32, ru_ixrss); CP(*s, *s32, ru_idrss); CP(*s, *s32, ru_isrss); CP(*s, *s32, ru_minflt); CP(*s, *s32, ru_majflt); CP(*s, *s32, ru_nswap); CP(*s, *s32, ru_inblock); CP(*s, *s32, ru_oublock); CP(*s, *s32, ru_msgsnd); CP(*s, *s32, ru_msgrcv); CP(*s, *s32, ru_nsignals); CP(*s, *s32, ru_nvcsw); CP(*s, *s32, ru_nivcsw); } int freebsd32_wait4(struct thread *td, struct freebsd32_wait4_args *uap) { int error, status; struct rusage32 ru32; struct rusage ru, *rup; if (uap->rusage != NULL) rup = &ru; else rup = NULL; error = kern_wait(td, uap->pid, &status, uap->options, rup); if (error) return (error); if (uap->status != NULL) error = copyout(&status, uap->status, sizeof(status)); if (uap->rusage != NULL && error == 0) { freebsd32_rusage_out(&ru, &ru32); error = copyout(&ru32, uap->rusage, sizeof(ru32)); } return (error); } int freebsd32_wait6(struct thread *td, struct freebsd32_wait6_args *uap) { struct wrusage32 wru32; struct __wrusage wru, *wrup; struct siginfo32 si32; struct __siginfo si, *sip; int error, status; if (uap->wrusage != NULL) wrup = &wru; else wrup = NULL; if (uap->info != NULL) { sip = &si; bzero(sip, sizeof(*sip)); } else sip = NULL; error = kern_wait6(td, uap->idtype, PAIR32TO64(id_t, uap->id), &status, uap->options, wrup, sip); if (error != 0) return (error); if (uap->status != NULL) error = copyout(&status, uap->status, sizeof(status)); if (uap->wrusage != NULL && error == 0) { freebsd32_rusage_out(&wru.wru_self, &wru32.wru_self); freebsd32_rusage_out(&wru.wru_children, &wru32.wru_children); error = copyout(&wru32, uap->wrusage, sizeof(wru32)); } if (uap->info != NULL && error == 0) { siginfo_to_siginfo32 (&si, &si32); error = copyout(&si32, uap->info, sizeof(si32)); } return (error); } #ifdef COMPAT_FREEBSD4 static void copy_statfs(struct statfs *in, struct statfs32 *out) { statfs_scale_blocks(in, INT32_MAX); bzero(out, sizeof(*out)); CP(*in, *out, f_bsize); out->f_iosize = MIN(in->f_iosize, INT32_MAX); CP(*in, *out, f_blocks); CP(*in, *out, f_bfree); CP(*in, *out, f_bavail); out->f_files = MIN(in->f_files, INT32_MAX); out->f_ffree = MIN(in->f_ffree, INT32_MAX); CP(*in, *out, f_fsid); CP(*in, *out, f_owner); CP(*in, *out, f_type); CP(*in, *out, f_flags); out->f_syncwrites = MIN(in->f_syncwrites, INT32_MAX); out->f_asyncwrites = MIN(in->f_asyncwrites, INT32_MAX); strlcpy(out->f_fstypename, in->f_fstypename, MFSNAMELEN); strlcpy(out->f_mntonname, in->f_mntonname, min(MNAMELEN, FREEBSD4_MNAMELEN)); out->f_syncreads = MIN(in->f_syncreads, INT32_MAX); out->f_asyncreads = MIN(in->f_asyncreads, INT32_MAX); strlcpy(out->f_mntfromname, in->f_mntfromname, min(MNAMELEN, FREEBSD4_MNAMELEN)); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_getfsstat(struct thread *td, struct freebsd4_freebsd32_getfsstat_args *uap) { struct statfs *buf, *sp; struct statfs32 stat32; size_t count, size, copycount; int error; count = uap->bufsize / sizeof(struct statfs32); size = count * sizeof(struct statfs); error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, uap->flags); if (size > 0) { sp = buf; copycount = count; while (copycount > 0 && error == 0) { copy_statfs(sp, &stat32); error = copyout(&stat32, uap->buf, sizeof(stat32)); sp++; uap->buf++; copycount--; } free(buf, M_TEMP); } if (error == 0) td->td_retval[0] = count; return (error); } #endif int freebsd32_sigaltstack(struct thread *td, struct freebsd32_sigaltstack_args *uap) { struct sigaltstack32 s32; struct sigaltstack ss, oss, *ssp; int error; if (uap->ss != NULL) { error = copyin(uap->ss, &s32, sizeof(s32)); if (error) return (error); PTRIN_CP(s32, ss, ss_sp); CP(s32, ss, ss_size); CP(s32, ss, ss_flags); ssp = &ss; } else ssp = NULL; error = kern_sigaltstack(td, ssp, &oss); if (error == 0 && uap->oss != NULL) { PTROUT_CP(oss, s32, ss_sp); CP(oss, s32, ss_size); CP(oss, s32, ss_flags); error = copyout(&s32, uap->oss, sizeof(s32)); } return (error); } /* * Custom version of exec_copyin_args() so that we can translate * the pointers. */ int freebsd32_exec_copyin_args(struct image_args *args, char *fname, enum uio_seg segflg, u_int32_t *argv, u_int32_t *envv) { char *argp, *envp; u_int32_t *p32, arg; size_t length; int error; bzero(args, sizeof(*args)); if (argv == NULL) return (EFAULT); /* * Allocate demand-paged memory for the file name, argument, and * environment strings. */ error = exec_alloc_args(args); if (error != 0) return (error); /* * Copy the file name. */ if (fname != NULL) { args->fname = args->buf; error = (segflg == UIO_SYSSPACE) ? copystr(fname, args->fname, PATH_MAX, &length) : copyinstr(fname, args->fname, PATH_MAX, &length); if (error != 0) goto err_exit; } else length = 0; args->begin_argv = args->buf + length; args->endp = args->begin_argv; args->stringspace = ARG_MAX; /* * extract arguments first */ p32 = argv; for (;;) { error = copyin(p32++, &arg, sizeof(arg)); if (error) goto err_exit; if (arg == 0) break; argp = PTRIN(arg); error = copyinstr(argp, args->endp, args->stringspace, &length); if (error) { if (error == ENAMETOOLONG) error = E2BIG; goto err_exit; } args->stringspace -= length; args->endp += length; args->argc++; } args->begin_envv = args->endp; /* * extract environment strings */ if (envv) { p32 = envv; for (;;) { error = copyin(p32++, &arg, sizeof(arg)); if (error) goto err_exit; if (arg == 0) break; envp = PTRIN(arg); error = copyinstr(envp, args->endp, args->stringspace, &length); if (error) { if (error == ENAMETOOLONG) error = E2BIG; goto err_exit; } args->stringspace -= length; args->endp += length; args->envc++; } } return (0); err_exit: exec_free_args(args); return (error); } int freebsd32_execve(struct thread *td, struct freebsd32_execve_args *uap) { struct image_args eargs; + struct vmspace *oldvmspace; int error; + error = pre_execve(td, &oldvmspace); + if (error != 0) + return (error); error = freebsd32_exec_copyin_args(&eargs, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &eargs, NULL); + post_execve(td, error, oldvmspace); return (error); } int freebsd32_fexecve(struct thread *td, struct freebsd32_fexecve_args *uap) { struct image_args eargs; + struct vmspace *oldvmspace; int error; + error = pre_execve(td, &oldvmspace); + if (error != 0) + return (error); error = freebsd32_exec_copyin_args(&eargs, NULL, UIO_SYSSPACE, uap->argv, uap->envv); if (error == 0) { eargs.fd = uap->fd; error = kern_execve(td, &eargs, NULL); } + post_execve(td, error, oldvmspace); return (error); } int freebsd32_mprotect(struct thread *td, struct freebsd32_mprotect_args *uap) { struct mprotect_args ap; ap.addr = PTRIN(uap->addr); ap.len = uap->len; ap.prot = uap->prot; #if defined(__amd64__) if (i386_read_exec && (ap.prot & PROT_READ) != 0) ap.prot |= PROT_EXEC; #endif return (sys_mprotect(td, &ap)); } int freebsd32_mmap(struct thread *td, struct freebsd32_mmap_args *uap) { struct mmap_args ap; vm_offset_t addr = (vm_offset_t) uap->addr; vm_size_t len = uap->len; int prot = uap->prot; int flags = uap->flags; int fd = uap->fd; off_t pos = PAIR32TO64(off_t,uap->pos); #if defined(__amd64__) if (i386_read_exec && (prot & PROT_READ)) prot |= PROT_EXEC; #endif ap.addr = (void *) addr; ap.len = len; ap.prot = prot; ap.flags = flags; ap.fd = fd; ap.pos = pos; return (sys_mmap(td, &ap)); } #ifdef COMPAT_FREEBSD6 int freebsd6_freebsd32_mmap(struct thread *td, struct freebsd6_freebsd32_mmap_args *uap) { struct freebsd32_mmap_args ap; ap.addr = uap->addr; ap.len = uap->len; ap.prot = uap->prot; ap.flags = uap->flags; ap.fd = uap->fd; ap.pos1 = uap->pos1; ap.pos2 = uap->pos2; return (freebsd32_mmap(td, &ap)); } #endif int freebsd32_setitimer(struct thread *td, struct freebsd32_setitimer_args *uap) { struct itimerval itv, oitv, *itvp; struct itimerval32 i32; int error; if (uap->itv != NULL) { error = copyin(uap->itv, &i32, sizeof(i32)); if (error) return (error); TV_CP(i32, itv, it_interval); TV_CP(i32, itv, it_value); itvp = &itv; } else itvp = NULL; error = kern_setitimer(td, uap->which, itvp, &oitv); if (error || uap->oitv == NULL) return (error); TV_CP(oitv, i32, it_interval); TV_CP(oitv, i32, it_value); return (copyout(&i32, uap->oitv, sizeof(i32))); } int freebsd32_getitimer(struct thread *td, struct freebsd32_getitimer_args *uap) { struct itimerval itv; struct itimerval32 i32; int error; error = kern_getitimer(td, uap->which, &itv); if (error || uap->itv == NULL) return (error); TV_CP(itv, i32, it_interval); TV_CP(itv, i32, it_value); return (copyout(&i32, uap->itv, sizeof(i32))); } int freebsd32_select(struct thread *td, struct freebsd32_select_args *uap) { struct timeval32 tv32; struct timeval tv, *tvp; int error; if (uap->tv != NULL) { error = copyin(uap->tv, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, tv, tv_sec); CP(tv32, tv, tv_usec); tvp = &tv; } else tvp = NULL; /* * XXX Do pointers need PTRIN()? */ return (kern_select(td, uap->nd, uap->in, uap->ou, uap->ex, tvp, sizeof(int32_t) * 8)); } int freebsd32_pselect(struct thread *td, struct freebsd32_pselect_args *uap) { struct timespec32 ts32; struct timespec ts; struct timeval tv, *tvp; sigset_t set, *uset; int error; if (uap->ts != NULL) { error = copyin(uap->ts, &ts32, sizeof(ts32)); if (error != 0) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); TIMESPEC_TO_TIMEVAL(&tv, &ts); tvp = &tv; } else tvp = NULL; if (uap->sm != NULL) { error = copyin(uap->sm, &set, sizeof(set)); if (error != 0) return (error); uset = &set; } else uset = NULL; /* * XXX Do pointers need PTRIN()? */ error = kern_pselect(td, uap->nd, uap->in, uap->ou, uap->ex, tvp, uset, sizeof(int32_t) * 8); return (error); } /* * Copy 'count' items into the destination list pointed to by uap->eventlist. */ static int freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count) { struct freebsd32_kevent_args *uap; struct kevent32 ks32[KQ_NEVENTS]; int i, error = 0; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd32_kevent_args *)arg; for (i = 0; i < count; i++) { CP(kevp[i], ks32[i], ident); CP(kevp[i], ks32[i], filter); CP(kevp[i], ks32[i], flags); CP(kevp[i], ks32[i], fflags); CP(kevp[i], ks32[i], data); PTROUT_CP(kevp[i], ks32[i], udata); } error = copyout(ks32, uap->eventlist, count * sizeof *ks32); if (error == 0) uap->eventlist += count; return (error); } /* * Copy 'count' items from the list pointed to by uap->changelist. */ static int freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count) { struct freebsd32_kevent_args *uap; struct kevent32 ks32[KQ_NEVENTS]; int i, error = 0; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd32_kevent_args *)arg; error = copyin(uap->changelist, ks32, count * sizeof *ks32); if (error) goto done; uap->changelist += count; for (i = 0; i < count; i++) { CP(ks32[i], kevp[i], ident); CP(ks32[i], kevp[i], filter); CP(ks32[i], kevp[i], flags); CP(ks32[i], kevp[i], fflags); CP(ks32[i], kevp[i], data); PTRIN_CP(ks32[i], kevp[i], udata); } done: return (error); } int freebsd32_kevent(struct thread *td, struct freebsd32_kevent_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; struct kevent_copyops k_ops = { uap, freebsd32_kevent_copyout, freebsd32_kevent_copyin}; int error; if (uap->timeout) { error = copyin(uap->timeout, &ts32, sizeof(ts32)); if (error) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); tsp = &ts; } else tsp = NULL; error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents, &k_ops, tsp); return (error); } int freebsd32_gettimeofday(struct thread *td, struct freebsd32_gettimeofday_args *uap) { struct timeval atv; struct timeval32 atv32; struct timezone rtz; int error = 0; if (uap->tp) { microtime(&atv); CP(atv, atv32, tv_sec); CP(atv, atv32, tv_usec); error = copyout(&atv32, uap->tp, sizeof (atv32)); } if (error == 0 && uap->tzp != NULL) { rtz.tz_minuteswest = tz_minuteswest; rtz.tz_dsttime = tz_dsttime; error = copyout(&rtz, uap->tzp, sizeof (rtz)); } return (error); } int freebsd32_getrusage(struct thread *td, struct freebsd32_getrusage_args *uap) { struct rusage32 s32; struct rusage s; int error; error = kern_getrusage(td, uap->who, &s); if (error) return (error); if (uap->rusage != NULL) { freebsd32_rusage_out(&s, &s32); error = copyout(&s32, uap->rusage, sizeof(s32)); } return (error); } static int freebsd32_copyinuio(struct iovec32 *iovp, u_int iovcnt, struct uio **uiop) { struct iovec32 iov32; struct iovec *iov; struct uio *uio; u_int iovlen; int error, i; *uiop = NULL; if (iovcnt > UIO_MAXIOV) return (EINVAL); iovlen = iovcnt * sizeof(struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); iov = (struct iovec *)(uio + 1); for (i = 0; i < iovcnt; i++) { error = copyin(&iovp[i], &iov32, sizeof(struct iovec32)); if (error) { free(uio, M_IOV); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } uio->uio_iov = iov; uio->uio_iovcnt = iovcnt; uio->uio_segflg = UIO_USERSPACE; uio->uio_offset = -1; uio->uio_resid = 0; for (i = 0; i < iovcnt; i++) { if (iov->iov_len > INT_MAX - uio->uio_resid) { free(uio, M_IOV); return (EINVAL); } uio->uio_resid += iov->iov_len; iov++; } *uiop = uio; return (0); } int freebsd32_readv(struct thread *td, struct freebsd32_readv_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_readv(td, uap->fd, auio); free(auio, M_IOV); return (error); } int freebsd32_writev(struct thread *td, struct freebsd32_writev_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_writev(td, uap->fd, auio); free(auio, M_IOV); return (error); } int freebsd32_preadv(struct thread *td, struct freebsd32_preadv_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_preadv(td, uap->fd, auio, PAIR32TO64(off_t,uap->offset)); free(auio, M_IOV); return (error); } int freebsd32_pwritev(struct thread *td, struct freebsd32_pwritev_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_pwritev(td, uap->fd, auio, PAIR32TO64(off_t,uap->offset)); free(auio, M_IOV); return (error); } int freebsd32_copyiniov(struct iovec32 *iovp32, u_int iovcnt, struct iovec **iovp, int error) { struct iovec32 iov32; struct iovec *iov; u_int iovlen; int i; *iovp = NULL; if (iovcnt > UIO_MAXIOV) return (error); iovlen = iovcnt * sizeof(struct iovec); iov = malloc(iovlen, M_IOV, M_WAITOK); for (i = 0; i < iovcnt; i++) { error = copyin(&iovp32[i], &iov32, sizeof(struct iovec32)); if (error) { free(iov, M_IOV); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } *iovp = iov; return (0); } static int freebsd32_copyinmsghdr(struct msghdr32 *msg32, struct msghdr *msg) { struct msghdr32 m32; int error; error = copyin(msg32, &m32, sizeof(m32)); if (error) return (error); msg->msg_name = PTRIN(m32.msg_name); msg->msg_namelen = m32.msg_namelen; msg->msg_iov = PTRIN(m32.msg_iov); msg->msg_iovlen = m32.msg_iovlen; msg->msg_control = PTRIN(m32.msg_control); msg->msg_controllen = m32.msg_controllen; msg->msg_flags = m32.msg_flags; return (0); } static int freebsd32_copyoutmsghdr(struct msghdr *msg, struct msghdr32 *msg32) { struct msghdr32 m32; int error; m32.msg_name = PTROUT(msg->msg_name); m32.msg_namelen = msg->msg_namelen; m32.msg_iov = PTROUT(msg->msg_iov); m32.msg_iovlen = msg->msg_iovlen; m32.msg_control = PTROUT(msg->msg_control); m32.msg_controllen = msg->msg_controllen; m32.msg_flags = msg->msg_flags; error = copyout(&m32, msg32, sizeof(m32)); return (error); } #ifndef __mips__ #define FREEBSD32_ALIGNBYTES (sizeof(int) - 1) #else #define FREEBSD32_ALIGNBYTES (sizeof(long) - 1) #endif #define FREEBSD32_ALIGN(p) \ (((u_long)(p) + FREEBSD32_ALIGNBYTES) & ~FREEBSD32_ALIGNBYTES) #define FREEBSD32_CMSG_SPACE(l) \ (FREEBSD32_ALIGN(sizeof(struct cmsghdr)) + FREEBSD32_ALIGN(l)) #define FREEBSD32_CMSG_DATA(cmsg) ((unsigned char *)(cmsg) + \ FREEBSD32_ALIGN(sizeof(struct cmsghdr))) static int freebsd32_copy_msg_out(struct msghdr *msg, struct mbuf *control) { struct cmsghdr *cm; void *data; socklen_t clen, datalen; int error; caddr_t ctlbuf; int len, maxlen, copylen; struct mbuf *m; error = 0; len = msg->msg_controllen; maxlen = msg->msg_controllen; msg->msg_controllen = 0; m = control; ctlbuf = msg->msg_control; while (m && len > 0) { cm = mtod(m, struct cmsghdr *); clen = m->m_len; while (cm != NULL) { if (sizeof(struct cmsghdr) > clen || cm->cmsg_len > clen) { error = EINVAL; break; } data = CMSG_DATA(cm); datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; /* Adjust message length */ cm->cmsg_len = FREEBSD32_ALIGN(sizeof(struct cmsghdr)) + datalen; /* Copy cmsghdr */ copylen = sizeof(struct cmsghdr); if (len < copylen) { msg->msg_flags |= MSG_CTRUNC; copylen = len; } error = copyout(cm,ctlbuf,copylen); if (error) goto exit; ctlbuf += FREEBSD32_ALIGN(copylen); len -= FREEBSD32_ALIGN(copylen); if (len <= 0) break; /* Copy data */ copylen = datalen; if (len < copylen) { msg->msg_flags |= MSG_CTRUNC; copylen = len; } error = copyout(data,ctlbuf,copylen); if (error) goto exit; ctlbuf += FREEBSD32_ALIGN(copylen); len -= FREEBSD32_ALIGN(copylen); if (CMSG_SPACE(datalen) < clen) { clen -= CMSG_SPACE(datalen); cm = (struct cmsghdr *) ((caddr_t)cm + CMSG_SPACE(datalen)); } else { clen = 0; cm = NULL; } } m = m->m_next; } msg->msg_controllen = (len <= 0) ? maxlen : ctlbuf - (caddr_t)msg->msg_control; exit: return (error); } int freebsd32_recvmsg(td, uap) struct thread *td; struct freebsd32_recvmsg_args /* { int s; struct msghdr32 *msg; int flags; } */ *uap; { struct msghdr msg; struct msghdr32 m32; struct iovec *uiov, *iov; struct mbuf *control = NULL; struct mbuf **controlp; int error; error = copyin(uap->msg, &m32, sizeof(m32)); if (error) return (error); error = freebsd32_copyinmsghdr(uap->msg, &msg); if (error) return (error); error = freebsd32_copyiniov(PTRIN(m32.msg_iov), m32.msg_iovlen, &iov, EMSGSIZE); if (error) return (error); msg.msg_flags = uap->flags; uiov = msg.msg_iov; msg.msg_iov = iov; controlp = (msg.msg_control != NULL) ? &control : NULL; error = kern_recvit(td, uap->s, &msg, UIO_USERSPACE, controlp); if (error == 0) { msg.msg_iov = uiov; if (control != NULL) error = freebsd32_copy_msg_out(&msg, control); else msg.msg_controllen = 0; if (error == 0) error = freebsd32_copyoutmsghdr(&msg, uap->msg); } free(iov, M_IOV); if (control != NULL) m_freem(control); return (error); } /* * Copy-in the array of control messages constructed using alignment * and padding suitable for a 32-bit environment and construct an * mbuf using alignment and padding suitable for a 64-bit kernel. * The alignment and padding are defined indirectly by CMSG_DATA(), * CMSG_SPACE() and CMSG_LEN(). */ static int freebsd32_copyin_control(struct mbuf **mp, caddr_t buf, u_int buflen) { struct mbuf *m; void *md; u_int idx, len, msglen; int error; buflen = FREEBSD32_ALIGN(buflen); if (buflen > MCLBYTES) return (EINVAL); /* * Iterate over the buffer and get the length of each message * in there. This has 32-bit alignment and padding. Use it to * determine the length of these messages when using 64-bit * alignment and padding. */ idx = 0; len = 0; while (idx < buflen) { error = copyin(buf + idx, &msglen, sizeof(msglen)); if (error) return (error); if (msglen < sizeof(struct cmsghdr)) return (EINVAL); msglen = FREEBSD32_ALIGN(msglen); if (idx + msglen > buflen) return (EINVAL); idx += msglen; msglen += CMSG_ALIGN(sizeof(struct cmsghdr)) - FREEBSD32_ALIGN(sizeof(struct cmsghdr)); len += CMSG_ALIGN(msglen); } if (len > MCLBYTES) return (EINVAL); m = m_get(M_WAITOK, MT_CONTROL); if (len > MLEN) MCLGET(m, M_WAITOK); m->m_len = len; md = mtod(m, void *); while (buflen > 0) { error = copyin(buf, md, sizeof(struct cmsghdr)); if (error) break; msglen = *(u_int *)md; msglen = FREEBSD32_ALIGN(msglen); /* Modify the message length to account for alignment. */ *(u_int *)md = msglen + CMSG_ALIGN(sizeof(struct cmsghdr)) - FREEBSD32_ALIGN(sizeof(struct cmsghdr)); md = (char *)md + CMSG_ALIGN(sizeof(struct cmsghdr)); buf += FREEBSD32_ALIGN(sizeof(struct cmsghdr)); buflen -= FREEBSD32_ALIGN(sizeof(struct cmsghdr)); msglen -= FREEBSD32_ALIGN(sizeof(struct cmsghdr)); if (msglen > 0) { error = copyin(buf, md, msglen); if (error) break; md = (char *)md + CMSG_ALIGN(msglen); buf += msglen; buflen -= msglen; } } if (error) m_free(m); else *mp = m; return (error); } int freebsd32_sendmsg(struct thread *td, struct freebsd32_sendmsg_args *uap) { struct msghdr msg; struct msghdr32 m32; struct iovec *iov; struct mbuf *control = NULL; struct sockaddr *to = NULL; int error; error = copyin(uap->msg, &m32, sizeof(m32)); if (error) return (error); error = freebsd32_copyinmsghdr(uap->msg, &msg); if (error) return (error); error = freebsd32_copyiniov(PTRIN(m32.msg_iov), m32.msg_iovlen, &iov, EMSGSIZE); if (error) return (error); msg.msg_iov = iov; if (msg.msg_name != NULL) { error = getsockaddr(&to, msg.msg_name, msg.msg_namelen); if (error) { to = NULL; goto out; } msg.msg_name = to; } if (msg.msg_control) { if (msg.msg_controllen < sizeof(struct cmsghdr)) { error = EINVAL; goto out; } error = freebsd32_copyin_control(&control, msg.msg_control, msg.msg_controllen); if (error) goto out; msg.msg_control = NULL; msg.msg_controllen = 0; } error = kern_sendit(td, uap->s, &msg, uap->flags, control, UIO_USERSPACE); out: free(iov, M_IOV); if (to) free(to, M_SONAME); return (error); } int freebsd32_recvfrom(struct thread *td, struct freebsd32_recvfrom_args *uap) { struct msghdr msg; struct iovec aiov; int error; if (uap->fromlenaddr) { error = copyin(PTRIN(uap->fromlenaddr), &msg.msg_namelen, sizeof(msg.msg_namelen)); if (error) return (error); } else { msg.msg_namelen = 0; } msg.msg_name = PTRIN(uap->from); msg.msg_iov = &aiov; msg.msg_iovlen = 1; aiov.iov_base = PTRIN(uap->buf); aiov.iov_len = uap->len; msg.msg_control = NULL; msg.msg_flags = uap->flags; error = kern_recvit(td, uap->s, &msg, UIO_USERSPACE, NULL); if (error == 0 && uap->fromlenaddr) error = copyout(&msg.msg_namelen, PTRIN(uap->fromlenaddr), sizeof (msg.msg_namelen)); return (error); } int freebsd32_settimeofday(struct thread *td, struct freebsd32_settimeofday_args *uap) { struct timeval32 tv32; struct timeval tv, *tvp; struct timezone tz, *tzp; int error; if (uap->tv) { error = copyin(uap->tv, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, tv, tv_sec); CP(tv32, tv, tv_usec); tvp = &tv; } else tvp = NULL; if (uap->tzp) { error = copyin(uap->tzp, &tz, sizeof(tz)); if (error) return (error); tzp = &tz; } else tzp = NULL; return (kern_settimeofday(td, tvp, tzp)); } int freebsd32_utimes(struct thread *td, struct freebsd32_utimes_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->tptr != NULL) { error = copyin(uap->tptr, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE)); } int freebsd32_lutimes(struct thread *td, struct freebsd32_lutimes_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->tptr != NULL) { error = copyin(uap->tptr, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_lutimes(td, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE)); } int freebsd32_futimes(struct thread *td, struct freebsd32_futimes_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->tptr != NULL) { error = copyin(uap->tptr, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_futimes(td, uap->fd, sp, UIO_SYSSPACE)); } int freebsd32_futimesat(struct thread *td, struct freebsd32_futimesat_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->times != NULL) { error = copyin(uap->times, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE)); } int freebsd32_futimens(struct thread *td, struct freebsd32_futimens_args *uap) { struct timespec32 ts32[2]; struct timespec ts[2], *tsp; int error; if (uap->times != NULL) { error = copyin(uap->times, ts32, sizeof(ts32)); if (error) return (error); CP(ts32[0], ts[0], tv_sec); CP(ts32[0], ts[0], tv_nsec); CP(ts32[1], ts[1], tv_sec); CP(ts32[1], ts[1], tv_nsec); tsp = ts; } else tsp = NULL; return (kern_futimens(td, uap->fd, tsp, UIO_SYSSPACE)); } int freebsd32_utimensat(struct thread *td, struct freebsd32_utimensat_args *uap) { struct timespec32 ts32[2]; struct timespec ts[2], *tsp; int error; if (uap->times != NULL) { error = copyin(uap->times, ts32, sizeof(ts32)); if (error) return (error); CP(ts32[0], ts[0], tv_sec); CP(ts32[0], ts[0], tv_nsec); CP(ts32[1], ts[1], tv_sec); CP(ts32[1], ts[1], tv_nsec); tsp = ts; } else tsp = NULL; return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, tsp, UIO_SYSSPACE, uap->flag)); } int freebsd32_adjtime(struct thread *td, struct freebsd32_adjtime_args *uap) { struct timeval32 tv32; struct timeval delta, olddelta, *deltap; int error; if (uap->delta) { error = copyin(uap->delta, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, delta, tv_sec); CP(tv32, delta, tv_usec); deltap = δ } else deltap = NULL; error = kern_adjtime(td, deltap, &olddelta); if (uap->olddelta && error == 0) { CP(olddelta, tv32, tv_sec); CP(olddelta, tv32, tv_usec); error = copyout(&tv32, uap->olddelta, sizeof(tv32)); } return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_statfs(struct thread *td, struct freebsd4_freebsd32_statfs_args *uap) { struct statfs32 s32; struct statfs s; int error; error = kern_statfs(td, uap->path, UIO_USERSPACE, &s); if (error) return (error); copy_statfs(&s, &s32); return (copyout(&s32, uap->buf, sizeof(s32))); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_fstatfs(struct thread *td, struct freebsd4_freebsd32_fstatfs_args *uap) { struct statfs32 s32; struct statfs s; int error; error = kern_fstatfs(td, uap->fd, &s); if (error) return (error); copy_statfs(&s, &s32); return (copyout(&s32, uap->buf, sizeof(s32))); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_fhstatfs(struct thread *td, struct freebsd4_freebsd32_fhstatfs_args *uap) { struct statfs32 s32; struct statfs s; fhandle_t fh; int error; if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) return (error); error = kern_fhstatfs(td, fh, &s); if (error) return (error); copy_statfs(&s, &s32); return (copyout(&s32, uap->buf, sizeof(s32))); } #endif int freebsd32_pread(struct thread *td, struct freebsd32_pread_args *uap) { struct pread_args ap; ap.fd = uap->fd; ap.buf = uap->buf; ap.nbyte = uap->nbyte; ap.offset = PAIR32TO64(off_t,uap->offset); return (sys_pread(td, &ap)); } int freebsd32_pwrite(struct thread *td, struct freebsd32_pwrite_args *uap) { struct pwrite_args ap; ap.fd = uap->fd; ap.buf = uap->buf; ap.nbyte = uap->nbyte; ap.offset = PAIR32TO64(off_t,uap->offset); return (sys_pwrite(td, &ap)); } #ifdef COMPAT_43 int ofreebsd32_lseek(struct thread *td, struct ofreebsd32_lseek_args *uap) { struct lseek_args nuap; nuap.fd = uap->fd; nuap.offset = uap->offset; nuap.whence = uap->whence; return (sys_lseek(td, &nuap)); } #endif int freebsd32_lseek(struct thread *td, struct freebsd32_lseek_args *uap) { int error; struct lseek_args ap; off_t pos; ap.fd = uap->fd; ap.offset = PAIR32TO64(off_t,uap->offset); ap.whence = uap->whence; error = sys_lseek(td, &ap); /* Expand the quad return into two parts for eax and edx */ pos = td->td_uretoff.tdu_off; td->td_retval[RETVAL_LO] = pos & 0xffffffff; /* %eax */ td->td_retval[RETVAL_HI] = pos >> 32; /* %edx */ return error; } int freebsd32_truncate(struct thread *td, struct freebsd32_truncate_args *uap) { struct truncate_args ap; ap.path = uap->path; ap.length = PAIR32TO64(off_t,uap->length); return (sys_truncate(td, &ap)); } int freebsd32_ftruncate(struct thread *td, struct freebsd32_ftruncate_args *uap) { struct ftruncate_args ap; ap.fd = uap->fd; ap.length = PAIR32TO64(off_t,uap->length); return (sys_ftruncate(td, &ap)); } #ifdef COMPAT_43 int ofreebsd32_getdirentries(struct thread *td, struct ofreebsd32_getdirentries_args *uap) { struct ogetdirentries_args ap; int error; long loff; int32_t loff_cut; ap.fd = uap->fd; ap.buf = uap->buf; ap.count = uap->count; ap.basep = NULL; error = kern_ogetdirentries(td, &ap, &loff); if (error == 0) { loff_cut = loff; error = copyout(&loff_cut, uap->basep, sizeof(int32_t)); } return (error); } #endif int freebsd32_getdirentries(struct thread *td, struct freebsd32_getdirentries_args *uap) { long base; int32_t base32; int error; error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, NULL, UIO_USERSPACE); if (error) return (error); if (uap->basep != NULL) { base32 = base; error = copyout(&base32, uap->basep, sizeof(int32_t)); } return (error); } #ifdef COMPAT_FREEBSD6 /* versions with the 'int pad' argument */ int freebsd6_freebsd32_pread(struct thread *td, struct freebsd6_freebsd32_pread_args *uap) { struct pread_args ap; ap.fd = uap->fd; ap.buf = uap->buf; ap.nbyte = uap->nbyte; ap.offset = PAIR32TO64(off_t,uap->offset); return (sys_pread(td, &ap)); } int freebsd6_freebsd32_pwrite(struct thread *td, struct freebsd6_freebsd32_pwrite_args *uap) { struct pwrite_args ap; ap.fd = uap->fd; ap.buf = uap->buf; ap.nbyte = uap->nbyte; ap.offset = PAIR32TO64(off_t,uap->offset); return (sys_pwrite(td, &ap)); } int freebsd6_freebsd32_lseek(struct thread *td, struct freebsd6_freebsd32_lseek_args *uap) { int error; struct lseek_args ap; off_t pos; ap.fd = uap->fd; ap.offset = PAIR32TO64(off_t,uap->offset); ap.whence = uap->whence; error = sys_lseek(td, &ap); /* Expand the quad return into two parts for eax and edx */ pos = *(off_t *)(td->td_retval); td->td_retval[RETVAL_LO] = pos & 0xffffffff; /* %eax */ td->td_retval[RETVAL_HI] = pos >> 32; /* %edx */ return error; } int freebsd6_freebsd32_truncate(struct thread *td, struct freebsd6_freebsd32_truncate_args *uap) { struct truncate_args ap; ap.path = uap->path; ap.length = PAIR32TO64(off_t,uap->length); return (sys_truncate(td, &ap)); } int freebsd6_freebsd32_ftruncate(struct thread *td, struct freebsd6_freebsd32_ftruncate_args *uap) { struct ftruncate_args ap; ap.fd = uap->fd; ap.length = PAIR32TO64(off_t,uap->length); return (sys_ftruncate(td, &ap)); } #endif /* COMPAT_FREEBSD6 */ struct sf_hdtr32 { uint32_t headers; int hdr_cnt; uint32_t trailers; int trl_cnt; }; static int freebsd32_do_sendfile(struct thread *td, struct freebsd32_sendfile_args *uap, int compat) { struct sf_hdtr32 hdtr32; struct sf_hdtr hdtr; struct uio *hdr_uio, *trl_uio; struct file *fp; cap_rights_t rights; struct iovec32 *iov32; off_t offset, sbytes; int error; offset = PAIR32TO64(off_t, uap->offset); if (offset < 0) return (EINVAL); hdr_uio = trl_uio = NULL; if (uap->hdtr != NULL) { error = copyin(uap->hdtr, &hdtr32, sizeof(hdtr32)); if (error) goto out; PTRIN_CP(hdtr32, hdtr, headers); CP(hdtr32, hdtr, hdr_cnt); PTRIN_CP(hdtr32, hdtr, trailers); CP(hdtr32, hdtr, trl_cnt); if (hdtr.headers != NULL) { iov32 = PTRIN(hdtr32.headers); error = freebsd32_copyinuio(iov32, hdtr32.hdr_cnt, &hdr_uio); if (error) goto out; } if (hdtr.trailers != NULL) { iov32 = PTRIN(hdtr32.trailers); error = freebsd32_copyinuio(iov32, hdtr32.trl_cnt, &trl_uio); if (error) goto out; } } AUDIT_ARG_FD(uap->fd); if ((error = fget_read(td, uap->fd, cap_rights_init(&rights, CAP_PREAD), &fp)) != 0) goto out; error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, offset, uap->nbytes, &sbytes, uap->flags, compat ? SFK_COMPAT : 0, td); fdrop(fp, td); if (uap->sbytes != NULL) copyout(&sbytes, uap->sbytes, sizeof(off_t)); out: if (hdr_uio) free(hdr_uio, M_IOV); if (trl_uio) free(trl_uio, M_IOV); return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_sendfile(struct thread *td, struct freebsd4_freebsd32_sendfile_args *uap) { return (freebsd32_do_sendfile(td, (struct freebsd32_sendfile_args *)uap, 1)); } #endif int freebsd32_sendfile(struct thread *td, struct freebsd32_sendfile_args *uap) { return (freebsd32_do_sendfile(td, uap, 0)); } static void copy_stat(struct stat *in, struct stat32 *out) { CP(*in, *out, st_dev); CP(*in, *out, st_ino); CP(*in, *out, st_mode); CP(*in, *out, st_nlink); CP(*in, *out, st_uid); CP(*in, *out, st_gid); CP(*in, *out, st_rdev); TS_CP(*in, *out, st_atim); TS_CP(*in, *out, st_mtim); TS_CP(*in, *out, st_ctim); CP(*in, *out, st_size); CP(*in, *out, st_blocks); CP(*in, *out, st_blksize); CP(*in, *out, st_flags); CP(*in, *out, st_gen); TS_CP(*in, *out, st_birthtim); } #ifdef COMPAT_43 static void copy_ostat(struct stat *in, struct ostat32 *out) { CP(*in, *out, st_dev); CP(*in, *out, st_ino); CP(*in, *out, st_mode); CP(*in, *out, st_nlink); CP(*in, *out, st_uid); CP(*in, *out, st_gid); CP(*in, *out, st_rdev); CP(*in, *out, st_size); TS_CP(*in, *out, st_atim); TS_CP(*in, *out, st_mtim); TS_CP(*in, *out, st_ctim); CP(*in, *out, st_blksize); CP(*in, *out, st_blocks); CP(*in, *out, st_flags); CP(*in, *out, st_gen); } #endif int freebsd32_stat(struct thread *td, struct freebsd32_stat_args *uap) { struct stat sb; struct stat32 sb32; int error; error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error) return (error); copy_stat(&sb, &sb32); error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } #ifdef COMPAT_43 int ofreebsd32_stat(struct thread *td, struct ofreebsd32_stat_args *uap) { struct stat sb; struct ostat32 sb32; int error; error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error) return (error); copy_ostat(&sb, &sb32); error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } #endif int freebsd32_fstat(struct thread *td, struct freebsd32_fstat_args *uap) { struct stat ub; struct stat32 ub32; int error; error = kern_fstat(td, uap->fd, &ub); if (error) return (error); copy_stat(&ub, &ub32); error = copyout(&ub32, uap->ub, sizeof(ub32)); return (error); } #ifdef COMPAT_43 int ofreebsd32_fstat(struct thread *td, struct ofreebsd32_fstat_args *uap) { struct stat ub; struct ostat32 ub32; int error; error = kern_fstat(td, uap->fd, &ub); if (error) return (error); copy_ostat(&ub, &ub32); error = copyout(&ub32, uap->ub, sizeof(ub32)); return (error); } #endif int freebsd32_fstatat(struct thread *td, struct freebsd32_fstatat_args *uap) { struct stat ub; struct stat32 ub32; int error; error = kern_statat(td, uap->flag, uap->fd, uap->path, UIO_USERSPACE, &ub, NULL); if (error) return (error); copy_stat(&ub, &ub32); error = copyout(&ub32, uap->buf, sizeof(ub32)); return (error); } int freebsd32_lstat(struct thread *td, struct freebsd32_lstat_args *uap) { struct stat sb; struct stat32 sb32; int error; error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error) return (error); copy_stat(&sb, &sb32); error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } #ifdef COMPAT_43 int ofreebsd32_lstat(struct thread *td, struct ofreebsd32_lstat_args *uap) { struct stat sb; struct ostat32 sb32; int error; error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error) return (error); copy_ostat(&sb, &sb32); error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } #endif int freebsd32_sysctl(struct thread *td, struct freebsd32_sysctl_args *uap) { int error, name[CTL_MAXNAME]; size_t j, oldlen; uint32_t tmp; if (uap->namelen > CTL_MAXNAME || uap->namelen < 2) return (EINVAL); error = copyin(uap->name, name, uap->namelen * sizeof(int)); if (error) return (error); if (uap->oldlenp) { error = fueword32(uap->oldlenp, &tmp); oldlen = tmp; } else { oldlen = 0; } if (error != 0) return (EFAULT); error = userland_sysctl(td, name, uap->namelen, uap->old, &oldlen, 1, uap->new, uap->newlen, &j, SCTL_MASK32); if (error && error != ENOMEM) return (error); if (uap->oldlenp) suword32(uap->oldlenp, j); return (0); } int freebsd32_jail(struct thread *td, struct freebsd32_jail_args *uap) { uint32_t version; int error; struct jail j; error = copyin(uap->jail, &version, sizeof(uint32_t)); if (error) return (error); switch (version) { case 0: { /* FreeBSD single IPv4 jails. */ struct jail32_v0 j32_v0; bzero(&j, sizeof(struct jail)); error = copyin(uap->jail, &j32_v0, sizeof(struct jail32_v0)); if (error) return (error); CP(j32_v0, j, version); PTRIN_CP(j32_v0, j, path); PTRIN_CP(j32_v0, j, hostname); j.ip4s = htonl(j32_v0.ip_number); /* jail_v0 is host order */ break; } case 1: /* * Version 1 was used by multi-IPv4 jail implementations * that never made it into the official kernel. */ return (EINVAL); case 2: /* JAIL_API_VERSION */ { /* FreeBSD multi-IPv4/IPv6,noIP jails. */ struct jail32 j32; error = copyin(uap->jail, &j32, sizeof(struct jail32)); if (error) return (error); CP(j32, j, version); PTRIN_CP(j32, j, path); PTRIN_CP(j32, j, hostname); PTRIN_CP(j32, j, jailname); CP(j32, j, ip4s); CP(j32, j, ip6s); PTRIN_CP(j32, j, ip4); PTRIN_CP(j32, j, ip6); break; } default: /* Sci-Fi jails are not supported, sorry. */ return (EINVAL); } return (kern_jail(td, &j)); } int freebsd32_jail_set(struct thread *td, struct freebsd32_jail_set_args *uap) { struct uio *auio; int error; /* Check that we have an even number of iovecs. */ if (uap->iovcnt & 1) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_set(td, auio, uap->flags); free(auio, M_IOV); return (error); } int freebsd32_jail_get(struct thread *td, struct freebsd32_jail_get_args *uap) { struct iovec32 iov32; struct uio *auio; int error, i; /* Check that we have an even number of iovecs. */ if (uap->iovcnt & 1) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_get(td, auio, uap->flags); if (error == 0) for (i = 0; i < uap->iovcnt; i++) { PTROUT_CP(auio->uio_iov[i], iov32, iov_base); CP(auio->uio_iov[i], iov32, iov_len); error = copyout(&iov32, uap->iovp + i, sizeof(iov32)); if (error != 0) break; } free(auio, M_IOV); return (error); } int freebsd32_sigaction(struct thread *td, struct freebsd32_sigaction_args *uap) { struct sigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->act) { error = copyin(uap->act, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); CP(s32, sa, sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->sig, sap, &osa, 0); if (error == 0 && uap->oact != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); CP(osa, s32, sa_mask); error = copyout(&s32, uap->oact, sizeof(s32)); } return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_sigaction(struct thread *td, struct freebsd4_freebsd32_sigaction_args *uap) { struct sigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->act) { error = copyin(uap->act, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); CP(s32, sa, sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->sig, sap, &osa, KSA_FREEBSD4); if (error == 0 && uap->oact != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); CP(osa, s32, sa_mask); error = copyout(&s32, uap->oact, sizeof(s32)); } return (error); } #endif #ifdef COMPAT_43 struct osigaction32 { u_int32_t sa_u; osigset_t sa_mask; int sa_flags; }; #define ONSIG 32 int ofreebsd32_sigaction(struct thread *td, struct ofreebsd32_sigaction_args *uap) { struct osigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->signum <= 0 || uap->signum >= ONSIG) return (EINVAL); if (uap->nsa) { error = copyin(uap->nsa, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); OSIG2SIG(s32.sa_mask, sa.sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->signum, sap, &osa, KSA_OSIGSET); if (error == 0 && uap->osa != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); SIG2OSIG(osa.sa_mask, s32.sa_mask); error = copyout(&s32, uap->osa, sizeof(s32)); } return (error); } int ofreebsd32_sigprocmask(struct thread *td, struct ofreebsd32_sigprocmask_args *uap) { sigset_t set, oset; int error; OSIG2SIG(uap->mask, set); error = kern_sigprocmask(td, uap->how, &set, &oset, SIGPROCMASK_OLD); SIG2OSIG(oset, td->td_retval[0]); return (error); } int ofreebsd32_sigpending(struct thread *td, struct ofreebsd32_sigpending_args *uap) { struct proc *p = td->td_proc; sigset_t siglist; PROC_LOCK(p); siglist = p->p_siglist; SIGSETOR(siglist, td->td_siglist); PROC_UNLOCK(p); SIG2OSIG(siglist, td->td_retval[0]); return (0); } struct sigvec32 { u_int32_t sv_handler; int sv_mask; int sv_flags; }; int ofreebsd32_sigvec(struct thread *td, struct ofreebsd32_sigvec_args *uap) { struct sigvec32 vec; struct sigaction sa, osa, *sap; int error; if (uap->signum <= 0 || uap->signum >= ONSIG) return (EINVAL); if (uap->nsv) { error = copyin(uap->nsv, &vec, sizeof(vec)); if (error) return (error); sa.sa_handler = PTRIN(vec.sv_handler); OSIG2SIG(vec.sv_mask, sa.sa_mask); sa.sa_flags = vec.sv_flags; sa.sa_flags ^= SA_RESTART; sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->signum, sap, &osa, KSA_OSIGSET); if (error == 0 && uap->osv != NULL) { vec.sv_handler = PTROUT(osa.sa_handler); SIG2OSIG(osa.sa_mask, vec.sv_mask); vec.sv_flags = osa.sa_flags; vec.sv_flags &= ~SA_NOCLDWAIT; vec.sv_flags ^= SA_RESTART; error = copyout(&vec, uap->osv, sizeof(vec)); } return (error); } int ofreebsd32_sigblock(struct thread *td, struct ofreebsd32_sigblock_args *uap) { sigset_t set, oset; OSIG2SIG(uap->mask, set); kern_sigprocmask(td, SIG_BLOCK, &set, &oset, 0); SIG2OSIG(oset, td->td_retval[0]); return (0); } int ofreebsd32_sigsetmask(struct thread *td, struct ofreebsd32_sigsetmask_args *uap) { sigset_t set, oset; OSIG2SIG(uap->mask, set); kern_sigprocmask(td, SIG_SETMASK, &set, &oset, 0); SIG2OSIG(oset, td->td_retval[0]); return (0); } int ofreebsd32_sigsuspend(struct thread *td, struct ofreebsd32_sigsuspend_args *uap) { sigset_t mask; OSIG2SIG(uap->mask, mask); return (kern_sigsuspend(td, mask)); } struct sigstack32 { u_int32_t ss_sp; int ss_onstack; }; int ofreebsd32_sigstack(struct thread *td, struct ofreebsd32_sigstack_args *uap) { struct sigstack32 s32; struct sigstack nss, oss; int error = 0, unss; if (uap->nss != NULL) { error = copyin(uap->nss, &s32, sizeof(s32)); if (error) return (error); nss.ss_sp = PTRIN(s32.ss_sp); CP(s32, nss, ss_onstack); unss = 1; } else { unss = 0; } oss.ss_sp = td->td_sigstk.ss_sp; oss.ss_onstack = sigonstack(cpu_getstack(td)); if (unss) { td->td_sigstk.ss_sp = nss.ss_sp; td->td_sigstk.ss_size = 0; td->td_sigstk.ss_flags |= (nss.ss_onstack & SS_ONSTACK); td->td_pflags |= TDP_ALTSTACK; } if (uap->oss != NULL) { s32.ss_sp = PTROUT(oss.ss_sp); CP(oss, s32, ss_onstack); error = copyout(&s32, uap->oss, sizeof(s32)); } return (error); } #endif int freebsd32_nanosleep(struct thread *td, struct freebsd32_nanosleep_args *uap) { struct timespec32 rmt32, rqt32; struct timespec rmt, rqt; int error; error = copyin(uap->rqtp, &rqt32, sizeof(rqt32)); if (error) return (error); CP(rqt32, rqt, tv_sec); CP(rqt32, rqt, tv_nsec); if (uap->rmtp && !useracc((caddr_t)uap->rmtp, sizeof(rmt), VM_PROT_WRITE)) return (EFAULT); error = kern_nanosleep(td, &rqt, &rmt); if (error && uap->rmtp) { int error2; CP(rmt, rmt32, tv_sec); CP(rmt, rmt32, tv_nsec); error2 = copyout(&rmt32, uap->rmtp, sizeof(rmt32)); if (error2) error = error2; } return (error); } int freebsd32_clock_gettime(struct thread *td, struct freebsd32_clock_gettime_args *uap) { struct timespec ats; struct timespec32 ats32; int error; error = kern_clock_gettime(td, uap->clock_id, &ats); if (error == 0) { CP(ats, ats32, tv_sec); CP(ats, ats32, tv_nsec); error = copyout(&ats32, uap->tp, sizeof(ats32)); } return (error); } int freebsd32_clock_settime(struct thread *td, struct freebsd32_clock_settime_args *uap) { struct timespec ats; struct timespec32 ats32; int error; error = copyin(uap->tp, &ats32, sizeof(ats32)); if (error) return (error); CP(ats32, ats, tv_sec); CP(ats32, ats, tv_nsec); return (kern_clock_settime(td, uap->clock_id, &ats)); } int freebsd32_clock_getres(struct thread *td, struct freebsd32_clock_getres_args *uap) { struct timespec ts; struct timespec32 ts32; int error; if (uap->tp == NULL) return (0); error = kern_clock_getres(td, uap->clock_id, &ts); if (error == 0) { CP(ts, ts32, tv_sec); CP(ts, ts32, tv_nsec); error = copyout(&ts32, uap->tp, sizeof(ts32)); } return (error); } int freebsd32_ktimer_create(struct thread *td, struct freebsd32_ktimer_create_args *uap) { struct sigevent32 ev32; struct sigevent ev, *evp; int error, id; if (uap->evp == NULL) { evp = NULL; } else { evp = &ev; error = copyin(uap->evp, &ev32, sizeof(ev32)); if (error != 0) return (error); error = convert_sigevent32(&ev32, &ev); if (error != 0) return (error); } error = kern_ktimer_create(td, uap->clock_id, evp, &id, -1); if (error == 0) { error = copyout(&id, uap->timerid, sizeof(int)); if (error != 0) kern_ktimer_delete(td, id); } return (error); } int freebsd32_ktimer_settime(struct thread *td, struct freebsd32_ktimer_settime_args *uap) { struct itimerspec32 val32, oval32; struct itimerspec val, oval, *ovalp; int error; error = copyin(uap->value, &val32, sizeof(val32)); if (error != 0) return (error); ITS_CP(val32, val); ovalp = uap->ovalue != NULL ? &oval : NULL; error = kern_ktimer_settime(td, uap->timerid, uap->flags, &val, ovalp); if (error == 0 && uap->ovalue != NULL) { ITS_CP(oval, oval32); error = copyout(&oval32, uap->ovalue, sizeof(oval32)); } return (error); } int freebsd32_ktimer_gettime(struct thread *td, struct freebsd32_ktimer_gettime_args *uap) { struct itimerspec32 val32; struct itimerspec val; int error; error = kern_ktimer_gettime(td, uap->timerid, &val); if (error == 0) { ITS_CP(val, val32); error = copyout(&val32, uap->value, sizeof(val32)); } return (error); } int freebsd32_clock_getcpuclockid2(struct thread *td, struct freebsd32_clock_getcpuclockid2_args *uap) { clockid_t clk_id; int error; error = kern_clock_getcpuclockid2(td, PAIR32TO64(id_t, uap->id), uap->which, &clk_id); if (error == 0) error = copyout(&clk_id, uap->clock_id, sizeof(clockid_t)); return (error); } int freebsd32_thr_new(struct thread *td, struct freebsd32_thr_new_args *uap) { struct thr_param32 param32; struct thr_param param; int error; if (uap->param_size < 0 || uap->param_size > sizeof(struct thr_param32)) return (EINVAL); bzero(¶m, sizeof(struct thr_param)); bzero(¶m32, sizeof(struct thr_param32)); error = copyin(uap->param, ¶m32, uap->param_size); if (error != 0) return (error); param.start_func = PTRIN(param32.start_func); param.arg = PTRIN(param32.arg); param.stack_base = PTRIN(param32.stack_base); param.stack_size = param32.stack_size; param.tls_base = PTRIN(param32.tls_base); param.tls_size = param32.tls_size; param.child_tid = PTRIN(param32.child_tid); param.parent_tid = PTRIN(param32.parent_tid); param.flags = param32.flags; param.rtp = PTRIN(param32.rtp); param.spare[0] = PTRIN(param32.spare[0]); param.spare[1] = PTRIN(param32.spare[1]); param.spare[2] = PTRIN(param32.spare[2]); return (kern_thr_new(td, ¶m)); } int freebsd32_thr_suspend(struct thread *td, struct freebsd32_thr_suspend_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; int error; error = 0; tsp = NULL; if (uap->timeout != NULL) { error = copyin((const void *)uap->timeout, (void *)&ts32, sizeof(struct timespec32)); if (error != 0) return (error); ts.tv_sec = ts32.tv_sec; ts.tv_nsec = ts32.tv_nsec; tsp = &ts; } return (kern_thr_suspend(td, tsp)); } void siginfo_to_siginfo32(const siginfo_t *src, struct siginfo32 *dst) { bzero(dst, sizeof(*dst)); dst->si_signo = src->si_signo; dst->si_errno = src->si_errno; dst->si_code = src->si_code; dst->si_pid = src->si_pid; dst->si_uid = src->si_uid; dst->si_status = src->si_status; dst->si_addr = (uintptr_t)src->si_addr; dst->si_value.sival_int = src->si_value.sival_int; dst->si_timerid = src->si_timerid; dst->si_overrun = src->si_overrun; } int freebsd32_sigtimedwait(struct thread *td, struct freebsd32_sigtimedwait_args *uap) { struct timespec32 ts32; struct timespec ts; struct timespec *timeout; sigset_t set; ksiginfo_t ksi; struct siginfo32 si32; int error; if (uap->timeout) { error = copyin(uap->timeout, &ts32, sizeof(ts32)); if (error) return (error); ts.tv_sec = ts32.tv_sec; ts.tv_nsec = ts32.tv_nsec; timeout = &ts; } else timeout = NULL; error = copyin(uap->set, &set, sizeof(set)); if (error) return (error); error = kern_sigtimedwait(td, set, &ksi, timeout); if (error) return (error); if (uap->info) { siginfo_to_siginfo32(&ksi.ksi_info, &si32); error = copyout(&si32, uap->info, sizeof(struct siginfo32)); } if (error == 0) td->td_retval[0] = ksi.ksi_signo; return (error); } /* * MPSAFE */ int freebsd32_sigwaitinfo(struct thread *td, struct freebsd32_sigwaitinfo_args *uap) { ksiginfo_t ksi; struct siginfo32 si32; sigset_t set; int error; error = copyin(uap->set, &set, sizeof(set)); if (error) return (error); error = kern_sigtimedwait(td, set, &ksi, NULL); if (error) return (error); if (uap->info) { siginfo_to_siginfo32(&ksi.ksi_info, &si32); error = copyout(&si32, uap->info, sizeof(struct siginfo32)); } if (error == 0) td->td_retval[0] = ksi.ksi_signo; return (error); } int freebsd32_cpuset_setid(struct thread *td, struct freebsd32_cpuset_setid_args *uap) { struct cpuset_setid_args ap; ap.which = uap->which; ap.id = PAIR32TO64(id_t,uap->id); ap.setid = uap->setid; return (sys_cpuset_setid(td, &ap)); } int freebsd32_cpuset_getid(struct thread *td, struct freebsd32_cpuset_getid_args *uap) { struct cpuset_getid_args ap; ap.level = uap->level; ap.which = uap->which; ap.id = PAIR32TO64(id_t,uap->id); ap.setid = uap->setid; return (sys_cpuset_getid(td, &ap)); } int freebsd32_cpuset_getaffinity(struct thread *td, struct freebsd32_cpuset_getaffinity_args *uap) { struct cpuset_getaffinity_args ap; ap.level = uap->level; ap.which = uap->which; ap.id = PAIR32TO64(id_t,uap->id); ap.cpusetsize = uap->cpusetsize; ap.mask = uap->mask; return (sys_cpuset_getaffinity(td, &ap)); } int freebsd32_cpuset_setaffinity(struct thread *td, struct freebsd32_cpuset_setaffinity_args *uap) { struct cpuset_setaffinity_args ap; ap.level = uap->level; ap.which = uap->which; ap.id = PAIR32TO64(id_t,uap->id); ap.cpusetsize = uap->cpusetsize; ap.mask = uap->mask; return (sys_cpuset_setaffinity(td, &ap)); } int freebsd32_nmount(struct thread *td, struct freebsd32_nmount_args /* { struct iovec *iovp; unsigned int iovcnt; int flags; } */ *uap) { struct uio *auio; uint64_t flags; int error; /* * Mount flags are now 64-bits. On 32-bit archtectures only * 32-bits are passed in, but from here on everything handles * 64-bit flags correctly. */ flags = uap->flags; AUDIT_ARG_FFLAGS(flags); /* * Filter out MNT_ROOTFS. We do not want clients of nmount() in * userspace to set this flag, but we must filter it out if we want * MNT_UPDATE on the root file system to work. * MNT_ROOTFS should only be set by the kernel when mounting its * root file system. */ flags &= ~MNT_ROOTFS; /* * check that we have an even number of iovec's * and that we have at least two options. */ if ((uap->iovcnt & 1) || (uap->iovcnt < 4)) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = vfs_donmount(td, flags, auio); free(auio, M_IOV); return error; } #if 0 int freebsd32_xxx(struct thread *td, struct freebsd32_xxx_args *uap) { struct yyy32 *p32, s32; struct yyy *p = NULL, s; struct xxx_arg ap; int error; if (uap->zzz) { error = copyin(uap->zzz, &s32, sizeof(s32)); if (error) return (error); /* translate in */ p = &s; } error = kern_xxx(td, p); if (error) return (error); if (uap->zzz) { /* translate out */ error = copyout(&s32, p32, sizeof(s32)); } return (error); } #endif int syscall32_register(int *offset, struct sysent *new_sysent, struct sysent *old_sysent, int flags) { if ((flags & ~SY_THR_STATIC) != 0) return (EINVAL); if (*offset == NO_SYSCALL) { int i; for (i = 1; i < SYS_MAXSYSCALL; ++i) if (freebsd32_sysent[i].sy_call == (sy_call_t *)lkmnosys) break; if (i == SYS_MAXSYSCALL) return (ENFILE); *offset = i; } else if (*offset < 0 || *offset >= SYS_MAXSYSCALL) return (EINVAL); else if (freebsd32_sysent[*offset].sy_call != (sy_call_t *)lkmnosys && freebsd32_sysent[*offset].sy_call != (sy_call_t *)lkmressys) return (EEXIST); *old_sysent = freebsd32_sysent[*offset]; freebsd32_sysent[*offset] = *new_sysent; atomic_store_rel_32(&freebsd32_sysent[*offset].sy_thrcnt, flags); return (0); } int syscall32_deregister(int *offset, struct sysent *old_sysent) { if (*offset == 0) return (0); freebsd32_sysent[*offset] = *old_sysent; return (0); } int syscall32_module_handler(struct module *mod, int what, void *arg) { struct syscall_module_data *data = (struct syscall_module_data*)arg; modspecific_t ms; int error; switch (what) { case MOD_LOAD: error = syscall32_register(data->offset, data->new_sysent, &data->old_sysent, SY_THR_STATIC_KLD); if (error) { /* Leave a mark so we know to safely unload below. */ data->offset = NULL; return error; } ms.intval = *data->offset; MOD_XLOCK; module_setspecific(mod, &ms); MOD_XUNLOCK; if (data->chainevh) error = data->chainevh(mod, what, data->chainarg); return (error); case MOD_UNLOAD: /* * MOD_LOAD failed, so just return without calling the * chained handler since we didn't pass along the MOD_LOAD * event. */ if (data->offset == NULL) return (0); if (data->chainevh) { error = data->chainevh(mod, what, data->chainarg); if (error) return (error); } error = syscall32_deregister(data->offset, &data->old_sysent); return (error); default: error = EOPNOTSUPP; if (data->chainevh) error = data->chainevh(mod, what, data->chainarg); return (error); } } int syscall32_helper_register(struct syscall_helper_data *sd, int flags) { struct syscall_helper_data *sd1; int error; for (sd1 = sd; sd1->syscall_no != NO_SYSCALL; sd1++) { error = syscall32_register(&sd1->syscall_no, &sd1->new_sysent, &sd1->old_sysent, flags); if (error != 0) { syscall32_helper_unregister(sd); return (error); } sd1->registered = 1; } return (0); } int syscall32_helper_unregister(struct syscall_helper_data *sd) { struct syscall_helper_data *sd1; for (sd1 = sd; sd1->registered != 0; sd1++) { syscall32_deregister(&sd1->syscall_no, &sd1->old_sysent); sd1->registered = 0; } return (0); } register_t * freebsd32_copyout_strings(struct image_params *imgp) { int argc, envc, i; u_int32_t *vectp; char *stringp; uintptr_t destp; u_int32_t *stack_base; struct freebsd32_ps_strings *arginfo; char canary[sizeof(long) * 8]; int32_t pagesizes32[MAXPAGESIZES]; size_t execpath_len; int szsigcode; /* * Calculate string base and vector table pointers. * Also deal with signal trampoline code for this exec type. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; arginfo = (struct freebsd32_ps_strings *)curproc->p_sysent-> sv_psstrings; if (imgp->proc->p_sysent->sv_sigcode_base == 0) szsigcode = *(imgp->proc->p_sysent->sv_szsigcode); else szsigcode = 0; destp = (uintptr_t)arginfo; /* * install sigcode */ if (szsigcode != 0) { destp -= szsigcode; destp = rounddown2(destp, sizeof(uint32_t)); copyout(imgp->proc->p_sysent->sv_sigcode, (void *)destp, szsigcode); } /* * Copy the image path for the rtld. */ if (execpath_len != 0) { destp -= execpath_len; imgp->execpathp = destp; copyout(imgp->execpath, (void *)destp, execpath_len); } /* * Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); destp -= sizeof(canary); imgp->canary = destp; copyout(canary, (void *)destp, sizeof(canary)); imgp->canarylen = sizeof(canary); /* * Prepare the pagesizes array. */ for (i = 0; i < MAXPAGESIZES; i++) pagesizes32[i] = (uint32_t)pagesizes[i]; destp -= sizeof(pagesizes32); destp = rounddown2(destp, sizeof(uint32_t)); imgp->pagesizes = destp; copyout(pagesizes32, (void *)destp, sizeof(pagesizes32)); imgp->pagesizeslen = sizeof(pagesizes32); destp -= ARG_MAX - imgp->args->stringspace; destp = rounddown2(destp, sizeof(uint32_t)); /* * If we have a valid auxargs ptr, prepare some room * on the stack. */ if (imgp->auxargs) { /* * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for * lower compatibility. */ imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : (AT_COUNT * 2); /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets,and imgp->auxarg_size is room * for argument of Runtime loader. */ vectp = (u_int32_t *) (destp - (imgp->args->argc + imgp->args->envc + 2 + imgp->auxarg_size + execpath_len) * sizeof(u_int32_t)); } else { /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets */ vectp = (u_int32_t *)(destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(u_int32_t)); } /* * vectp also becomes our initial stack base */ stack_base = vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* * Copy out strings - arguments and environment. */ copyout(stringp, (void *)destp, ARG_MAX - imgp->args->stringspace); /* * Fill in "ps_strings" struct for ps, w, etc. */ suword32(&arginfo->ps_argvstr, (u_int32_t)(intptr_t)vectp); suword32(&arginfo->ps_nargvstr, argc); /* * Fill in argument portion of vector table. */ for (; argc > 0; --argc) { suword32(vectp++, (u_int32_t)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* a null vector table pointer separates the argp's from the envp's */ suword32(vectp++, 0); suword32(&arginfo->ps_envstr, (u_int32_t)(intptr_t)vectp); suword32(&arginfo->ps_nenvstr, envc); /* * Fill in environment portion of vector table. */ for (; envc > 0; --envc) { suword32(vectp++, (u_int32_t)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* end of vector table is a null pointer */ suword32(vectp, 0); return ((register_t *)stack_base); } int freebsd32_kldstat(struct thread *td, struct freebsd32_kldstat_args *uap) { struct kld_file_stat stat; struct kld32_file_stat stat32; int error, version; if ((error = copyin(&uap->stat->version, &version, sizeof(version))) != 0) return (error); if (version != sizeof(struct kld32_file_stat_1) && version != sizeof(struct kld32_file_stat)) return (EINVAL); error = kern_kldstat(td, uap->fileid, &stat); if (error != 0) return (error); bcopy(&stat.name[0], &stat32.name[0], sizeof(stat.name)); CP(stat, stat32, refs); CP(stat, stat32, id); PTROUT_CP(stat, stat32, address); CP(stat, stat32, size); bcopy(&stat.pathname[0], &stat32.pathname[0], sizeof(stat.pathname)); return (copyout(&stat32, uap->stat, version)); } int freebsd32_posix_fallocate(struct thread *td, struct freebsd32_posix_fallocate_args *uap) { td->td_retval[0] = kern_posix_fallocate(td, uap->fd, PAIR32TO64(off_t, uap->offset), PAIR32TO64(off_t, uap->len)); return (0); } int freebsd32_posix_fadvise(struct thread *td, struct freebsd32_posix_fadvise_args *uap) { td->td_retval[0] = kern_posix_fadvise(td, uap->fd, PAIR32TO64(off_t, uap->offset), PAIR32TO64(off_t, uap->len), uap->advice); return (0); } int convert_sigevent32(struct sigevent32 *sig32, struct sigevent *sig) { CP(*sig32, *sig, sigev_notify); switch (sig->sigev_notify) { case SIGEV_NONE: break; case SIGEV_THREAD_ID: CP(*sig32, *sig, sigev_notify_thread_id); /* FALLTHROUGH */ case SIGEV_SIGNAL: CP(*sig32, *sig, sigev_signo); PTRIN_CP(*sig32, *sig, sigev_value.sival_ptr); break; case SIGEV_KEVENT: CP(*sig32, *sig, sigev_notify_kqueue); CP(*sig32, *sig, sigev_notify_kevent_flags); PTRIN_CP(*sig32, *sig, sigev_value.sival_ptr); break; default: return (EINVAL); } return (0); } int freebsd32_procctl(struct thread *td, struct freebsd32_procctl_args *uap) { void *data; union { struct procctl_reaper_status rs; struct procctl_reaper_pids rp; struct procctl_reaper_kill rk; } x; union { struct procctl_reaper_pids32 rp; } x32; int error, error1, flags; switch (uap->com) { case PROC_SPROTECT: case PROC_TRACE_CTL: error = copyin(PTRIN(uap->data), &flags, sizeof(flags)); if (error != 0) return (error); data = &flags; break; case PROC_REAP_ACQUIRE: case PROC_REAP_RELEASE: if (uap->data != NULL) return (EINVAL); data = NULL; break; case PROC_REAP_STATUS: data = &x.rs; break; case PROC_REAP_GETPIDS: error = copyin(uap->data, &x32.rp, sizeof(x32.rp)); if (error != 0) return (error); CP(x32.rp, x.rp, rp_count); PTRIN_CP(x32.rp, x.rp, rp_pids); data = &x.rp; break; case PROC_REAP_KILL: error = copyin(uap->data, &x.rk, sizeof(x.rk)); if (error != 0) return (error); data = &x.rk; break; case PROC_TRACE_STATUS: data = &flags; break; default: return (EINVAL); } error = kern_procctl(td, uap->idtype, PAIR32TO64(id_t, uap->id), uap->com, data); switch (uap->com) { case PROC_REAP_STATUS: if (error == 0) error = copyout(&x.rs, uap->data, sizeof(x.rs)); break; case PROC_REAP_KILL: error1 = copyout(&x.rk, uap->data, sizeof(x.rk)); if (error == 0) error = error1; break; case PROC_TRACE_STATUS: if (error == 0) error = copyout(&flags, uap->data, sizeof(flags)); break; } return (error); } int freebsd32_fcntl(struct thread *td, struct freebsd32_fcntl_args *uap) { long tmp; switch (uap->cmd) { /* * Do unsigned conversion for arg when operation * interprets it as flags or pointer. */ case F_SETLK_REMOTE: case F_SETLKW: case F_SETLK: case F_GETLK: case F_SETFD: case F_SETFL: case F_OGETLK: case F_OSETLK: case F_OSETLKW: tmp = (unsigned int)(uap->arg); break; default: tmp = uap->arg; break; } return (kern_fcntl_freebsd(td, uap->fd, uap->cmd, tmp)); } int freebsd32_ppoll(struct thread *td, struct freebsd32_ppoll_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; sigset_t set, *ssp; int error; if (uap->ts != NULL) { error = copyin(uap->ts, &ts32, sizeof(ts32)); if (error != 0) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); tsp = &ts; } else tsp = NULL; if (uap->set != NULL) { error = copyin(uap->set, &set, sizeof(set)); if (error != 0) return (error); ssp = &set; } else ssp = NULL; return (kern_poll(td, uap->fds, uap->nfds, tsp, ssp)); } Index: head/sys/compat/svr4/svr4_misc.c =================================================================== --- head/sys/compat/svr4/svr4_misc.c (revision 282707) +++ head/sys/compat/svr4/svr4_misc.c (revision 282708) @@ -1,1667 +1,1681 @@ /*- * Copyright (c) 1998 Mark Newton * Copyright (c) 1994 Christos Zoulas * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * SVR4 compatibility module. * * SVR4 system calls that are implemented differently in BSD are * handled here. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include /* Must come after sys/malloc.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__FreeBSD__) #include #include #endif #if defined(NetBSD) # if defined(UVM) # include # endif #endif #define BSD_DIRENT(cp) ((struct dirent *)(cp)) static int svr4_mknod(struct thread *, register_t *, char *, svr4_mode_t, svr4_dev_t); static __inline clock_t timeval_to_clock_t(struct timeval *); static int svr4_setinfo (pid_t , struct rusage *, int, svr4_siginfo_t *); struct svr4_hrtcntl_args; static int svr4_hrtcntl (struct thread *, struct svr4_hrtcntl_args *, register_t *); static void bsd_statfs_to_svr4_statvfs(const struct statfs *, struct svr4_statvfs *); static void bsd_statfs_to_svr4_statvfs64(const struct statfs *, struct svr4_statvfs64 *); static struct proc *svr4_pfind(pid_t pid); /* BOGUS noop */ #if defined(BOGUS) int svr4_sys_setitimer(td, uap) struct thread *td; struct svr4_sys_setitimer_args *uap; { td->td_retval[0] = 0; return 0; } #endif int svr4_sys_wait(td, uap) struct thread *td; struct svr4_sys_wait_args *uap; { int error, st, sig; error = kern_wait(td, WAIT_ANY, &st, 0, NULL); if (error) return (error); if (WIFSIGNALED(st)) { sig = WTERMSIG(st); if (sig >= 0 && sig < NSIG) st = (st & ~0177) | SVR4_BSD2SVR4_SIG(sig); } else if (WIFSTOPPED(st)) { sig = WSTOPSIG(st); if (sig >= 0 && sig < NSIG) st = (st & ~0xff00) | (SVR4_BSD2SVR4_SIG(sig) << 8); } /* * It looks like wait(2) on svr4/solaris/2.4 returns * the status in retval[1], and the pid on retval[0]. */ td->td_retval[1] = st; if (uap->status) error = copyout(&st, uap->status, sizeof(st)); return (error); } int svr4_sys_execv(td, uap) struct thread *td; struct svr4_sys_execv_args *uap; { struct image_args eargs; + struct vmspace *oldvmspace; char *path; int error; CHECKALTEXIST(td, uap->path, &path); + error = pre_execve(td, &oldvmspace); + if (error != 0) { + free(path, M_TEMP); + return (error); + } error = exec_copyin_args(&eargs, path, UIO_SYSSPACE, uap->argp, NULL); free(path, M_TEMP); if (error == 0) error = kern_execve(td, &eargs, NULL); + post_execve(td, error, oldvmspace); return (error); } int svr4_sys_execve(td, uap) struct thread *td; struct svr4_sys_execve_args *uap; { struct image_args eargs; + struct vmspace *oldvmspace; char *path; int error; CHECKALTEXIST(td, uap->path, &path); + error = pre_execve(td, &oldvmspace); + if (error != 0) { + free(path, M_TEMP); + return (error); + } error = exec_copyin_args(&eargs, path, UIO_SYSSPACE, uap->argp, uap->envp); free(path, M_TEMP); if (error == 0) error = kern_execve(td, &eargs, NULL); + post_execve(td, error, oldvmspace); return (error); } int svr4_sys_time(td, v) struct thread *td; struct svr4_sys_time_args *v; { struct svr4_sys_time_args *uap = v; int error = 0; struct timeval tv; microtime(&tv); if (uap->t) error = copyout(&tv.tv_sec, uap->t, sizeof(*(uap->t))); td->td_retval[0] = (int) tv.tv_sec; return error; } /* * Read SVR4-style directory entries. We suck them into kernel space so * that they can be massaged before being copied out to user code. * * This code is ported from the Linux emulator: Changes to the VFS interface * between FreeBSD and NetBSD have made it simpler to port it from there than * to adapt the NetBSD version. */ int svr4_sys_getdents64(td, uap) struct thread *td; struct svr4_sys_getdents64_args *uap; { struct dirent *bdp; struct vnode *vp; caddr_t inp, buf; /* BSD-format */ int len, reclen; /* BSD-format */ caddr_t outp; /* SVR4-format */ int resid, svr4reclen=0; /* SVR4-format */ cap_rights_t rights; struct file *fp; struct uio auio; struct iovec aiov; off_t off; struct svr4_dirent64 svr4_dirent; int buflen, error, eofflag, nbytes, justone; u_long *cookies = NULL, *cookiep; int ncookies; DPRINTF(("svr4_sys_getdents64(%d, *, %d)\n", uap->fd, uap->nbytes)); error = getvnode(td->td_proc->p_fd, uap->fd, cap_rights_init(&rights, CAP_READ), &fp); if (error != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); return (EBADF); } vp = fp->f_vnode; if (vp->v_type != VDIR) { fdrop(fp, td); return (EINVAL); } nbytes = uap->nbytes; if (nbytes == 1) { nbytes = sizeof (struct svr4_dirent64); justone = 1; } else justone = 0; off = fp->f_offset; #define DIRBLKSIZ 512 /* XXX we used to use ufs's DIRBLKSIZ */ buflen = max(DIRBLKSIZ, nbytes); buflen = min(buflen, MAXBSIZE); buf = malloc(buflen, M_TEMP, M_WAITOK); vn_lock(vp, LK_SHARED | LK_RETRY); again: aiov.iov_base = buf; aiov.iov_len = buflen; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; auio.uio_resid = buflen; auio.uio_offset = off; if (cookies) { free(cookies, M_TEMP); cookies = NULL; } #ifdef MAC error = mac_vnode_check_readdir(td->td_ucred, vp); if (error) goto out; #endif error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, &ncookies, &cookies); if (error) { goto out; } inp = buf; outp = (caddr_t) uap->dp; resid = nbytes; if ((len = buflen - auio.uio_resid) <= 0) { goto eof; } cookiep = cookies; if (cookies) { /* * When using cookies, the vfs has the option of reading from * a different offset than that supplied (UFS truncates the * offset to a block boundary to make sure that it never reads * partway through a directory entry, even if the directory * has been compacted). */ while (len > 0 && ncookies > 0 && *cookiep <= off) { bdp = (struct dirent *) inp; len -= bdp->d_reclen; inp += bdp->d_reclen; cookiep++; ncookies--; } } while (len > 0) { if (cookiep && ncookies == 0) break; bdp = (struct dirent *) inp; reclen = bdp->d_reclen; if (reclen & 3) { DPRINTF(("svr4_readdir: reclen=%d\n", reclen)); error = EFAULT; goto out; } if (bdp->d_fileno == 0) { inp += reclen; if (cookiep) { off = *cookiep++; ncookies--; } else off += reclen; len -= reclen; continue; } svr4reclen = SVR4_RECLEN(&svr4_dirent, bdp->d_namlen); if (reclen > len || resid < svr4reclen) { outp++; break; } svr4_dirent.d_ino = (long) bdp->d_fileno; if (justone) { /* * old svr4-style readdir usage. */ svr4_dirent.d_off = (svr4_off_t) svr4reclen; svr4_dirent.d_reclen = (u_short) bdp->d_namlen; } else { svr4_dirent.d_off = (svr4_off_t)(off + reclen); svr4_dirent.d_reclen = (u_short) svr4reclen; } strlcpy(svr4_dirent.d_name, bdp->d_name, sizeof(svr4_dirent.d_name)); if ((error = copyout((caddr_t)&svr4_dirent, outp, svr4reclen))) goto out; inp += reclen; if (cookiep) { off = *cookiep++; ncookies--; } else off += reclen; outp += svr4reclen; resid -= svr4reclen; len -= reclen; if (justone) break; } if (outp == (caddr_t) uap->dp) goto again; fp->f_offset = off; if (justone) nbytes = resid + svr4reclen; eof: td->td_retval[0] = nbytes - resid; out: VOP_UNLOCK(vp, 0); fdrop(fp, td); if (cookies) free(cookies, M_TEMP); free(buf, M_TEMP); return error; } int svr4_sys_getdents(td, uap) struct thread *td; struct svr4_sys_getdents_args *uap; { struct dirent *bdp; struct vnode *vp; caddr_t inp, buf; /* BSD-format */ int len, reclen; /* BSD-format */ caddr_t outp; /* SVR4-format */ int resid, svr4_reclen; /* SVR4-format */ cap_rights_t rights; struct file *fp; struct uio auio; struct iovec aiov; struct svr4_dirent idb; off_t off; /* true file offset */ int buflen, error, eofflag; u_long *cookiebuf = NULL, *cookie; int ncookies = 0, *retval = td->td_retval; if (uap->nbytes < 0) return (EINVAL); error = getvnode(td->td_proc->p_fd, uap->fd, cap_rights_init(&rights, CAP_READ), &fp); if (error != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); return (EBADF); } vp = fp->f_vnode; if (vp->v_type != VDIR) { fdrop(fp, td); return (EINVAL); } buflen = min(MAXBSIZE, uap->nbytes); buf = malloc(buflen, M_TEMP, M_WAITOK); vn_lock(vp, LK_SHARED | LK_RETRY); off = fp->f_offset; again: aiov.iov_base = buf; aiov.iov_len = buflen; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; auio.uio_resid = buflen; auio.uio_offset = off; #ifdef MAC error = mac_vnode_check_readdir(td->td_ucred, vp); if (error) goto out; #endif /* * First we read into the malloc'ed buffer, then * we massage it into user space, one record at a time. */ error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, &ncookies, &cookiebuf); if (error) { goto out; } inp = buf; outp = uap->buf; resid = uap->nbytes; if ((len = buflen - auio.uio_resid) == 0) goto eof; for (cookie = cookiebuf; len > 0; len -= reclen) { bdp = (struct dirent *)inp; reclen = bdp->d_reclen; if (reclen & 3) panic("svr4_sys_getdents64: bad reclen"); if (cookie) off = *cookie++; /* each entry points to the next */ else off += reclen; if ((off >> 32) != 0) { uprintf("svr4_sys_getdents64: dir offset too large for emulated program"); error = EINVAL; goto out; } if (bdp->d_fileno == 0) { inp += reclen; /* it is a hole; squish it out */ continue; } svr4_reclen = SVR4_RECLEN(&idb, bdp->d_namlen); if (reclen > len || resid < svr4_reclen) { /* entry too big for buffer, so just stop */ outp++; break; } /* * Massage in place to make a SVR4-shaped dirent (otherwise * we have to worry about touching user memory outside of * the copyout() call). */ idb.d_ino = (svr4_ino_t)bdp->d_fileno; idb.d_off = (svr4_off_t)off; idb.d_reclen = (u_short)svr4_reclen; strlcpy(idb.d_name, bdp->d_name, sizeof(idb.d_name)); if ((error = copyout((caddr_t)&idb, outp, svr4_reclen))) goto out; /* advance past this real entry */ inp += reclen; /* advance output past SVR4-shaped entry */ outp += svr4_reclen; resid -= svr4_reclen; } /* if we squished out the whole block, try again */ if (outp == uap->buf) goto again; fp->f_offset = off; /* update the vnode offset */ eof: *retval = uap->nbytes - resid; out: VOP_UNLOCK(vp, 0); fdrop(fp, td); if (cookiebuf) free(cookiebuf, M_TEMP); free(buf, M_TEMP); return error; } int svr4_sys_mmap(td, uap) struct thread *td; struct svr4_sys_mmap_args *uap; { struct mmap_args mm; int *retval; retval = td->td_retval; #define _MAP_NEW 0x80000000 /* * Verify the arguments. */ if (uap->prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) return EINVAL; /* XXX still needed? */ if (uap->len == 0) return EINVAL; mm.prot = uap->prot; mm.len = uap->len; mm.flags = uap->flags & ~_MAP_NEW; mm.fd = uap->fd; mm.addr = uap->addr; mm.pos = uap->pos; return sys_mmap(td, &mm); } int svr4_sys_mmap64(td, uap) struct thread *td; struct svr4_sys_mmap64_args *uap; { struct mmap_args mm; void *rp; #define _MAP_NEW 0x80000000 /* * Verify the arguments. */ if (uap->prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) return EINVAL; /* XXX still needed? */ if (uap->len == 0) return EINVAL; mm.prot = uap->prot; mm.len = uap->len; mm.flags = uap->flags & ~_MAP_NEW; mm.fd = uap->fd; mm.addr = uap->addr; mm.pos = uap->pos; rp = (void *) round_page((vm_offset_t)(td->td_proc->p_vmspace->vm_daddr + maxdsiz)); if ((mm.flags & MAP_FIXED) == 0 && mm.addr != 0 && (void *)mm.addr < rp) mm.addr = rp; return sys_mmap(td, &mm); } int svr4_sys_fchroot(td, uap) struct thread *td; struct svr4_sys_fchroot_args *uap; { struct filedesc *fdp = td->td_proc->p_fd; struct vnode *vp; struct file *fp; int error; if ((error = priv_check(td, PRIV_VFS_FCHROOT)) != 0) return error; /* XXX: we have the chroot priv... what cap might we need? all? */ if ((error = getvnode(fdp, uap->fd, 0, &fp)) != 0) return error; vp = fp->f_vnode; VREF(vp); fdrop(fp, td); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); error = change_dir(vp, td); if (error) goto fail; #ifdef MAC error = mac_vnode_check_chroot(td->td_ucred, vp); if (error) goto fail; #endif VOP_UNLOCK(vp, 0); error = change_root(vp, td); vrele(vp); return (error); fail: vput(vp); return (error); } static int svr4_mknod(td, retval, path, mode, dev) struct thread *td; register_t *retval; char *path; svr4_mode_t mode; svr4_dev_t dev; { char *newpath; int error; CHECKALTEXIST(td, path, &newpath); if (S_ISFIFO(mode)) { error = kern_mkfifoat(td, AT_FDCWD, newpath, UIO_SYSSPACE, mode); } else { error = kern_mknodat(td, AT_FDCWD, newpath, UIO_SYSSPACE, mode, dev); } free(newpath, M_TEMP); return (error); } int svr4_sys_mknod(td, uap) struct thread *td; struct svr4_sys_mknod_args *uap; { int *retval = td->td_retval; return svr4_mknod(td, retval, uap->path, uap->mode, (svr4_dev_t)svr4_to_bsd_odev_t(uap->dev)); } int svr4_sys_xmknod(td, uap) struct thread *td; struct svr4_sys_xmknod_args *uap; { int *retval = td->td_retval; return svr4_mknod(td, retval, uap->path, uap->mode, (svr4_dev_t)svr4_to_bsd_dev_t(uap->dev)); } int svr4_sys_vhangup(td, uap) struct thread *td; struct svr4_sys_vhangup_args *uap; { return 0; } int svr4_sys_sysconfig(td, uap) struct thread *td; struct svr4_sys_sysconfig_args *uap; { int *retval; retval = &(td->td_retval[0]); switch (uap->name) { case SVR4_CONFIG_NGROUPS: *retval = ngroups_max; break; case SVR4_CONFIG_CHILD_MAX: *retval = maxproc; break; case SVR4_CONFIG_OPEN_FILES: *retval = maxfiles; break; case SVR4_CONFIG_POSIX_VER: *retval = 198808; break; case SVR4_CONFIG_PAGESIZE: *retval = PAGE_SIZE; break; case SVR4_CONFIG_CLK_TCK: *retval = 60; /* should this be `hz', ie. 100? */ break; case SVR4_CONFIG_XOPEN_VER: *retval = 2; /* XXX: What should that be? */ break; case SVR4_CONFIG_PROF_TCK: *retval = 60; /* XXX: What should that be? */ break; case SVR4_CONFIG_NPROC_CONF: *retval = 1; /* Only one processor for now */ break; case SVR4_CONFIG_NPROC_ONLN: *retval = 1; /* And it better be online */ break; case SVR4_CONFIG_AIO_LISTIO_MAX: case SVR4_CONFIG_AIO_MAX: case SVR4_CONFIG_AIO_PRIO_DELTA_MAX: *retval = 0; /* No aio support */ break; case SVR4_CONFIG_DELAYTIMER_MAX: *retval = 0; /* No delaytimer support */ break; case SVR4_CONFIG_MQ_OPEN_MAX: *retval = msginfo.msgmni; break; case SVR4_CONFIG_MQ_PRIO_MAX: *retval = 0; /* XXX: Don't know */ break; case SVR4_CONFIG_RTSIG_MAX: *retval = 0; break; case SVR4_CONFIG_SEM_NSEMS_MAX: *retval = seminfo.semmni; break; case SVR4_CONFIG_SEM_VALUE_MAX: *retval = seminfo.semvmx; break; case SVR4_CONFIG_SIGQUEUE_MAX: *retval = 0; /* XXX: Don't know */ break; case SVR4_CONFIG_SIGRT_MIN: case SVR4_CONFIG_SIGRT_MAX: *retval = 0; /* No real time signals */ break; case SVR4_CONFIG_TIMER_MAX: *retval = 3; /* XXX: real, virtual, profiling */ break; #if defined(NOTYET) case SVR4_CONFIG_PHYS_PAGES: #if defined(UVM) *retval = uvmexp.free; /* XXX: free instead of total */ #else *retval = vm_cnt.v_free_count; /* XXX: free instead of total */ #endif break; case SVR4_CONFIG_AVPHYS_PAGES: #if defined(UVM) *retval = uvmexp.active; /* XXX: active instead of avg */ #else *retval = vm_cnt.v_active_count;/* XXX: active instead of avg */ #endif break; #endif /* NOTYET */ case SVR4_CONFIG_COHERENCY: *retval = 0; /* XXX */ break; case SVR4_CONFIG_SPLIT_CACHE: *retval = 0; /* XXX */ break; case SVR4_CONFIG_ICACHESZ: *retval = 256; /* XXX */ break; case SVR4_CONFIG_DCACHESZ: *retval = 256; /* XXX */ break; case SVR4_CONFIG_ICACHELINESZ: *retval = 64; /* XXX */ break; case SVR4_CONFIG_DCACHELINESZ: *retval = 64; /* XXX */ break; case SVR4_CONFIG_ICACHEBLKSZ: *retval = 64; /* XXX */ break; case SVR4_CONFIG_DCACHEBLKSZ: *retval = 64; /* XXX */ break; case SVR4_CONFIG_DCACHETBLKSZ: *retval = 64; /* XXX */ break; case SVR4_CONFIG_ICACHE_ASSOC: *retval = 1; /* XXX */ break; case SVR4_CONFIG_DCACHE_ASSOC: *retval = 1; /* XXX */ break; case SVR4_CONFIG_MAXPID: *retval = PID_MAX; break; case SVR4_CONFIG_STACK_PROT: *retval = PROT_READ|PROT_WRITE|PROT_EXEC; break; default: return EINVAL; } return 0; } /* ARGSUSED */ int svr4_sys_break(td, uap) struct thread *td; struct svr4_sys_break_args *uap; { struct obreak_args ap; ap.nsize = uap->nsize; return (sys_obreak(td, &ap)); } static __inline clock_t timeval_to_clock_t(tv) struct timeval *tv; { return tv->tv_sec * hz + tv->tv_usec / (1000000 / hz); } int svr4_sys_times(td, uap) struct thread *td; struct svr4_sys_times_args *uap; { struct timeval tv, utime, stime, cutime, cstime; struct tms tms; struct proc *p; int error; p = td->td_proc; PROC_LOCK(p); PROC_STATLOCK(p); calcru(p, &utime, &stime); PROC_STATUNLOCK(p); calccru(p, &cutime, &cstime); PROC_UNLOCK(p); tms.tms_utime = timeval_to_clock_t(&utime); tms.tms_stime = timeval_to_clock_t(&stime); tms.tms_cutime = timeval_to_clock_t(&cutime); tms.tms_cstime = timeval_to_clock_t(&cstime); error = copyout(&tms, uap->tp, sizeof(tms)); if (error) return (error); microtime(&tv); td->td_retval[0] = (int)timeval_to_clock_t(&tv); return (0); } int svr4_sys_ulimit(td, uap) struct thread *td; struct svr4_sys_ulimit_args *uap; { int *retval = td->td_retval; int error; switch (uap->cmd) { case SVR4_GFILLIM: PROC_LOCK(td->td_proc); *retval = lim_cur(td->td_proc, RLIMIT_FSIZE) / 512; PROC_UNLOCK(td->td_proc); if (*retval == -1) *retval = 0x7fffffff; return 0; case SVR4_SFILLIM: { struct rlimit krl; krl.rlim_cur = uap->newlimit * 512; PROC_LOCK(td->td_proc); krl.rlim_max = lim_max(td->td_proc, RLIMIT_FSIZE); PROC_UNLOCK(td->td_proc); error = kern_setrlimit(td, RLIMIT_FSIZE, &krl); if (error) return error; PROC_LOCK(td->td_proc); *retval = lim_cur(td->td_proc, RLIMIT_FSIZE); PROC_UNLOCK(td->td_proc); if (*retval == -1) *retval = 0x7fffffff; return 0; } case SVR4_GMEMLIM: { struct vmspace *vm = td->td_proc->p_vmspace; register_t r; PROC_LOCK(td->td_proc); r = lim_cur(td->td_proc, RLIMIT_DATA); PROC_UNLOCK(td->td_proc); if (r == -1) r = 0x7fffffff; r += (long) vm->vm_daddr; if (r < 0) r = 0x7fffffff; *retval = r; return 0; } case SVR4_GDESLIM: PROC_LOCK(td->td_proc); *retval = lim_cur(td->td_proc, RLIMIT_NOFILE); PROC_UNLOCK(td->td_proc); if (*retval == -1) *retval = 0x7fffffff; return 0; default: return EINVAL; } } static struct proc * svr4_pfind(pid) pid_t pid; { struct proc *p; /* look in the live processes */ if ((p = pfind(pid)) == NULL) /* look in the zombies */ p = zpfind(pid); return p; } int svr4_sys_pgrpsys(td, uap) struct thread *td; struct svr4_sys_pgrpsys_args *uap; { int *retval = td->td_retval; struct proc *p = td->td_proc; switch (uap->cmd) { case 1: /* setpgrp() */ /* * SVR4 setpgrp() (which takes no arguments) has the * semantics that the session ID is also created anew, so * in almost every sense, setpgrp() is identical to * setsid() for SVR4. (Under BSD, the difference is that * a setpgid(0,0) will not create a new session.) */ sys_setsid(td, NULL); /*FALLTHROUGH*/ case 0: /* getpgrp() */ PROC_LOCK(p); *retval = p->p_pgrp->pg_id; PROC_UNLOCK(p); return 0; case 2: /* getsid(pid) */ if (uap->pid == 0) PROC_LOCK(p); else if ((p = svr4_pfind(uap->pid)) == NULL) return ESRCH; /* * This has already been initialized to the pid of * the session leader. */ *retval = (register_t) p->p_session->s_sid; PROC_UNLOCK(p); return 0; case 3: /* setsid() */ return sys_setsid(td, NULL); case 4: /* getpgid(pid) */ if (uap->pid == 0) PROC_LOCK(p); else if ((p = svr4_pfind(uap->pid)) == NULL) return ESRCH; *retval = (int) p->p_pgrp->pg_id; PROC_UNLOCK(p); return 0; case 5: /* setpgid(pid, pgid); */ { struct setpgid_args sa; sa.pid = uap->pid; sa.pgid = uap->pgid; return sys_setpgid(td, &sa); } default: return EINVAL; } } struct svr4_hrtcntl_args { int cmd; int fun; int clk; svr4_hrt_interval_t * iv; svr4_hrt_time_t * ti; }; static int svr4_hrtcntl(td, uap, retval) struct thread *td; struct svr4_hrtcntl_args *uap; register_t *retval; { switch (uap->fun) { case SVR4_HRT_CNTL_RES: DPRINTF(("htrcntl(RES)\n")); *retval = SVR4_HRT_USEC; return 0; case SVR4_HRT_CNTL_TOFD: DPRINTF(("htrcntl(TOFD)\n")); { struct timeval tv; svr4_hrt_time_t t; if (uap->clk != SVR4_HRT_CLK_STD) { DPRINTF(("clk == %d\n", uap->clk)); return EINVAL; } if (uap->ti == NULL) { DPRINTF(("ti NULL\n")); return EINVAL; } microtime(&tv); t.h_sec = tv.tv_sec; t.h_rem = tv.tv_usec; t.h_res = SVR4_HRT_USEC; return copyout(&t, uap->ti, sizeof(t)); } case SVR4_HRT_CNTL_START: DPRINTF(("htrcntl(START)\n")); return ENOSYS; case SVR4_HRT_CNTL_GET: DPRINTF(("htrcntl(GET)\n")); return ENOSYS; default: DPRINTF(("Bad htrcntl command %d\n", uap->fun)); return ENOSYS; } } int svr4_sys_hrtsys(td, uap) struct thread *td; struct svr4_sys_hrtsys_args *uap; { int *retval = td->td_retval; switch (uap->cmd) { case SVR4_HRT_CNTL: return svr4_hrtcntl(td, (struct svr4_hrtcntl_args *) uap, retval); case SVR4_HRT_ALRM: DPRINTF(("hrtalarm\n")); return ENOSYS; case SVR4_HRT_SLP: DPRINTF(("hrtsleep\n")); return ENOSYS; case SVR4_HRT_CAN: DPRINTF(("hrtcancel\n")); return ENOSYS; default: DPRINTF(("Bad hrtsys command %d\n", uap->cmd)); return EINVAL; } } static int svr4_setinfo(pid, ru, st, s) pid_t pid; struct rusage *ru; int st; svr4_siginfo_t *s; { svr4_siginfo_t i; int sig; memset(&i, 0, sizeof(i)); i.svr4_si_signo = SVR4_SIGCHLD; i.svr4_si_errno = 0; /* XXX? */ i.svr4_si_pid = pid; if (ru) { i.svr4_si_stime = ru->ru_stime.tv_sec; i.svr4_si_utime = ru->ru_utime.tv_sec; } if (WIFEXITED(st)) { i.svr4_si_status = WEXITSTATUS(st); i.svr4_si_code = SVR4_CLD_EXITED; } else if (WIFSTOPPED(st)) { sig = WSTOPSIG(st); if (sig >= 0 && sig < NSIG) i.svr4_si_status = SVR4_BSD2SVR4_SIG(sig); if (i.svr4_si_status == SVR4_SIGCONT) i.svr4_si_code = SVR4_CLD_CONTINUED; else i.svr4_si_code = SVR4_CLD_STOPPED; } else { sig = WTERMSIG(st); if (sig >= 0 && sig < NSIG) i.svr4_si_status = SVR4_BSD2SVR4_SIG(sig); if (WCOREDUMP(st)) i.svr4_si_code = SVR4_CLD_DUMPED; else i.svr4_si_code = SVR4_CLD_KILLED; } DPRINTF(("siginfo [pid %ld signo %d code %d errno %d status %d]\n", i.svr4_si_pid, i.svr4_si_signo, i.svr4_si_code, i.svr4_si_errno, i.svr4_si_status)); return copyout(&i, s, sizeof(i)); } int svr4_sys_waitsys(td, uap) struct thread *td; struct svr4_sys_waitsys_args *uap; { struct rusage ru; pid_t pid; int nfound, status; int error, *retval = td->td_retval; struct proc *p, *q; DPRINTF(("waitsys(%d, %d, %p, %x)\n", uap->grp, uap->id, uap->info, uap->options)); q = td->td_proc; switch (uap->grp) { case SVR4_P_PID: pid = uap->id; break; case SVR4_P_PGID: PROC_LOCK(q); pid = -q->p_pgid; PROC_UNLOCK(q); break; case SVR4_P_ALL: pid = WAIT_ANY; break; default: return EINVAL; } /* Hand off the easy cases to kern_wait(). */ if (!(uap->options & (SVR4_WNOWAIT)) && (uap->options & (SVR4_WEXITED | SVR4_WTRAPPED))) { int options; options = 0; if (uap->options & SVR4_WSTOPPED) options |= WUNTRACED; if (uap->options & SVR4_WCONTINUED) options |= WCONTINUED; if (uap->options & SVR4_WNOHANG) options |= WNOHANG; error = kern_wait(td, pid, &status, options, &ru); if (error) return (error); if (uap->options & SVR4_WNOHANG && *retval == 0) error = svr4_setinfo(*retval, NULL, 0, uap->info); else error = svr4_setinfo(*retval, &ru, status, uap->info); *retval = 0; return (error); } /* * Ok, handle the weird cases. Either WNOWAIT is set (meaning we * just want to see if there is a process to harvest, we don't * want to actually harvest it), or WEXIT and WTRAPPED are clear * meaning we want to ignore zombies. Either way, we don't have * to handle harvesting zombies here. We do have to duplicate the * other portions of kern_wait() though, especially for WCONTINUED * and WSTOPPED. */ loop: nfound = 0; sx_slock(&proctree_lock); LIST_FOREACH(p, &q->p_children, p_sibling) { PROC_LOCK(p); if (pid != WAIT_ANY && p->p_pid != pid && p->p_pgid != -pid) { PROC_UNLOCK(p); DPRINTF(("pid %d pgid %d != %d\n", p->p_pid, p->p_pgid, pid)); continue; } if (p_canwait(td, p)) { PROC_UNLOCK(p); continue; } nfound++; PROC_SLOCK(p); /* * See if we have a zombie. If so, WNOWAIT should be set, * as otherwise we should have called kern_wait() up above. */ if ((p->p_state == PRS_ZOMBIE) && ((uap->options & (SVR4_WEXITED|SVR4_WTRAPPED)))) { PROC_SUNLOCK(p); KASSERT(uap->options & SVR4_WNOWAIT, ("WNOWAIT is clear")); /* Found a zombie, so cache info in local variables. */ pid = p->p_pid; status = p->p_xstat; ru = p->p_ru; PROC_STATLOCK(p); calcru(p, &ru.ru_utime, &ru.ru_stime); PROC_STATUNLOCK(p); PROC_UNLOCK(p); sx_sunlock(&proctree_lock); /* Copy the info out to userland. */ *retval = 0; DPRINTF(("found %d\n", pid)); return (svr4_setinfo(pid, &ru, status, uap->info)); } /* * See if we have a stopped or continued process. * XXX: This duplicates the same code in kern_wait(). */ if ((p->p_flag & P_STOPPED_SIG) && (p->p_suspcount == p->p_numthreads) && (p->p_flag & P_WAITED) == 0 && (p->p_flag & P_TRACED || uap->options & SVR4_WSTOPPED)) { PROC_SUNLOCK(p); if (((uap->options & SVR4_WNOWAIT)) == 0) p->p_flag |= P_WAITED; sx_sunlock(&proctree_lock); pid = p->p_pid; status = W_STOPCODE(p->p_xstat); ru = p->p_ru; PROC_STATLOCK(p); calcru(p, &ru.ru_utime, &ru.ru_stime); PROC_STATUNLOCK(p); PROC_UNLOCK(p); if (((uap->options & SVR4_WNOWAIT)) == 0) { PROC_LOCK(q); sigqueue_take(p->p_ksi); PROC_UNLOCK(q); } *retval = 0; DPRINTF(("jobcontrol %d\n", pid)); return (svr4_setinfo(pid, &ru, status, uap->info)); } PROC_SUNLOCK(p); if (uap->options & SVR4_WCONTINUED && (p->p_flag & P_CONTINUED)) { sx_sunlock(&proctree_lock); if (((uap->options & SVR4_WNOWAIT)) == 0) p->p_flag &= ~P_CONTINUED; pid = p->p_pid; ru = p->p_ru; status = SIGCONT; PROC_STATLOCK(p); calcru(p, &ru.ru_utime, &ru.ru_stime); PROC_STATUNLOCK(p); PROC_UNLOCK(p); if (((uap->options & SVR4_WNOWAIT)) == 0) { PROC_LOCK(q); sigqueue_take(p->p_ksi); PROC_UNLOCK(q); } *retval = 0; DPRINTF(("jobcontrol %d\n", pid)); return (svr4_setinfo(pid, &ru, status, uap->info)); } PROC_UNLOCK(p); } if (nfound == 0) { sx_sunlock(&proctree_lock); return (ECHILD); } if (uap->options & SVR4_WNOHANG) { sx_sunlock(&proctree_lock); *retval = 0; return (svr4_setinfo(0, NULL, 0, uap->info)); } PROC_LOCK(q); sx_sunlock(&proctree_lock); if (q->p_flag & P_STATCHILD) { q->p_flag &= ~P_STATCHILD; error = 0; } else error = msleep(q, &q->p_mtx, PWAIT | PCATCH, "svr4_wait", 0); PROC_UNLOCK(q); if (error) return error; goto loop; } static void bsd_statfs_to_svr4_statvfs(bfs, sfs) const struct statfs *bfs; struct svr4_statvfs *sfs; { sfs->f_bsize = bfs->f_iosize; /* XXX */ sfs->f_frsize = bfs->f_bsize; sfs->f_blocks = bfs->f_blocks; sfs->f_bfree = bfs->f_bfree; sfs->f_bavail = bfs->f_bavail; sfs->f_files = bfs->f_files; sfs->f_ffree = bfs->f_ffree; sfs->f_favail = bfs->f_ffree; sfs->f_fsid = bfs->f_fsid.val[0]; memcpy(sfs->f_basetype, bfs->f_fstypename, sizeof(sfs->f_basetype)); sfs->f_flag = 0; if (bfs->f_flags & MNT_RDONLY) sfs->f_flag |= SVR4_ST_RDONLY; if (bfs->f_flags & MNT_NOSUID) sfs->f_flag |= SVR4_ST_NOSUID; sfs->f_namemax = MAXNAMLEN; memcpy(sfs->f_fstr, bfs->f_fstypename, sizeof(sfs->f_fstr)); /* XXX */ memset(sfs->f_filler, 0, sizeof(sfs->f_filler)); } static void bsd_statfs_to_svr4_statvfs64(bfs, sfs) const struct statfs *bfs; struct svr4_statvfs64 *sfs; { sfs->f_bsize = bfs->f_iosize; /* XXX */ sfs->f_frsize = bfs->f_bsize; sfs->f_blocks = bfs->f_blocks; sfs->f_bfree = bfs->f_bfree; sfs->f_bavail = bfs->f_bavail; sfs->f_files = bfs->f_files; sfs->f_ffree = bfs->f_ffree; sfs->f_favail = bfs->f_ffree; sfs->f_fsid = bfs->f_fsid.val[0]; memcpy(sfs->f_basetype, bfs->f_fstypename, sizeof(sfs->f_basetype)); sfs->f_flag = 0; if (bfs->f_flags & MNT_RDONLY) sfs->f_flag |= SVR4_ST_RDONLY; if (bfs->f_flags & MNT_NOSUID) sfs->f_flag |= SVR4_ST_NOSUID; sfs->f_namemax = MAXNAMLEN; memcpy(sfs->f_fstr, bfs->f_fstypename, sizeof(sfs->f_fstr)); /* XXX */ memset(sfs->f_filler, 0, sizeof(sfs->f_filler)); } int svr4_sys_statvfs(td, uap) struct thread *td; struct svr4_sys_statvfs_args *uap; { struct svr4_statvfs sfs; struct statfs bfs; char *path; int error; CHECKALTEXIST(td, uap->path, &path); error = kern_statfs(td, path, UIO_SYSSPACE, &bfs); free(path, M_TEMP); if (error) return (error); bsd_statfs_to_svr4_statvfs(&bfs, &sfs); return copyout(&sfs, uap->fs, sizeof(sfs)); } int svr4_sys_fstatvfs(td, uap) struct thread *td; struct svr4_sys_fstatvfs_args *uap; { struct svr4_statvfs sfs; struct statfs bfs; int error; error = kern_fstatfs(td, uap->fd, &bfs); if (error) return (error); bsd_statfs_to_svr4_statvfs(&bfs, &sfs); return copyout(&sfs, uap->fs, sizeof(sfs)); } int svr4_sys_statvfs64(td, uap) struct thread *td; struct svr4_sys_statvfs64_args *uap; { struct svr4_statvfs64 sfs; struct statfs bfs; char *path; int error; CHECKALTEXIST(td, uap->path, &path); error = kern_statfs(td, path, UIO_SYSSPACE, &bfs); free(path, M_TEMP); if (error) return (error); bsd_statfs_to_svr4_statvfs64(&bfs, &sfs); return copyout(&sfs, uap->fs, sizeof(sfs)); } int svr4_sys_fstatvfs64(td, uap) struct thread *td; struct svr4_sys_fstatvfs64_args *uap; { struct svr4_statvfs64 sfs; struct statfs bfs; int error; error = kern_fstatfs(td, uap->fd, &bfs); if (error) return (error); bsd_statfs_to_svr4_statvfs64(&bfs, &sfs); return copyout(&sfs, uap->fs, sizeof(sfs)); } int svr4_sys_alarm(td, uap) struct thread *td; struct svr4_sys_alarm_args *uap; { struct itimerval itv, oitv; int error; timevalclear(&itv.it_interval); itv.it_value.tv_sec = uap->sec; itv.it_value.tv_usec = 0; error = kern_setitimer(td, ITIMER_REAL, &itv, &oitv); if (error) return (error); if (oitv.it_value.tv_usec != 0) oitv.it_value.tv_sec++; td->td_retval[0] = oitv.it_value.tv_sec; return (0); } int svr4_sys_gettimeofday(td, uap) struct thread *td; struct svr4_sys_gettimeofday_args *uap; { if (uap->tp) { struct timeval atv; microtime(&atv); return copyout(&atv, uap->tp, sizeof (atv)); } return 0; } int svr4_sys_facl(td, uap) struct thread *td; struct svr4_sys_facl_args *uap; { int *retval; retval = td->td_retval; *retval = 0; switch (uap->cmd) { case SVR4_SYS_SETACL: /* We don't support acls on any filesystem */ return ENOSYS; case SVR4_SYS_GETACL: return copyout(retval, &uap->num, sizeof(uap->num)); case SVR4_SYS_GETACLCNT: return 0; default: return EINVAL; } } int svr4_sys_acl(td, uap) struct thread *td; struct svr4_sys_acl_args *uap; { /* XXX: for now the same */ return svr4_sys_facl(td, (struct svr4_sys_facl_args *)uap); } int svr4_sys_auditsys(td, uap) struct thread *td; struct svr4_sys_auditsys_args *uap; { /* * XXX: Big brother is *not* watching. */ return 0; } int svr4_sys_memcntl(td, uap) struct thread *td; struct svr4_sys_memcntl_args *uap; { switch (uap->cmd) { case SVR4_MC_SYNC: { struct msync_args msa; msa.addr = uap->addr; msa.len = uap->len; msa.flags = (int)uap->arg; return sys_msync(td, &msa); } case SVR4_MC_ADVISE: { struct madvise_args maa; maa.addr = uap->addr; maa.len = uap->len; maa.behav = (int)uap->arg; return sys_madvise(td, &maa); } case SVR4_MC_LOCK: case SVR4_MC_UNLOCK: case SVR4_MC_LOCKAS: case SVR4_MC_UNLOCKAS: return EOPNOTSUPP; default: return ENOSYS; } } int svr4_sys_nice(td, uap) struct thread *td; struct svr4_sys_nice_args *uap; { struct setpriority_args ap; int error; ap.which = PRIO_PROCESS; ap.who = 0; ap.prio = uap->prio; if ((error = sys_setpriority(td, &ap)) != 0) return error; /* the cast is stupid, but the structures are the same */ if ((error = sys_getpriority(td, (struct getpriority_args *)&ap)) != 0) return error; return 0; } int svr4_sys_resolvepath(td, uap) struct thread *td; struct svr4_sys_resolvepath_args *uap; { struct nameidata nd; int error, *retval = td->td_retval; unsigned int ncopy; NDINIT(&nd, LOOKUP, NOFOLLOW | SAVENAME, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_NO_FREE_PNBUF); ncopy = min(uap->bufsiz, strlen(nd.ni_cnd.cn_pnbuf) + 1); if ((error = copyout(nd.ni_cnd.cn_pnbuf, uap->buf, ncopy)) != 0) goto bad; *retval = ncopy; bad: NDFREE(&nd, NDF_ONLY_PNBUF); return error; } Index: head/sys/i386/ibcs2/ibcs2_misc.c =================================================================== --- head/sys/i386/ibcs2/ibcs2_misc.c (revision 282707) +++ head/sys/i386/ibcs2/ibcs2_misc.c (revision 282708) @@ -1,1265 +1,1279 @@ /*- * Copyright (c) 1995 Steven Wallace * Copyright (c) 1994, 1995 Scott Bartram * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This software was developed by the Computer Systems Engineering group * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and * contributed to Berkeley. * * All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Lawrence Berkeley Laboratory. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Header: sun_misc.c,v 1.16 93/04/07 02:46:27 torek Exp * * @(#)sun_misc.c 8.1 (Berkeley) 6/18/93 */ #include __FBSDID("$FreeBSD$"); /* * IBCS2 compatibility module. * * IBCS2 system calls that are implemented differently in BSD are * handled here. */ #include #include #include #include #include #include #include #include #include #include #include /* Must come after sys/malloc.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include int ibcs2_ulimit(td, uap) struct thread *td; struct ibcs2_ulimit_args *uap; { struct rlimit rl; struct proc *p; int error; #define IBCS2_GETFSIZE 1 #define IBCS2_SETFSIZE 2 #define IBCS2_GETPSIZE 3 #define IBCS2_GETDTABLESIZE 4 p = td->td_proc; switch (uap->cmd) { case IBCS2_GETFSIZE: PROC_LOCK(p); td->td_retval[0] = lim_cur(p, RLIMIT_FSIZE); PROC_UNLOCK(p); if (td->td_retval[0] == -1) td->td_retval[0] = 0x7fffffff; return 0; case IBCS2_SETFSIZE: PROC_LOCK(p); rl.rlim_max = lim_max(p, RLIMIT_FSIZE); PROC_UNLOCK(p); rl.rlim_cur = uap->newlimit; error = kern_setrlimit(td, RLIMIT_FSIZE, &rl); if (!error) { PROC_LOCK(p); td->td_retval[0] = lim_cur(p, RLIMIT_FSIZE); PROC_UNLOCK(p); } else { DPRINTF(("failed ")); } return error; case IBCS2_GETPSIZE: PROC_LOCK(p); td->td_retval[0] = lim_cur(p, RLIMIT_RSS); /* XXX */ PROC_UNLOCK(p); return 0; case IBCS2_GETDTABLESIZE: uap->cmd = IBCS2_SC_OPEN_MAX; return ibcs2_sysconf(td, (struct ibcs2_sysconf_args *)uap); default: return ENOSYS; } } #define IBCS2_WSTOPPED 0177 #define IBCS2_STOPCODE(sig) ((sig) << 8 | IBCS2_WSTOPPED) int ibcs2_wait(td, uap) struct thread *td; struct ibcs2_wait_args *uap; { int error, options, status; int *statusp; pid_t pid; struct trapframe *tf = td->td_frame; if ((tf->tf_eflags & (PSL_Z|PSL_PF|PSL_N|PSL_V)) == (PSL_Z|PSL_PF|PSL_N|PSL_V)) { /* waitpid */ pid = uap->a1; statusp = (int *)uap->a2; options = uap->a3; } else { /* wait */ pid = WAIT_ANY; statusp = (int *)uap->a1; options = 0; } error = kern_wait(td, pid, &status, options, NULL); if (error) return error; if (statusp) { /* * Convert status/signal result. */ if (WIFSTOPPED(status)) { if (WSTOPSIG(status) <= 0 || WSTOPSIG(status) > IBCS2_SIGTBLSZ) return (EINVAL); status = IBCS2_STOPCODE(bsd_to_ibcs2_sig[_SIG_IDX(WSTOPSIG(status))]); } else if (WIFSIGNALED(status)) { if (WTERMSIG(status) <= 0 || WTERMSIG(status) > IBCS2_SIGTBLSZ) return (EINVAL); status = bsd_to_ibcs2_sig[_SIG_IDX(WTERMSIG(status))]; } /* else exit status -- identical */ /* record result/status */ td->td_retval[1] = status; return copyout(&status, statusp, sizeof(status)); } return 0; } int ibcs2_execv(td, uap) struct thread *td; struct ibcs2_execv_args *uap; { struct image_args eargs; + struct vmspace *oldvmspace; char *path; int error; CHECKALTEXIST(td, uap->path, &path); + error = pre_execve(td, &oldvmspace); + if (error != 0) { + free(path, M_TEMP); + return (error); + } error = exec_copyin_args(&eargs, path, UIO_SYSSPACE, uap->argp, NULL); free(path, M_TEMP); if (error == 0) error = kern_execve(td, &eargs, NULL); + post_execve(td, error, oldvmspace); return (error); } int ibcs2_execve(td, uap) struct thread *td; struct ibcs2_execve_args *uap; { struct image_args eargs; + struct vmspace *oldvmspace; char *path; int error; CHECKALTEXIST(td, uap->path, &path); + error = pre_execve(td, &oldvmspace); + if (error != 0) { + free(path, M_TEMP); + return (error); + } error = exec_copyin_args(&eargs, path, UIO_SYSSPACE, uap->argp, uap->envp); free(path, M_TEMP); if (error == 0) error = kern_execve(td, &eargs, NULL); + post_execve(td, error, oldvmspace); return (error); } int ibcs2_umount(td, uap) struct thread *td; struct ibcs2_umount_args *uap; { struct unmount_args um; um.path = uap->name; um.flags = 0; return sys_unmount(td, &um); } int ibcs2_mount(td, uap) struct thread *td; struct ibcs2_mount_args *uap; { #ifdef notyet int oflags = uap->flags, nflags, error; char fsname[MFSNAMELEN]; if (oflags & (IBCS2_MS_NOSUB | IBCS2_MS_SYS5)) return (EINVAL); if ((oflags & IBCS2_MS_NEWTYPE) == 0) return (EINVAL); nflags = 0; if (oflags & IBCS2_MS_RDONLY) nflags |= MNT_RDONLY; if (oflags & IBCS2_MS_NOSUID) nflags |= MNT_NOSUID; if (oflags & IBCS2_MS_REMOUNT) nflags |= MNT_UPDATE; uap->flags = nflags; if (error = copyinstr((caddr_t)uap->type, fsname, sizeof fsname, (u_int *)0)) return (error); if (strcmp(fsname, "4.2") == 0) { uap->type = (caddr_t)STACK_ALLOC(); if (error = copyout("ufs", uap->type, sizeof("ufs"))) return (error); } else if (strcmp(fsname, "nfs") == 0) { struct ibcs2_nfs_args sna; struct sockaddr_in sain; struct nfs_args na; struct sockaddr sa; if (error = copyin(uap->data, &sna, sizeof sna)) return (error); if (error = copyin(sna.addr, &sain, sizeof sain)) return (error); bcopy(&sain, &sa, sizeof sa); sa.sa_len = sizeof(sain); uap->data = (caddr_t)STACK_ALLOC(); na.addr = (struct sockaddr *)((int)uap->data + sizeof na); na.sotype = SOCK_DGRAM; na.proto = IPPROTO_UDP; na.fh = (nfsv2fh_t *)sna.fh; na.flags = sna.flags; na.wsize = sna.wsize; na.rsize = sna.rsize; na.timeo = sna.timeo; na.retrans = sna.retrans; na.hostname = sna.hostname; if (error = copyout(&sa, na.addr, sizeof sa)) return (error); if (error = copyout(&na, uap->data, sizeof na)) return (error); } return (mount(td, uap)); #else return EINVAL; #endif } /* * Read iBCS2-style directory entries. We suck them into kernel space so * that they can be massaged before being copied out to user code. Like * SunOS, we squish out `empty' entries. * * This is quite ugly, but what do you expect from compatibility code? */ int ibcs2_getdents(td, uap) struct thread *td; register struct ibcs2_getdents_args *uap; { register struct vnode *vp; register caddr_t inp, buf; /* BSD-format */ register int len, reclen; /* BSD-format */ register caddr_t outp; /* iBCS2-format */ register int resid; /* iBCS2-format */ cap_rights_t rights; struct file *fp; struct uio auio; struct iovec aiov; struct ibcs2_dirent idb; off_t off; /* true file offset */ int buflen, error, eofflag; u_long *cookies = NULL, *cookiep; int ncookies; #define BSD_DIRENT(cp) ((struct dirent *)(cp)) #define IBCS2_RECLEN(reclen) (reclen + sizeof(u_short)) error = getvnode(td->td_proc->p_fd, uap->fd, cap_rights_init(&rights, CAP_READ), &fp); if (error != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); return (EBADF); } vp = fp->f_vnode; if (vp->v_type != VDIR) { /* XXX vnode readdir op should do this */ fdrop(fp, td); return (EINVAL); } off = fp->f_offset; #define DIRBLKSIZ 512 /* XXX we used to use ufs's DIRBLKSIZ */ buflen = max(DIRBLKSIZ, uap->nbytes); buflen = min(buflen, MAXBSIZE); buf = malloc(buflen, M_TEMP, M_WAITOK); vn_lock(vp, LK_SHARED | LK_RETRY); again: aiov.iov_base = buf; aiov.iov_len = buflen; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; auio.uio_resid = buflen; auio.uio_offset = off; if (cookies) { free(cookies, M_TEMP); cookies = NULL; } #ifdef MAC error = mac_vnode_check_readdir(td->td_ucred, vp); if (error) goto out; #endif /* * First we read into the malloc'ed buffer, then * we massage it into user space, one record at a time. */ if ((error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, &ncookies, &cookies)) != 0) goto out; inp = buf; outp = uap->buf; resid = uap->nbytes; if ((len = buflen - auio.uio_resid) <= 0) goto eof; cookiep = cookies; if (cookies) { /* * When using cookies, the vfs has the option of reading from * a different offset than that supplied (UFS truncates the * offset to a block boundary to make sure that it never reads * partway through a directory entry, even if the directory * has been compacted). */ while (len > 0 && ncookies > 0 && *cookiep <= off) { len -= BSD_DIRENT(inp)->d_reclen; inp += BSD_DIRENT(inp)->d_reclen; cookiep++; ncookies--; } } for (; len > 0; len -= reclen) { if (cookiep && ncookies == 0) break; reclen = BSD_DIRENT(inp)->d_reclen; if (reclen & 3) { printf("ibcs2_getdents: reclen=%d\n", reclen); error = EFAULT; goto out; } if (BSD_DIRENT(inp)->d_fileno == 0) { inp += reclen; /* it is a hole; squish it out */ if (cookiep) { off = *cookiep++; ncookies--; } else off += reclen; continue; } if (reclen > len || resid < IBCS2_RECLEN(reclen)) { /* entry too big for buffer, so just stop */ outp++; break; } /* * Massage in place to make an iBCS2-shaped dirent (otherwise * we have to worry about touching user memory outside of * the copyout() call). */ idb.d_ino = (ibcs2_ino_t)BSD_DIRENT(inp)->d_fileno; idb.d_off = (ibcs2_off_t)off; idb.d_reclen = (u_short)IBCS2_RECLEN(reclen); if ((error = copyout((caddr_t)&idb, outp, 10)) != 0 || (error = copyout(BSD_DIRENT(inp)->d_name, outp + 10, BSD_DIRENT(inp)->d_namlen + 1)) != 0) goto out; /* advance past this real entry */ if (cookiep) { off = *cookiep++; ncookies--; } else off += reclen; inp += reclen; /* advance output past iBCS2-shaped entry */ outp += IBCS2_RECLEN(reclen); resid -= IBCS2_RECLEN(reclen); } /* if we squished out the whole block, try again */ if (outp == uap->buf) goto again; fp->f_offset = off; /* update the vnode offset */ eof: td->td_retval[0] = uap->nbytes - resid; out: VOP_UNLOCK(vp, 0); fdrop(fp, td); if (cookies) free(cookies, M_TEMP); free(buf, M_TEMP); return (error); } int ibcs2_read(td, uap) struct thread *td; struct ibcs2_read_args *uap; { register struct vnode *vp; register caddr_t inp, buf; /* BSD-format */ register int len, reclen; /* BSD-format */ register caddr_t outp; /* iBCS2-format */ register int resid; /* iBCS2-format */ cap_rights_t rights; struct file *fp; struct uio auio; struct iovec aiov; struct ibcs2_direct { ibcs2_ino_t ino; char name[14]; } idb; off_t off; /* true file offset */ int buflen, error, eofflag, size; u_long *cookies = NULL, *cookiep; int ncookies; error = getvnode(td->td_proc->p_fd, uap->fd, cap_rights_init(&rights, CAP_READ), &fp); if (error != 0) { if (error == EINVAL) return sys_read(td, (struct read_args *)uap); else return error; } if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); return (EBADF); } vp = fp->f_vnode; if (vp->v_type != VDIR) { fdrop(fp, td); return sys_read(td, (struct read_args *)uap); } off = fp->f_offset; DPRINTF(("ibcs2_read: read directory\n")); buflen = max(DIRBLKSIZ, uap->nbytes); buflen = min(buflen, MAXBSIZE); buf = malloc(buflen, M_TEMP, M_WAITOK); vn_lock(vp, LK_SHARED | LK_RETRY); again: aiov.iov_base = buf; aiov.iov_len = buflen; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; auio.uio_resid = buflen; auio.uio_offset = off; if (cookies) { free(cookies, M_TEMP); cookies = NULL; } #ifdef MAC error = mac_vnode_check_readdir(td->td_ucred, vp); if (error) goto out; #endif /* * First we read into the malloc'ed buffer, then * we massage it into user space, one record at a time. */ if ((error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, &ncookies, &cookies)) != 0) { DPRINTF(("VOP_READDIR failed: %d\n", error)); goto out; } inp = buf; outp = uap->buf; resid = uap->nbytes; if ((len = buflen - auio.uio_resid) <= 0) goto eof; cookiep = cookies; if (cookies) { /* * When using cookies, the vfs has the option of reading from * a different offset than that supplied (UFS truncates the * offset to a block boundary to make sure that it never reads * partway through a directory entry, even if the directory * has been compacted). */ while (len > 0 && ncookies > 0 && *cookiep <= off) { len -= BSD_DIRENT(inp)->d_reclen; inp += BSD_DIRENT(inp)->d_reclen; cookiep++; ncookies--; } } for (; len > 0 && resid > 0; len -= reclen) { if (cookiep && ncookies == 0) break; reclen = BSD_DIRENT(inp)->d_reclen; if (reclen & 3) { printf("ibcs2_read: reclen=%d\n", reclen); error = EFAULT; goto out; } if (BSD_DIRENT(inp)->d_fileno == 0) { inp += reclen; /* it is a hole; squish it out */ if (cookiep) { off = *cookiep++; ncookies--; } else off += reclen; continue; } if (reclen > len || resid < sizeof(struct ibcs2_direct)) { /* entry too big for buffer, so just stop */ outp++; break; } /* * Massage in place to make an iBCS2-shaped dirent (otherwise * we have to worry about touching user memory outside of * the copyout() call). * * TODO: if length(filename) > 14, then break filename into * multiple entries and set inode = 0xffff except last */ idb.ino = (BSD_DIRENT(inp)->d_fileno > 0xfffe) ? 0xfffe : BSD_DIRENT(inp)->d_fileno; (void)copystr(BSD_DIRENT(inp)->d_name, idb.name, 14, &size); bzero(idb.name + size, 14 - size); if ((error = copyout(&idb, outp, sizeof(struct ibcs2_direct))) != 0) goto out; /* advance past this real entry */ if (cookiep) { off = *cookiep++; ncookies--; } else off += reclen; inp += reclen; /* advance output past iBCS2-shaped entry */ outp += sizeof(struct ibcs2_direct); resid -= sizeof(struct ibcs2_direct); } /* if we squished out the whole block, try again */ if (outp == uap->buf) goto again; fp->f_offset = off; /* update the vnode offset */ eof: td->td_retval[0] = uap->nbytes - resid; out: VOP_UNLOCK(vp, 0); fdrop(fp, td); if (cookies) free(cookies, M_TEMP); free(buf, M_TEMP); return (error); } int ibcs2_mknod(td, uap) struct thread *td; struct ibcs2_mknod_args *uap; { char *path; int error; CHECKALTCREAT(td, uap->path, &path); if (S_ISFIFO(uap->mode)) { error = kern_mkfifoat(td, AT_FDCWD, path, UIO_SYSSPACE, uap->mode); } else { error = kern_mknodat(td, AT_FDCWD, path, UIO_SYSSPACE, uap->mode, uap->dev); } free(path, M_TEMP); return (error); } int ibcs2_getgroups(td, uap) struct thread *td; struct ibcs2_getgroups_args *uap; { struct ucred *cred; ibcs2_gid_t *iset; u_int i, ngrp; int error; cred = td->td_ucred; ngrp = cred->cr_ngroups; if (uap->gidsetsize == 0) { error = 0; goto out; } if (uap->gidsetsize < ngrp) return (EINVAL); iset = malloc(ngrp * sizeof(*iset), M_TEMP, M_WAITOK); for (i = 0; i < ngrp; i++) iset[i] = (ibcs2_gid_t)cred->cr_groups[i]; error = copyout(iset, uap->gidset, ngrp * sizeof(ibcs2_gid_t)); free(iset, M_TEMP); out: td->td_retval[0] = ngrp; return (error); } int ibcs2_setgroups(td, uap) struct thread *td; struct ibcs2_setgroups_args *uap; { ibcs2_gid_t *iset; gid_t *gp; int error, i; if (uap->gidsetsize < 0 || uap->gidsetsize > ngroups_max + 1) return (EINVAL); if (uap->gidsetsize && uap->gidset == NULL) return (EINVAL); gp = malloc(uap->gidsetsize * sizeof(*gp), M_TEMP, M_WAITOK); if (uap->gidsetsize) { iset = malloc(uap->gidsetsize * sizeof(*iset), M_TEMP, M_WAITOK); error = copyin(uap->gidset, iset, sizeof(ibcs2_gid_t) * uap->gidsetsize); if (error) { free(iset, M_TEMP); goto out; } for (i = 0; i < uap->gidsetsize; i++) gp[i] = (gid_t)iset[i]; } error = kern_setgroups(td, uap->gidsetsize, gp); out: free(gp, M_TEMP); return (error); } int ibcs2_setuid(td, uap) struct thread *td; struct ibcs2_setuid_args *uap; { struct setuid_args sa; sa.uid = (uid_t)uap->uid; return sys_setuid(td, &sa); } int ibcs2_setgid(td, uap) struct thread *td; struct ibcs2_setgid_args *uap; { struct setgid_args sa; sa.gid = (gid_t)uap->gid; return sys_setgid(td, &sa); } int ibcs2_time(td, uap) struct thread *td; struct ibcs2_time_args *uap; { struct timeval tv; microtime(&tv); td->td_retval[0] = tv.tv_sec; if (uap->tp) return copyout((caddr_t)&tv.tv_sec, (caddr_t)uap->tp, sizeof(ibcs2_time_t)); else return 0; } int ibcs2_pathconf(td, uap) struct thread *td; struct ibcs2_pathconf_args *uap; { char *path; int error; CHECKALTEXIST(td, uap->path, &path); uap->name++; /* iBCS2 _PC_* defines are offset by one */ error = kern_pathconf(td, path, UIO_SYSSPACE, uap->name, FOLLOW); free(path, M_TEMP); return (error); } int ibcs2_fpathconf(td, uap) struct thread *td; struct ibcs2_fpathconf_args *uap; { uap->name++; /* iBCS2 _PC_* defines are offset by one */ return sys_fpathconf(td, (struct fpathconf_args *)uap); } int ibcs2_sysconf(td, uap) struct thread *td; struct ibcs2_sysconf_args *uap; { int mib[2], value, len, error; struct proc *p; p = td->td_proc; switch(uap->name) { case IBCS2_SC_ARG_MAX: mib[1] = KERN_ARGMAX; break; case IBCS2_SC_CHILD_MAX: PROC_LOCK(p); td->td_retval[0] = lim_cur(td->td_proc, RLIMIT_NPROC); PROC_UNLOCK(p); return 0; case IBCS2_SC_CLK_TCK: td->td_retval[0] = hz; return 0; case IBCS2_SC_NGROUPS_MAX: mib[1] = KERN_NGROUPS; break; case IBCS2_SC_OPEN_MAX: PROC_LOCK(p); td->td_retval[0] = lim_cur(td->td_proc, RLIMIT_NOFILE); PROC_UNLOCK(p); return 0; case IBCS2_SC_JOB_CONTROL: mib[1] = KERN_JOB_CONTROL; break; case IBCS2_SC_SAVED_IDS: mib[1] = KERN_SAVED_IDS; break; case IBCS2_SC_VERSION: mib[1] = KERN_POSIX1; break; case IBCS2_SC_PASS_MAX: td->td_retval[0] = 128; /* XXX - should we create PASS_MAX ? */ return 0; case IBCS2_SC_XOPEN_VERSION: td->td_retval[0] = 2; /* XXX: What should that be? */ return 0; default: return EINVAL; } mib[0] = CTL_KERN; len = sizeof(value); error = kernel_sysctl(td, mib, 2, &value, &len, NULL, 0, NULL, 0); if (error) return error; td->td_retval[0] = value; return 0; } int ibcs2_alarm(td, uap) struct thread *td; struct ibcs2_alarm_args *uap; { struct itimerval itv, oitv; int error; timevalclear(&itv.it_interval); itv.it_value.tv_sec = uap->sec; itv.it_value.tv_usec = 0; error = kern_setitimer(td, ITIMER_REAL, &itv, &oitv); if (error) return (error); if (oitv.it_value.tv_usec != 0) oitv.it_value.tv_sec++; td->td_retval[0] = oitv.it_value.tv_sec; return (0); } int ibcs2_times(td, uap) struct thread *td; struct ibcs2_times_args *uap; { struct rusage ru; struct timeval t; struct tms tms; int error; #define CONVTCK(r) (r.tv_sec * hz + r.tv_usec / (1000000 / hz)) error = kern_getrusage(td, RUSAGE_SELF, &ru); if (error) return (error); tms.tms_utime = CONVTCK(ru.ru_utime); tms.tms_stime = CONVTCK(ru.ru_stime); error = kern_getrusage(td, RUSAGE_CHILDREN, &ru); if (error) return (error); tms.tms_cutime = CONVTCK(ru.ru_utime); tms.tms_cstime = CONVTCK(ru.ru_stime); microtime(&t); td->td_retval[0] = CONVTCK(t); return (copyout(&tms, uap->tp, sizeof(struct tms))); } int ibcs2_stime(td, uap) struct thread *td; struct ibcs2_stime_args *uap; { struct timeval tv; long secs; int error; error = copyin(uap->timep, &secs, sizeof(long)); if (error) return (error); tv.tv_sec = secs; tv.tv_usec = 0; error = kern_settimeofday(td, &tv, NULL); if (error) error = EPERM; return (error); } int ibcs2_utime(td, uap) struct thread *td; struct ibcs2_utime_args *uap; { struct ibcs2_utimbuf ubuf; struct timeval tbuf[2], *tp; char *path; int error; if (uap->buf) { error = copyin(uap->buf, &ubuf, sizeof(ubuf)); if (error) return (error); tbuf[0].tv_sec = ubuf.actime; tbuf[0].tv_usec = 0; tbuf[1].tv_sec = ubuf.modtime; tbuf[1].tv_usec = 0; tp = tbuf; } else tp = NULL; CHECKALTEXIST(td, uap->path, &path); error = kern_utimesat(td, AT_FDCWD, path, UIO_SYSSPACE, tp, UIO_SYSSPACE); free(path, M_TEMP); return (error); } int ibcs2_nice(td, uap) struct thread *td; struct ibcs2_nice_args *uap; { int error; struct setpriority_args sa; sa.which = PRIO_PROCESS; sa.who = 0; sa.prio = td->td_proc->p_nice + uap->incr; if ((error = sys_setpriority(td, &sa)) != 0) return EPERM; td->td_retval[0] = td->td_proc->p_nice; return 0; } /* * iBCS2 getpgrp, setpgrp, setsid, and setpgid */ int ibcs2_pgrpsys(td, uap) struct thread *td; struct ibcs2_pgrpsys_args *uap; { struct proc *p = td->td_proc; switch (uap->type) { case 0: /* getpgrp */ PROC_LOCK(p); td->td_retval[0] = p->p_pgrp->pg_id; PROC_UNLOCK(p); return 0; case 1: /* setpgrp */ { struct setpgid_args sa; sa.pid = 0; sa.pgid = 0; sys_setpgid(td, &sa); PROC_LOCK(p); td->td_retval[0] = p->p_pgrp->pg_id; PROC_UNLOCK(p); return 0; } case 2: /* setpgid */ { struct setpgid_args sa; sa.pid = uap->pid; sa.pgid = uap->pgid; return sys_setpgid(td, &sa); } case 3: /* setsid */ return sys_setsid(td, NULL); default: return EINVAL; } } /* * XXX - need to check for nested calls */ int ibcs2_plock(td, uap) struct thread *td; struct ibcs2_plock_args *uap; { int error; #define IBCS2_UNLOCK 0 #define IBCS2_PROCLOCK 1 #define IBCS2_TEXTLOCK 2 #define IBCS2_DATALOCK 4 switch(uap->cmd) { case IBCS2_UNLOCK: error = priv_check(td, PRIV_VM_MUNLOCK); if (error) return (error); /* XXX - TODO */ return (0); case IBCS2_PROCLOCK: case IBCS2_TEXTLOCK: case IBCS2_DATALOCK: error = priv_check(td, PRIV_VM_MLOCK); if (error) return (error); /* XXX - TODO */ return 0; } return EINVAL; } int ibcs2_uadmin(td, uap) struct thread *td; struct ibcs2_uadmin_args *uap; { #define SCO_A_REBOOT 1 #define SCO_A_SHUTDOWN 2 #define SCO_A_REMOUNT 4 #define SCO_A_CLOCK 8 #define SCO_A_SETCONFIG 128 #define SCO_A_GETDEV 130 #define SCO_AD_HALT 0 #define SCO_AD_BOOT 1 #define SCO_AD_IBOOT 2 #define SCO_AD_PWRDOWN 3 #define SCO_AD_PWRNAP 4 #define SCO_AD_PANICBOOT 1 #define SCO_AD_GETBMAJ 0 #define SCO_AD_GETCMAJ 1 switch(uap->cmd) { case SCO_A_REBOOT: case SCO_A_SHUTDOWN: switch(uap->func) { struct reboot_args r; case SCO_AD_HALT: case SCO_AD_PWRDOWN: case SCO_AD_PWRNAP: r.opt = RB_HALT; return (sys_reboot(td, &r)); case SCO_AD_BOOT: case SCO_AD_IBOOT: r.opt = RB_AUTOBOOT; return (sys_reboot(td, &r)); } return EINVAL; case SCO_A_REMOUNT: case SCO_A_CLOCK: case SCO_A_SETCONFIG: return 0; case SCO_A_GETDEV: return EINVAL; /* XXX - TODO */ } return EINVAL; } int ibcs2_sysfs(td, uap) struct thread *td; struct ibcs2_sysfs_args *uap; { #define IBCS2_GETFSIND 1 #define IBCS2_GETFSTYP 2 #define IBCS2_GETNFSTYP 3 switch(uap->cmd) { case IBCS2_GETFSIND: case IBCS2_GETFSTYP: case IBCS2_GETNFSTYP: break; } return EINVAL; /* XXX - TODO */ } int ibcs2_unlink(td, uap) struct thread *td; struct ibcs2_unlink_args *uap; { char *path; int error; CHECKALTEXIST(td, uap->path, &path); error = kern_unlinkat(td, AT_FDCWD, path, UIO_SYSSPACE, 0); free(path, M_TEMP); return (error); } int ibcs2_chdir(td, uap) struct thread *td; struct ibcs2_chdir_args *uap; { char *path; int error; CHECKALTEXIST(td, uap->path, &path); error = kern_chdir(td, path, UIO_SYSSPACE); free(path, M_TEMP); return (error); } int ibcs2_chmod(td, uap) struct thread *td; struct ibcs2_chmod_args *uap; { char *path; int error; CHECKALTEXIST(td, uap->path, &path); error = kern_fchmodat(td, AT_FDCWD, path, UIO_SYSSPACE, uap->mode, 0); free(path, M_TEMP); return (error); } int ibcs2_chown(td, uap) struct thread *td; struct ibcs2_chown_args *uap; { char *path; int error; CHECKALTEXIST(td, uap->path, &path); error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, uap->uid, uap->gid, 0); free(path, M_TEMP); return (error); } int ibcs2_rmdir(td, uap) struct thread *td; struct ibcs2_rmdir_args *uap; { char *path; int error; CHECKALTEXIST(td, uap->path, &path); error = kern_rmdirat(td, AT_FDCWD, path, UIO_SYSSPACE); free(path, M_TEMP); return (error); } int ibcs2_mkdir(td, uap) struct thread *td; struct ibcs2_mkdir_args *uap; { char *path; int error; CHECKALTEXIST(td, uap->path, &path); error = kern_mkdirat(td, AT_FDCWD, path, UIO_SYSSPACE, uap->mode); free(path, M_TEMP); return (error); } int ibcs2_symlink(td, uap) struct thread *td; struct ibcs2_symlink_args *uap; { char *path, *link; int error; CHECKALTEXIST(td, uap->path, &path); /* * Have to expand CHECKALTCREAT() so that 'path' can be freed on * errors. */ error = ibcs2_emul_find(td, uap->link, UIO_USERSPACE, &link, 1); if (link == NULL) { free(path, M_TEMP); return (error); } error = kern_symlinkat(td, path, AT_FDCWD, link, UIO_SYSSPACE); free(path, M_TEMP); free(link, M_TEMP); return (error); } int ibcs2_rename(td, uap) struct thread *td; struct ibcs2_rename_args *uap; { char *from, *to; int error; CHECKALTEXIST(td, uap->from, &from); /* * Have to expand CHECKALTCREAT() so that 'from' can be freed on * errors. */ error = ibcs2_emul_find(td, uap->to, UIO_USERSPACE, &to, 1); if (to == NULL) { free(from, M_TEMP); return (error); } error = kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, UIO_SYSSPACE); free(from, M_TEMP); free(to, M_TEMP); return (error); } int ibcs2_readlink(td, uap) struct thread *td; struct ibcs2_readlink_args *uap; { char *path; int error; CHECKALTEXIST(td, uap->path, &path); error = kern_readlinkat(td, AT_FDCWD, path, UIO_SYSSPACE, uap->buf, UIO_USERSPACE, uap->count); free(path, M_TEMP); return (error); } Index: head/sys/i386/linux/linux_machdep.c =================================================================== --- head/sys/i386/linux/linux_machdep.c (revision 282707) +++ head/sys/i386/linux/linux_machdep.c (revision 282708) @@ -1,1073 +1,1081 @@ /*- * Copyright (c) 2000 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* needed for pcb definition in linux_set_thread_area */ #include "opt_posix.h" extern struct sysentvec elf32_freebsd_sysvec; /* defined in i386/i386/elf_machdep.c */ struct l_descriptor { l_uint entry_number; l_ulong base_addr; l_uint limit; l_uint seg_32bit:1; l_uint contents:2; l_uint read_exec_only:1; l_uint limit_in_pages:1; l_uint seg_not_present:1; l_uint useable:1; }; struct l_old_select_argv { l_int nfds; l_fd_set *readfds; l_fd_set *writefds; l_fd_set *exceptfds; struct l_timeval *timeout; }; static int linux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot, l_int flags, l_int fd, l_loff_t pos); int linux_to_bsd_sigaltstack(int lsa) { int bsa = 0; if (lsa & LINUX_SS_DISABLE) bsa |= SS_DISABLE; if (lsa & LINUX_SS_ONSTACK) bsa |= SS_ONSTACK; return (bsa); } int bsd_to_linux_sigaltstack(int bsa) { int lsa = 0; if (bsa & SS_DISABLE) lsa |= LINUX_SS_DISABLE; if (bsa & SS_ONSTACK) lsa |= LINUX_SS_ONSTACK; return (lsa); } int linux_execve(struct thread *td, struct linux_execve_args *args) { - int error; - char *newpath; struct image_args eargs; + struct vmspace *oldvmspace; + char *newpath; + int error; LCONVPATHEXIST(td, args->path, &newpath); #ifdef DEBUG if (ldebug(execve)) printf(ARGS(execve, "%s"), newpath); #endif + error = pre_execve(td, &oldvmspace); + if (error != 0) { + free(newpath, M_TEMP); + return (error); + } error = exec_copyin_args(&eargs, newpath, UIO_SYSSPACE, args->argp, args->envp); free(newpath, M_TEMP); if (error == 0) error = kern_execve(td, &eargs, NULL); - if (error == 0) + if (error == 0) { /* linux process can exec fbsd one, dont attempt * to create emuldata for such process using * linux_proc_init, this leads to a panic on KASSERT * because such process has p->p_emuldata == NULL */ if (SV_PROC_ABI(td->td_proc) == SV_ABI_LINUX) error = linux_proc_init(td, 0, 0); + } + post_execve(td, error, oldvmspace); return (error); } struct l_ipc_kludge { struct l_msgbuf *msgp; l_long msgtyp; }; int linux_ipc(struct thread *td, struct linux_ipc_args *args) { switch (args->what & 0xFFFF) { case LINUX_SEMOP: { struct linux_semop_args a; a.semid = args->arg1; a.tsops = args->ptr; a.nsops = args->arg2; return (linux_semop(td, &a)); } case LINUX_SEMGET: { struct linux_semget_args a; a.key = args->arg1; a.nsems = args->arg2; a.semflg = args->arg3; return (linux_semget(td, &a)); } case LINUX_SEMCTL: { struct linux_semctl_args a; int error; a.semid = args->arg1; a.semnum = args->arg2; a.cmd = args->arg3; error = copyin(args->ptr, &a.arg, sizeof(a.arg)); if (error) return (error); return (linux_semctl(td, &a)); } case LINUX_MSGSND: { struct linux_msgsnd_args a; a.msqid = args->arg1; a.msgp = args->ptr; a.msgsz = args->arg2; a.msgflg = args->arg3; return (linux_msgsnd(td, &a)); } case LINUX_MSGRCV: { struct linux_msgrcv_args a; a.msqid = args->arg1; a.msgsz = args->arg2; a.msgflg = args->arg3; if ((args->what >> 16) == 0) { struct l_ipc_kludge tmp; int error; if (args->ptr == NULL) return (EINVAL); error = copyin(args->ptr, &tmp, sizeof(tmp)); if (error) return (error); a.msgp = tmp.msgp; a.msgtyp = tmp.msgtyp; } else { a.msgp = args->ptr; a.msgtyp = args->arg5; } return (linux_msgrcv(td, &a)); } case LINUX_MSGGET: { struct linux_msgget_args a; a.key = args->arg1; a.msgflg = args->arg2; return (linux_msgget(td, &a)); } case LINUX_MSGCTL: { struct linux_msgctl_args a; a.msqid = args->arg1; a.cmd = args->arg2; a.buf = args->ptr; return (linux_msgctl(td, &a)); } case LINUX_SHMAT: { struct linux_shmat_args a; a.shmid = args->arg1; a.shmaddr = args->ptr; a.shmflg = args->arg2; a.raddr = (l_ulong *)args->arg3; return (linux_shmat(td, &a)); } case LINUX_SHMDT: { struct linux_shmdt_args a; a.shmaddr = args->ptr; return (linux_shmdt(td, &a)); } case LINUX_SHMGET: { struct linux_shmget_args a; a.key = args->arg1; a.size = args->arg2; a.shmflg = args->arg3; return (linux_shmget(td, &a)); } case LINUX_SHMCTL: { struct linux_shmctl_args a; a.shmid = args->arg1; a.cmd = args->arg2; a.buf = args->ptr; return (linux_shmctl(td, &a)); } default: break; } return (EINVAL); } int linux_old_select(struct thread *td, struct linux_old_select_args *args) { struct l_old_select_argv linux_args; struct linux_select_args newsel; int error; #ifdef DEBUG if (ldebug(old_select)) printf(ARGS(old_select, "%p"), args->ptr); #endif error = copyin(args->ptr, &linux_args, sizeof(linux_args)); if (error) return (error); newsel.nfds = linux_args.nfds; newsel.readfds = linux_args.readfds; newsel.writefds = linux_args.writefds; newsel.exceptfds = linux_args.exceptfds; newsel.timeout = linux_args.timeout; return (linux_select(td, &newsel)); } int linux_set_cloned_tls(struct thread *td, void *desc) { struct segment_descriptor sd; struct l_user_desc info; int idx, error; int a[2]; error = copyin(desc, &info, sizeof(struct l_user_desc)); if (error) { printf(LMSG("copyin failed!")); } else { idx = info.entry_number; /* * looks like we're getting the idx we returned * in the set_thread_area() syscall */ if (idx != 6 && idx != 3) { printf(LMSG("resetting idx!")); idx = 3; } /* this doesnt happen in practice */ if (idx == 6) { /* we might copy out the entry_number as 3 */ info.entry_number = 3; error = copyout(&info, desc, sizeof(struct l_user_desc)); if (error) printf(LMSG("copyout failed!")); } a[0] = LINUX_LDT_entry_a(&info); a[1] = LINUX_LDT_entry_b(&info); memcpy(&sd, &a, sizeof(a)); #ifdef DEBUG if (ldebug(clone)) printf("Segment created in clone with " "CLONE_SETTLS: lobase: %x, hibase: %x, " "lolimit: %x, hilimit: %x, type: %i, " "dpl: %i, p: %i, xx: %i, def32: %i, " "gran: %i\n", sd.sd_lobase, sd.sd_hibase, sd.sd_lolimit, sd.sd_hilimit, sd.sd_type, sd.sd_dpl, sd.sd_p, sd.sd_xx, sd.sd_def32, sd.sd_gran); #endif /* set %gs */ td->td_pcb->pcb_gsd = sd; td->td_pcb->pcb_gs = GSEL(GUGS_SEL, SEL_UPL); } return (error); } int linux_set_upcall_kse(struct thread *td, register_t stack) { td->td_frame->tf_esp = stack; return (0); } #define STACK_SIZE (2 * 1024 * 1024) #define GUARD_SIZE (4 * PAGE_SIZE) int linux_mmap2(struct thread *td, struct linux_mmap2_args *args) { #ifdef DEBUG if (ldebug(mmap2)) printf(ARGS(mmap2, "%p, %d, %d, 0x%08x, %d, %d"), (void *)args->addr, args->len, args->prot, args->flags, args->fd, args->pgoff); #endif return (linux_mmap_common(td, args->addr, args->len, args->prot, args->flags, args->fd, (uint64_t)(uint32_t)args->pgoff * PAGE_SIZE)); } int linux_mmap(struct thread *td, struct linux_mmap_args *args) { int error; struct l_mmap_argv linux_args; error = copyin(args->ptr, &linux_args, sizeof(linux_args)); if (error) return (error); #ifdef DEBUG if (ldebug(mmap)) printf(ARGS(mmap, "%p, %d, %d, 0x%08x, %d, %d"), (void *)linux_args.addr, linux_args.len, linux_args.prot, linux_args.flags, linux_args.fd, linux_args.pgoff); #endif return (linux_mmap_common(td, linux_args.addr, linux_args.len, linux_args.prot, linux_args.flags, linux_args.fd, (uint32_t)linux_args.pgoff)); } static int linux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot, l_int flags, l_int fd, l_loff_t pos) { struct proc *p = td->td_proc; struct mmap_args /* { caddr_t addr; size_t len; int prot; int flags; int fd; long pad; off_t pos; } */ bsd_args; int error; struct file *fp; cap_rights_t rights; error = 0; bsd_args.flags = 0; fp = NULL; /* * Linux mmap(2): * You must specify exactly one of MAP_SHARED and MAP_PRIVATE */ if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE))) return (EINVAL); if (flags & LINUX_MAP_SHARED) bsd_args.flags |= MAP_SHARED; if (flags & LINUX_MAP_PRIVATE) bsd_args.flags |= MAP_PRIVATE; if (flags & LINUX_MAP_FIXED) bsd_args.flags |= MAP_FIXED; if (flags & LINUX_MAP_ANON) { /* Enforce pos to be on page boundary, then ignore. */ if ((pos & PAGE_MASK) != 0) return (EINVAL); pos = 0; bsd_args.flags |= MAP_ANON; } else bsd_args.flags |= MAP_NOSYNC; if (flags & LINUX_MAP_GROWSDOWN) bsd_args.flags |= MAP_STACK; /* * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC * on Linux/i386. We do this to ensure maximum compatibility. * Linux/ia64 does the same in i386 emulation mode. */ bsd_args.prot = prot; if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) bsd_args.prot |= PROT_READ | PROT_EXEC; /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */ bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd; if (bsd_args.fd != -1) { /* * Linux follows Solaris mmap(2) description: * The file descriptor fildes is opened with * read permission, regardless of the * protection options specified. * * Checking just CAP_MMAP is fine here, since the real work * is done in the FreeBSD mmap(). */ error = fget(td, bsd_args.fd, cap_rights_init(&rights, CAP_MMAP), &fp); if (error != 0) return (error); if (fp->f_type != DTYPE_VNODE) { fdrop(fp, td); return (EINVAL); } /* Linux mmap() just fails for O_WRONLY files */ if (!(fp->f_flag & FREAD)) { fdrop(fp, td); return (EACCES); } fdrop(fp, td); } if (flags & LINUX_MAP_GROWSDOWN) { /* * The Linux MAP_GROWSDOWN option does not limit auto * growth of the region. Linux mmap with this option * takes as addr the inital BOS, and as len, the initial * region size. It can then grow down from addr without * limit. However, linux threads has an implicit internal * limit to stack size of STACK_SIZE. Its just not * enforced explicitly in linux. But, here we impose * a limit of (STACK_SIZE - GUARD_SIZE) on the stack * region, since we can do this with our mmap. * * Our mmap with MAP_STACK takes addr as the maximum * downsize limit on BOS, and as len the max size of * the region. It them maps the top SGROWSIZ bytes, * and auto grows the region down, up to the limit * in addr. * * If we don't use the MAP_STACK option, the effect * of this code is to allocate a stack region of a * fixed size of (STACK_SIZE - GUARD_SIZE). */ if ((caddr_t)PTRIN(addr) + len > p->p_vmspace->vm_maxsaddr) { /* * Some linux apps will attempt to mmap * thread stacks near the top of their * address space. If their TOS is greater * than vm_maxsaddr, vm_map_growstack() * will confuse the thread stack with the * process stack and deliver a SEGV if they * attempt to grow the thread stack past their * current stacksize rlimit. To avoid this, * adjust vm_maxsaddr upwards to reflect * the current stacksize rlimit rather * than the maximum possible stacksize. * It would be better to adjust the * mmap'ed region, but some apps do not check * mmap's return value. */ PROC_LOCK(p); p->p_vmspace->vm_maxsaddr = (char *)USRSTACK - lim_cur(p, RLIMIT_STACK); PROC_UNLOCK(p); } /* * This gives us our maximum stack size and a new BOS. * If we're using VM_STACK, then mmap will just map * the top SGROWSIZ bytes, and let the stack grow down * to the limit at BOS. If we're not using VM_STACK * we map the full stack, since we don't have a way * to autogrow it. */ if (len > STACK_SIZE - GUARD_SIZE) { bsd_args.addr = (caddr_t)PTRIN(addr); bsd_args.len = len; } else { bsd_args.addr = (caddr_t)PTRIN(addr) - (STACK_SIZE - GUARD_SIZE - len); bsd_args.len = STACK_SIZE - GUARD_SIZE; } } else { bsd_args.addr = (caddr_t)PTRIN(addr); bsd_args.len = len; } bsd_args.pos = pos; #ifdef DEBUG if (ldebug(mmap)) printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n", __func__, (void *)bsd_args.addr, bsd_args.len, bsd_args.prot, bsd_args.flags, bsd_args.fd, (int)bsd_args.pos); #endif error = sys_mmap(td, &bsd_args); #ifdef DEBUG if (ldebug(mmap)) printf("-> %s() return: 0x%x (0x%08x)\n", __func__, error, (u_int)td->td_retval[0]); #endif return (error); } int linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) { struct mprotect_args bsd_args; bsd_args.addr = uap->addr; bsd_args.len = uap->len; bsd_args.prot = uap->prot; if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) bsd_args.prot |= PROT_READ | PROT_EXEC; return (sys_mprotect(td, &bsd_args)); } int linux_ioperm(struct thread *td, struct linux_ioperm_args *args) { int error; struct i386_ioperm_args iia; iia.start = args->start; iia.length = args->length; iia.enable = args->enable; error = i386_set_ioperm(td, &iia); return (error); } int linux_iopl(struct thread *td, struct linux_iopl_args *args) { int error; if (args->level < 0 || args->level > 3) return (EINVAL); if ((error = priv_check(td, PRIV_IO)) != 0) return (error); if ((error = securelevel_gt(td->td_ucred, 0)) != 0) return (error); td->td_frame->tf_eflags = (td->td_frame->tf_eflags & ~PSL_IOPL) | (args->level * (PSL_IOPL / 3)); return (0); } int linux_modify_ldt(struct thread *td, struct linux_modify_ldt_args *uap) { int error; struct i386_ldt_args ldt; struct l_descriptor ld; union descriptor desc; int size, written; switch (uap->func) { case 0x00: /* read_ldt */ ldt.start = 0; ldt.descs = uap->ptr; ldt.num = uap->bytecount / sizeof(union descriptor); error = i386_get_ldt(td, &ldt); td->td_retval[0] *= sizeof(union descriptor); break; case 0x02: /* read_default_ldt = 0 */ size = 5*sizeof(struct l_desc_struct); if (size > uap->bytecount) size = uap->bytecount; for (written = error = 0; written < size && error == 0; written++) error = subyte((char *)uap->ptr + written, 0); td->td_retval[0] = written; break; case 0x01: /* write_ldt */ case 0x11: /* write_ldt */ if (uap->bytecount != sizeof(ld)) return (EINVAL); error = copyin(uap->ptr, &ld, sizeof(ld)); if (error) return (error); ldt.start = ld.entry_number; ldt.descs = &desc; ldt.num = 1; desc.sd.sd_lolimit = (ld.limit & 0x0000ffff); desc.sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16; desc.sd.sd_lobase = (ld.base_addr & 0x00ffffff); desc.sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24; desc.sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) | (ld.contents << 2); desc.sd.sd_dpl = 3; desc.sd.sd_p = (ld.seg_not_present ^ 1); desc.sd.sd_xx = 0; desc.sd.sd_def32 = ld.seg_32bit; desc.sd.sd_gran = ld.limit_in_pages; error = i386_set_ldt(td, &ldt, &desc); break; default: error = ENOSYS; break; } if (error == EOPNOTSUPP) { printf("linux: modify_ldt needs kernel option USER_LDT\n"); error = ENOSYS; } return (error); } int linux_sigaction(struct thread *td, struct linux_sigaction_args *args) { l_osigaction_t osa; l_sigaction_t act, oact; int error; #ifdef DEBUG if (ldebug(sigaction)) printf(ARGS(sigaction, "%d, %p, %p"), args->sig, (void *)args->nsa, (void *)args->osa); #endif if (args->nsa != NULL) { error = copyin(args->nsa, &osa, sizeof(l_osigaction_t)); if (error) return (error); act.lsa_handler = osa.lsa_handler; act.lsa_flags = osa.lsa_flags; act.lsa_restorer = osa.lsa_restorer; LINUX_SIGEMPTYSET(act.lsa_mask); act.lsa_mask.__bits[0] = osa.lsa_mask; } error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL, args->osa ? &oact : NULL); if (args->osa != NULL && !error) { osa.lsa_handler = oact.lsa_handler; osa.lsa_flags = oact.lsa_flags; osa.lsa_restorer = oact.lsa_restorer; osa.lsa_mask = oact.lsa_mask.__bits[0]; error = copyout(&osa, args->osa, sizeof(l_osigaction_t)); } return (error); } /* * Linux has two extra args, restart and oldmask. We dont use these, * but it seems that "restart" is actually a context pointer that * enables the signal to happen with a different register set. */ int linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args) { sigset_t sigmask; l_sigset_t mask; #ifdef DEBUG if (ldebug(sigsuspend)) printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask); #endif LINUX_SIGEMPTYSET(mask); mask.__bits[0] = args->mask; linux_to_bsd_sigset(&mask, &sigmask); return (kern_sigsuspend(td, sigmask)); } int linux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap) { l_sigset_t lmask; sigset_t sigmask; int error; #ifdef DEBUG if (ldebug(rt_sigsuspend)) printf(ARGS(rt_sigsuspend, "%p, %d"), (void *)uap->newset, uap->sigsetsize); #endif if (uap->sigsetsize != sizeof(l_sigset_t)) return (EINVAL); error = copyin(uap->newset, &lmask, sizeof(l_sigset_t)); if (error) return (error); linux_to_bsd_sigset(&lmask, &sigmask); return (kern_sigsuspend(td, sigmask)); } int linux_pause(struct thread *td, struct linux_pause_args *args) { struct proc *p = td->td_proc; sigset_t sigmask; #ifdef DEBUG if (ldebug(pause)) printf(ARGS(pause, "")); #endif PROC_LOCK(p); sigmask = td->td_sigmask; PROC_UNLOCK(p); return (kern_sigsuspend(td, sigmask)); } int linux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap) { stack_t ss, oss; l_stack_t lss; int error; #ifdef DEBUG if (ldebug(sigaltstack)) printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss); #endif if (uap->uss != NULL) { error = copyin(uap->uss, &lss, sizeof(l_stack_t)); if (error) return (error); ss.ss_sp = lss.ss_sp; ss.ss_size = lss.ss_size; ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags); } error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL, (uap->uoss != NULL) ? &oss : NULL); if (!error && uap->uoss != NULL) { lss.ss_sp = oss.ss_sp; lss.ss_size = oss.ss_size; lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags); error = copyout(&lss, uap->uoss, sizeof(l_stack_t)); } return (error); } int linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args) { struct ftruncate_args sa; #ifdef DEBUG if (ldebug(ftruncate64)) printf(ARGS(ftruncate64, "%u, %jd"), args->fd, (intmax_t)args->length); #endif sa.fd = args->fd; sa.length = args->length; return sys_ftruncate(td, &sa); } int linux_set_thread_area(struct thread *td, struct linux_set_thread_area_args *args) { struct l_user_desc info; int error; int idx; int a[2]; struct segment_descriptor sd; error = copyin(args->desc, &info, sizeof(struct l_user_desc)); if (error) return (error); #ifdef DEBUG if (ldebug(set_thread_area)) printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, %i, %i, %i\n"), info.entry_number, info.base_addr, info.limit, info.seg_32bit, info.contents, info.read_exec_only, info.limit_in_pages, info.seg_not_present, info.useable); #endif idx = info.entry_number; /* * Semantics of linux version: every thread in the system has array of * 3 tls descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. This * syscall loads one of the selected tls decriptors with a value and * also loads GDT descriptors 6, 7 and 8 with the content of the * per-thread descriptors. * * Semantics of fbsd version: I think we can ignore that linux has 3 * per-thread descriptors and use just the 1st one. The tls_array[] * is used only in set/get-thread_area() syscalls and for loading the * GDT descriptors. In fbsd we use just one GDT descriptor for TLS so * we will load just one. * * XXX: this doesn't work when a user space process tries to use more * than 1 TLS segment. Comment in the linux sources says wine might do * this. */ /* * we support just GLIBC TLS now * we should let 3 proceed as well because we use this segment so * if code does two subsequent calls it should succeed */ if (idx != 6 && idx != -1 && idx != 3) return (EINVAL); /* * we have to copy out the GDT entry we use * FreeBSD uses GDT entry #3 for storing %gs so load that * * XXX: what if a user space program doesn't check this value and tries * to use 6, 7 or 8? */ idx = info.entry_number = 3; error = copyout(&info, args->desc, sizeof(struct l_user_desc)); if (error) return (error); if (LINUX_LDT_empty(&info)) { a[0] = 0; a[1] = 0; } else { a[0] = LINUX_LDT_entry_a(&info); a[1] = LINUX_LDT_entry_b(&info); } memcpy(&sd, &a, sizeof(a)); #ifdef DEBUG if (ldebug(set_thread_area)) printf("Segment created in set_thread_area: lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, xx: %i, def32: %i, gran: %i\n", sd.sd_lobase, sd.sd_hibase, sd.sd_lolimit, sd.sd_hilimit, sd.sd_type, sd.sd_dpl, sd.sd_p, sd.sd_xx, sd.sd_def32, sd.sd_gran); #endif /* this is taken from i386 version of cpu_set_user_tls() */ critical_enter(); /* set %gs */ td->td_pcb->pcb_gsd = sd; PCPU_GET(fsgs_gdt)[1] = sd; load_gs(GSEL(GUGS_SEL, SEL_UPL)); critical_exit(); return (0); } int linux_get_thread_area(struct thread *td, struct linux_get_thread_area_args *args) { struct l_user_desc info; int error; int idx; struct l_desc_struct desc; struct segment_descriptor sd; #ifdef DEBUG if (ldebug(get_thread_area)) printf(ARGS(get_thread_area, "%p"), args->desc); #endif error = copyin(args->desc, &info, sizeof(struct l_user_desc)); if (error) return (error); idx = info.entry_number; /* XXX: I am not sure if we want 3 to be allowed too. */ if (idx != 6 && idx != 3) return (EINVAL); idx = 3; memset(&info, 0, sizeof(info)); sd = PCPU_GET(fsgs_gdt)[1]; memcpy(&desc, &sd, sizeof(desc)); info.entry_number = idx; info.base_addr = LINUX_GET_BASE(&desc); info.limit = LINUX_GET_LIMIT(&desc); info.seg_32bit = LINUX_GET_32BIT(&desc); info.contents = LINUX_GET_CONTENTS(&desc); info.read_exec_only = !LINUX_GET_WRITABLE(&desc); info.limit_in_pages = LINUX_GET_LIMIT_PAGES(&desc); info.seg_not_present = !LINUX_GET_PRESENT(&desc); info.useable = LINUX_GET_USEABLE(&desc); error = copyout(&info, args->desc, sizeof(struct l_user_desc)); if (error) return (EFAULT); return (0); } /* XXX: this wont work with module - convert it */ int linux_mq_open(struct thread *td, struct linux_mq_open_args *args) { #ifdef P1003_1B_MQUEUE return sys_kmq_open(td, (struct kmq_open_args *) args); #else return (ENOSYS); #endif } int linux_mq_unlink(struct thread *td, struct linux_mq_unlink_args *args) { #ifdef P1003_1B_MQUEUE return sys_kmq_unlink(td, (struct kmq_unlink_args *) args); #else return (ENOSYS); #endif } int linux_mq_timedsend(struct thread *td, struct linux_mq_timedsend_args *args) { #ifdef P1003_1B_MQUEUE return sys_kmq_timedsend(td, (struct kmq_timedsend_args *) args); #else return (ENOSYS); #endif } int linux_mq_timedreceive(struct thread *td, struct linux_mq_timedreceive_args *args) { #ifdef P1003_1B_MQUEUE return sys_kmq_timedreceive(td, (struct kmq_timedreceive_args *) args); #else return (ENOSYS); #endif } int linux_mq_notify(struct thread *td, struct linux_mq_notify_args *args) { #ifdef P1003_1B_MQUEUE return sys_kmq_notify(td, (struct kmq_notify_args *) args); #else return (ENOSYS); #endif } int linux_mq_getsetattr(struct thread *td, struct linux_mq_getsetattr_args *args) { #ifdef P1003_1B_MQUEUE return sys_kmq_setattr(td, (struct kmq_setattr_args *) args); #else return (ENOSYS); #endif } int linux_wait4(struct thread *td, struct linux_wait4_args *args) { int error, options; struct rusage ru, *rup; #ifdef DEBUG if (ldebug(wait4)) printf(ARGS(wait4, "%d, %p, %d, %p"), args->pid, (void *)args->status, args->options, (void *)args->rusage); #endif options = (args->options & (WNOHANG | WUNTRACED)); /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ if (args->options & __WCLONE) options |= WLINUXCLONE; if (args->rusage != NULL) rup = &ru; else rup = NULL; error = linux_common_wait(td, args->pid, args->status, options, rup); if (error) return (error); if (args->rusage != NULL) error = copyout(&ru, args->rusage, sizeof(ru)); return (error); } Index: head/sys/kern/kern_exec.c =================================================================== --- head/sys/kern/kern_exec.c (revision 282707) +++ head/sys/kern/kern_exec.c (revision 282708) @@ -1,1511 +1,1522 @@ /*- * Copyright (c) 1993, David Greenman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_hwpmc_hooks.h" #include "opt_ktrace.h" #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include #endif #include #include #include #ifdef KDTRACE_HOOKS #include dtrace_execexit_func_t dtrace_fasttrap_exec; #endif SDT_PROVIDER_DECLARE(proc); SDT_PROBE_DEFINE1(proc, kernel, , exec, "char *"); SDT_PROBE_DEFINE1(proc, kernel, , exec__failure, "int"); SDT_PROBE_DEFINE1(proc, kernel, , exec__success, "char *"); MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments"); static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS); static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS); static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS); static int do_execve(struct thread *td, struct image_args *args, struct mac *mac_p); /* XXX This should be vm_size_t. */ SYSCTL_PROC(_kern, KERN_PS_STRINGS, ps_strings, CTLTYPE_ULONG|CTLFLAG_RD, NULL, 0, sysctl_kern_ps_strings, "LU", ""); /* XXX This should be vm_size_t. */ SYSCTL_PROC(_kern, KERN_USRSTACK, usrstack, CTLTYPE_ULONG|CTLFLAG_RD| CTLFLAG_CAPRD, NULL, 0, sysctl_kern_usrstack, "LU", ""); SYSCTL_PROC(_kern, OID_AUTO, stackprot, CTLTYPE_INT|CTLFLAG_RD, NULL, 0, sysctl_kern_stackprot, "I", ""); u_long ps_arg_cache_limit = PAGE_SIZE / 16; SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW, &ps_arg_cache_limit, 0, ""); static int disallow_high_osrel; SYSCTL_INT(_kern, OID_AUTO, disallow_high_osrel, CTLFLAG_RW, &disallow_high_osrel, 0, "Disallow execution of binaries built for higher version of the world"); static int map_at_zero = 0; SYSCTL_INT(_security_bsd, OID_AUTO, map_at_zero, CTLFLAG_RWTUN, &map_at_zero, 0, "Permit processes to map an object at virtual address 0."); static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS) { struct proc *p; int error; p = curproc; #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { unsigned int val; val = (unsigned int)p->p_sysent->sv_psstrings; error = SYSCTL_OUT(req, &val, sizeof(val)); } else #endif error = SYSCTL_OUT(req, &p->p_sysent->sv_psstrings, sizeof(p->p_sysent->sv_psstrings)); return error; } static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS) { struct proc *p; int error; p = curproc; #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { unsigned int val; val = (unsigned int)p->p_sysent->sv_usrstack; error = SYSCTL_OUT(req, &val, sizeof(val)); } else #endif error = SYSCTL_OUT(req, &p->p_sysent->sv_usrstack, sizeof(p->p_sysent->sv_usrstack)); return error; } static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS) { struct proc *p; p = curproc; return (SYSCTL_OUT(req, &p->p_sysent->sv_stackprot, sizeof(p->p_sysent->sv_stackprot))); } /* * Each of the items is a pointer to a `const struct execsw', hence the * double pointer here. */ static const struct execsw **execsw; #ifndef _SYS_SYSPROTO_H_ struct execve_args { char *fname; char **argv; char **envv; }; #endif int -sys_execve(td, uap) - struct thread *td; - struct execve_args /* { - char *fname; - char **argv; - char **envv; - } */ *uap; +sys_execve(struct thread *td, struct execve_args *uap) { - int error; struct image_args args; + struct vmspace *oldvmspace; + int error; + error = pre_execve(td, &oldvmspace); + if (error != 0) + return (error); error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &args, NULL); + post_execve(td, error, oldvmspace); return (error); } #ifndef _SYS_SYSPROTO_H_ struct fexecve_args { int fd; char **argv; char **envv; } #endif int sys_fexecve(struct thread *td, struct fexecve_args *uap) { - int error; struct image_args args; + struct vmspace *oldvmspace; + int error; + error = pre_execve(td, &oldvmspace); + if (error != 0) + return (error); error = exec_copyin_args(&args, NULL, UIO_SYSSPACE, uap->argv, uap->envv); if (error == 0) { args.fd = uap->fd; error = kern_execve(td, &args, NULL); } + post_execve(td, error, oldvmspace); return (error); } #ifndef _SYS_SYSPROTO_H_ struct __mac_execve_args { char *fname; char **argv; char **envv; struct mac *mac_p; }; #endif int -sys___mac_execve(td, uap) - struct thread *td; - struct __mac_execve_args /* { - char *fname; - char **argv; - char **envv; - struct mac *mac_p; - } */ *uap; +sys___mac_execve(struct thread *td, struct __mac_execve_args *uap) { #ifdef MAC - int error; struct image_args args; + struct vmspace *oldvmspace; + int error; + error = pre_execve(td, &oldvmspace); + if (error != 0) + return (error); error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &args, uap->mac_p); + post_execve(td, error, oldvmspace); return (error); #else return (ENOSYS); #endif } -/* - * XXX: kern_execve has the astonishing property of not always returning to - * the caller. If sufficiently bad things happen during the call to - * do_execve(), it can end up calling exit1(); as a result, callers must - * avoid doing anything which they might need to undo (e.g., allocating - * memory). - */ int -kern_execve(td, args, mac_p) - struct thread *td; - struct image_args *args; - struct mac *mac_p; +pre_execve(struct thread *td, struct vmspace **oldvmspace) { - struct proc *p = td->td_proc; - struct vmspace *oldvmspace; + struct proc *p; int error; - AUDIT_ARG_ARGV(args->begin_argv, args->argc, - args->begin_envv - args->begin_argv); - AUDIT_ARG_ENVV(args->begin_envv, args->envc, - args->endp - args->begin_envv); - if (p->p_flag & P_HADTHREADS) { + KASSERT(td == curthread, ("non-current thread %p", td)); + error = 0; + p = td->td_proc; + if ((p->p_flag & P_HADTHREADS) != 0) { PROC_LOCK(p); - if (thread_single(p, SINGLE_BOUNDARY)) { - PROC_UNLOCK(p); - exec_free_args(args); - return (ERESTART); /* Try again later. */ - } + if (thread_single(p, SINGLE_BOUNDARY) != 0) + error = ERESTART; PROC_UNLOCK(p); } + KASSERT(error != 0 || (td->td_pflags & TDP_EXECVMSPC) == 0, + ("nested execve")); + *oldvmspace = p->p_vmspace; + return (error); +} - KASSERT((td->td_pflags & TDP_EXECVMSPC) == 0, ("nested execve")); - oldvmspace = td->td_proc->p_vmspace; - error = do_execve(td, args, mac_p); +void +post_execve(struct thread *td, int error, struct vmspace *oldvmspace) +{ + struct proc *p; - if (p->p_flag & P_HADTHREADS) { + KASSERT(td == curthread, ("non-current thread %p", td)); + p = td->td_proc; + if ((p->p_flag & P_HADTHREADS) != 0) { PROC_LOCK(p); /* * If success, we upgrade to SINGLE_EXIT state to * force other threads to suicide. */ if (error == 0) thread_single(p, SINGLE_EXIT); else thread_single_end(p, SINGLE_BOUNDARY); PROC_UNLOCK(p); } if ((td->td_pflags & TDP_EXECVMSPC) != 0) { - KASSERT(td->td_proc->p_vmspace != oldvmspace, + KASSERT(p->p_vmspace != oldvmspace, ("oldvmspace still used")); vmspace_free(oldvmspace); td->td_pflags &= ~TDP_EXECVMSPC; } +} - return (error); +/* + * XXX: kern_execve has the astonishing property of not always returning to + * the caller. If sufficiently bad things happen during the call to + * do_execve(), it can end up calling exit1(); as a result, callers must + * avoid doing anything which they might need to undo (e.g., allocating + * memory). + */ +int +kern_execve(struct thread *td, struct image_args *args, struct mac *mac_p) +{ + + AUDIT_ARG_ARGV(args->begin_argv, args->argc, + args->begin_envv - args->begin_argv); + AUDIT_ARG_ENVV(args->begin_envv, args->envc, + args->endp - args->begin_envv); + return (do_execve(td, args, mac_p)); } /* * In-kernel implementation of execve(). All arguments are assumed to be * userspace pointers from the passed thread. */ static int do_execve(td, args, mac_p) struct thread *td; struct image_args *args; struct mac *mac_p; { struct proc *p = td->td_proc; struct nameidata nd; struct ucred *newcred = NULL, *oldcred; struct uidinfo *euip = NULL; register_t *stack_base; int error, i; struct image_params image_params, *imgp; struct vattr attr; int (*img_first)(struct image_params *); struct pargs *oldargs = NULL, *newargs = NULL; struct sigacts *oldsigacts, *newsigacts; #ifdef KTRACE struct vnode *tracevp = NULL; struct ucred *tracecred = NULL; #endif struct vnode *textvp = NULL, *binvp; cap_rights_t rights; int credential_changing; int textset; #ifdef MAC struct label *interpvplabel = NULL; int will_transition; #endif #ifdef HWPMC_HOOKS struct pmckern_procexec pe; #endif static const char fexecv_proc_title[] = "(fexecv)"; imgp = &image_params; /* * Lock the process and set the P_INEXEC flag to indicate that * it should be left alone until we're done here. This is * necessary to avoid race conditions - e.g. in ptrace() - * that might allow a local user to illicitly obtain elevated * privileges. */ PROC_LOCK(p); KASSERT((p->p_flag & P_INEXEC) == 0, ("%s(): process already has P_INEXEC flag", __func__)); p->p_flag |= P_INEXEC; PROC_UNLOCK(p); /* * Initialize part of the common data */ bzero(imgp, sizeof(*imgp)); imgp->proc = p; imgp->attr = &attr; imgp->args = args; #ifdef MAC error = mac_execve_enter(imgp, mac_p); if (error) goto exec_fail; #endif /* * Translate the file name. namei() returns a vnode pointer * in ni_vp amoung other things. * * XXXAUDIT: It would be desirable to also audit the name of the * interpreter if this is an interpreted binary. */ if (args->fname != NULL) { NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | FOLLOW | SAVENAME | AUDITVNODE1, UIO_SYSSPACE, args->fname, td); } SDT_PROBE(proc, kernel, , exec, args->fname, 0, 0, 0, 0 ); interpret: if (args->fname != NULL) { #ifdef CAPABILITY_MODE /* * While capability mode can't reach this point via direct * path arguments to execve(), we also don't allow * interpreters to be used in capability mode (for now). * Catch indirect lookups and return a permissions error. */ if (IN_CAPABILITY_MODE(td)) { error = ECAPMODE; goto exec_fail; } #endif error = namei(&nd); if (error) goto exec_fail; binvp = nd.ni_vp; imgp->vp = binvp; } else { AUDIT_ARG_FD(args->fd); /* * Descriptors opened only with O_EXEC or O_RDONLY are allowed. */ error = fgetvp_exec(td, args->fd, cap_rights_init(&rights, CAP_FEXECVE), &binvp); if (error) goto exec_fail; vn_lock(binvp, LK_EXCLUSIVE | LK_RETRY); AUDIT_ARG_VNODE1(binvp); imgp->vp = binvp; } /* * Check file permissions (also 'opens' file) */ error = exec_check_permissions(imgp); if (error) goto exec_fail_dealloc; imgp->object = imgp->vp->v_object; if (imgp->object != NULL) vm_object_reference(imgp->object); /* * Set VV_TEXT now so no one can write to the executable while we're * activating it. * * Remember if this was set before and unset it in case this is not * actually an executable image. */ textset = VOP_IS_TEXT(imgp->vp); VOP_SET_TEXT(imgp->vp); error = exec_map_first_page(imgp); if (error) goto exec_fail_dealloc; imgp->proc->p_osrel = 0; /* * If the current process has a special image activator it * wants to try first, call it. For example, emulating shell * scripts differently. */ error = -1; if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL) error = img_first(imgp); /* * Loop through the list of image activators, calling each one. * An activator returns -1 if there is no match, 0 on success, * and an error otherwise. */ for (i = 0; error == -1 && execsw[i]; ++i) { if (execsw[i]->ex_imgact == NULL || execsw[i]->ex_imgact == img_first) { continue; } error = (*execsw[i]->ex_imgact)(imgp); } if (error) { if (error == -1) { if (textset == 0) VOP_UNSET_TEXT(imgp->vp); error = ENOEXEC; } goto exec_fail_dealloc; } /* * Special interpreter operation, cleanup and loop up to try to * activate the interpreter. */ if (imgp->interpreted) { exec_unmap_first_page(imgp); /* * VV_TEXT needs to be unset for scripts. There is a short * period before we determine that something is a script where * VV_TEXT will be set. The vnode lock is held over this * entire period so nothing should illegitimately be blocked. */ VOP_UNSET_TEXT(imgp->vp); /* free name buffer and old vnode */ if (args->fname != NULL) NDFREE(&nd, NDF_ONLY_PNBUF); #ifdef MAC mac_execve_interpreter_enter(binvp, &interpvplabel); #endif if (imgp->opened) { VOP_CLOSE(binvp, FREAD, td->td_ucred, td); imgp->opened = 0; } vput(binvp); vm_object_deallocate(imgp->object); imgp->object = NULL; /* set new name to that of the interpreter */ NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME, UIO_SYSSPACE, imgp->interpreter_name, td); args->fname = imgp->interpreter_name; goto interpret; } /* * NB: We unlock the vnode here because it is believed that none * of the sv_copyout_strings/sv_fixup operations require the vnode. */ VOP_UNLOCK(imgp->vp, 0); /* * Do the best to calculate the full path to the image file. */ if (imgp->auxargs != NULL && ((args->fname != NULL && args->fname[0] == '/') || vn_fullpath(td, imgp->vp, &imgp->execpath, &imgp->freepath) != 0)) imgp->execpath = args->fname; if (disallow_high_osrel && P_OSREL_MAJOR(p->p_osrel) > P_OSREL_MAJOR(__FreeBSD_version)) { error = ENOEXEC; uprintf("Osrel %d for image %s too high\n", p->p_osrel, imgp->execpath != NULL ? imgp->execpath : ""); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); goto exec_fail_dealloc; } /* * Copy out strings (args and env) and initialize stack base */ if (p->p_sysent->sv_copyout_strings) stack_base = (*p->p_sysent->sv_copyout_strings)(imgp); else stack_base = exec_copyout_strings(imgp); /* * If custom stack fixup routine present for this process * let it do the stack setup. * Else stuff argument count as first item on stack */ if (p->p_sysent->sv_fixup != NULL) (*p->p_sysent->sv_fixup)(&stack_base, imgp); else suword(--stack_base, imgp->args->argc); /* * For security and other reasons, the file descriptor table cannot * be shared after an exec. */ fdunshare(td); /* close files on exec */ fdcloseexec(td); /* * Malloc things before we need locks. */ i = imgp->args->begin_envv - imgp->args->begin_argv; /* Cache arguments if they fit inside our allowance */ if (ps_arg_cache_limit >= i + sizeof(struct pargs)) { newargs = pargs_alloc(i); bcopy(imgp->args->begin_argv, newargs->ar_args, i); } vn_lock(imgp->vp, LK_SHARED | LK_RETRY); /* Get a reference to the vnode prior to locking the proc */ VREF(binvp); /* * For security and other reasons, signal handlers cannot * be shared after an exec. The new process gets a copy of the old * handlers. In execsigs(), the new process will have its signals * reset. */ if (sigacts_shared(p->p_sigacts)) { oldsigacts = p->p_sigacts; newsigacts = sigacts_alloc(); sigacts_copy(newsigacts, oldsigacts); } else { oldsigacts = NULL; newsigacts = NULL; /* satisfy gcc */ } PROC_LOCK(p); if (oldsigacts) p->p_sigacts = newsigacts; oldcred = p->p_ucred; /* Stop profiling */ stopprofclock(p); /* reset caught signals */ execsigs(p); /* name this process - nameiexec(p, ndp) */ bzero(p->p_comm, sizeof(p->p_comm)); if (args->fname) bcopy(nd.ni_cnd.cn_nameptr, p->p_comm, min(nd.ni_cnd.cn_namelen, MAXCOMLEN)); else if (vn_commname(binvp, p->p_comm, sizeof(p->p_comm)) != 0) bcopy(fexecv_proc_title, p->p_comm, sizeof(fexecv_proc_title)); bcopy(p->p_comm, td->td_name, sizeof(td->td_name)); #ifdef KTR sched_clear_tdname(td); #endif /* * mark as execed, wakeup the process that vforked (if any) and tell * it that it now has its own resources back */ p->p_flag |= P_EXEC; if ((p->p_flag2 & P2_NOTRACE_EXEC) == 0) p->p_flag2 &= ~P2_NOTRACE; if (p->p_flag & P_PPWAIT) { p->p_flag &= ~(P_PPWAIT | P_PPTRACE); cv_broadcast(&p->p_pwait); } /* * Implement image setuid/setgid. * * Don't honor setuid/setgid if the filesystem prohibits it or if * the process is being traced. * * We disable setuid/setgid/etc in compatibility mode on the basis * that most setugid applications are not written with that * environment in mind, and will therefore almost certainly operate * incorrectly. In principle there's no reason that setugid * applications might not be useful in capability mode, so we may want * to reconsider this conservative design choice in the future. * * XXXMAC: For the time being, use NOSUID to also prohibit * transitions on the file system. */ credential_changing = 0; credential_changing |= (attr.va_mode & S_ISUID) && oldcred->cr_uid != attr.va_uid; credential_changing |= (attr.va_mode & S_ISGID) && oldcred->cr_gid != attr.va_gid; #ifdef MAC will_transition = mac_vnode_execve_will_transition(oldcred, imgp->vp, interpvplabel, imgp); credential_changing |= will_transition; #endif if (credential_changing && #ifdef CAPABILITY_MODE ((oldcred->cr_flags & CRED_FLAG_CAPMODE) == 0) && #endif (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 && (p->p_flag & P_TRACED) == 0) { /* * Turn off syscall tracing for set-id programs, except for * root. Record any set-id flags first to make sure that * we do not regain any tracing during a possible block. */ setsugid(p); #ifdef KTRACE if (p->p_tracecred != NULL && priv_check_cred(p->p_tracecred, PRIV_DEBUG_DIFFCRED, 0)) ktrprocexec(p, &tracecred, &tracevp); #endif /* * Close any file descriptors 0..2 that reference procfs, * then make sure file descriptors 0..2 are in use. * * Both fdsetugidsafety() and fdcheckstd() may call functions * taking sleepable locks, so temporarily drop our locks. */ PROC_UNLOCK(p); VOP_UNLOCK(imgp->vp, 0); fdsetugidsafety(td); error = fdcheckstd(td); if (error != 0) goto done1; newcred = crdup(oldcred); euip = uifind(attr.va_uid); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); PROC_LOCK(p); /* * Set the new credentials. */ if (attr.va_mode & S_ISUID) change_euid(newcred, euip); if (attr.va_mode & S_ISGID) change_egid(newcred, attr.va_gid); #ifdef MAC if (will_transition) { mac_vnode_execve_transition(oldcred, newcred, imgp->vp, interpvplabel, imgp); } #endif /* * Implement correct POSIX saved-id behavior. * * XXXMAC: Note that the current logic will save the * uid and gid if a MAC domain transition occurs, even * though maybe it shouldn't. */ change_svuid(newcred, newcred->cr_uid); change_svgid(newcred, newcred->cr_gid); proc_set_cred(p, newcred); } else { if (oldcred->cr_uid == oldcred->cr_ruid && oldcred->cr_gid == oldcred->cr_rgid) p->p_flag &= ~P_SUGID; /* * Implement correct POSIX saved-id behavior. * * XXX: It's not clear that the existing behavior is * POSIX-compliant. A number of sources indicate that the * saved uid/gid should only be updated if the new ruid is * not equal to the old ruid, or the new euid is not equal * to the old euid and the new euid is not equal to the old * ruid. The FreeBSD code always updates the saved uid/gid. * Also, this code uses the new (replaced) euid and egid as * the source, which may or may not be the right ones to use. */ if (oldcred->cr_svuid != oldcred->cr_uid || oldcred->cr_svgid != oldcred->cr_gid) { PROC_UNLOCK(p); VOP_UNLOCK(imgp->vp, 0); newcred = crdup(oldcred); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); PROC_LOCK(p); change_svuid(newcred, newcred->cr_uid); change_svgid(newcred, newcred->cr_gid); proc_set_cred(p, newcred); } } /* * Store the vp for use in procfs. This vnode was referenced prior * to locking the proc lock. */ textvp = p->p_textvp; p->p_textvp = binvp; #ifdef KDTRACE_HOOKS /* * Tell the DTrace fasttrap provider about the exec if it * has declared an interest. */ if (dtrace_fasttrap_exec) dtrace_fasttrap_exec(p); #endif /* * Notify others that we exec'd, and clear the P_INEXEC flag * as we're now a bona fide freshly-execed process. */ KNOTE_LOCKED(&p->p_klist, NOTE_EXEC); p->p_flag &= ~P_INEXEC; /* clear "fork but no exec" flag, as we _are_ execing */ p->p_acflag &= ~AFORK; /* * Free any previous argument cache and replace it with * the new argument cache, if any. */ oldargs = p->p_args; p->p_args = newargs; newargs = NULL; #ifdef HWPMC_HOOKS /* * Check if system-wide sampling is in effect or if the * current process is using PMCs. If so, do exec() time * processing. This processing needs to happen AFTER the * P_INEXEC flag is cleared. * * The proc lock needs to be released before taking the PMC * SX. */ if (PMC_SYSTEM_SAMPLING_ACTIVE() || PMC_PROC_IS_USING_PMCS(p)) { PROC_UNLOCK(p); VOP_UNLOCK(imgp->vp, 0); pe.pm_credentialschanged = credential_changing; pe.pm_entryaddr = imgp->entry_addr; PMC_CALL_HOOK_X(td, PMC_FN_PROCESS_EXEC, (void *) &pe); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); } else PROC_UNLOCK(p); #else /* !HWPMC_HOOKS */ PROC_UNLOCK(p); #endif /* Set values passed into the program in registers. */ if (p->p_sysent->sv_setregs) (*p->p_sysent->sv_setregs)(td, imgp, (u_long)(uintptr_t)stack_base); else exec_setregs(td, imgp, (u_long)(uintptr_t)stack_base); vfs_mark_atime(imgp->vp, td->td_ucred); SDT_PROBE(proc, kernel, , exec__success, args->fname, 0, 0, 0, 0); VOP_UNLOCK(imgp->vp, 0); done1: /* * Free any resources malloc'd earlier that we didn't use. */ if (euip != NULL) uifree(euip); if (newcred != NULL) crfree(oldcred); /* * Handle deferred decrement of ref counts. */ if (textvp != NULL) vrele(textvp); if (error != 0) vrele(binvp); #ifdef KTRACE if (tracevp != NULL) vrele(tracevp); if (tracecred != NULL) crfree(tracecred); #endif vn_lock(imgp->vp, LK_SHARED | LK_RETRY); pargs_drop(oldargs); pargs_drop(newargs); if (oldsigacts != NULL) sigacts_free(oldsigacts); exec_fail_dealloc: /* * free various allocated resources */ if (imgp->firstpage != NULL) exec_unmap_first_page(imgp); if (imgp->vp != NULL) { if (args->fname) NDFREE(&nd, NDF_ONLY_PNBUF); if (imgp->opened) VOP_CLOSE(imgp->vp, FREAD, td->td_ucred, td); vput(imgp->vp); } if (imgp->object != NULL) vm_object_deallocate(imgp->object); free(imgp->freepath, M_TEMP); if (error == 0) { PROC_LOCK(p); td->td_dbgflags |= TDB_EXEC; PROC_UNLOCK(p); /* * Stop the process here if its stop event mask has * the S_EXEC bit set. */ STOPEVENT(p, S_EXEC, 0); goto done2; } exec_fail: /* we're done here, clear P_INEXEC */ PROC_LOCK(p); p->p_flag &= ~P_INEXEC; PROC_UNLOCK(p); SDT_PROBE(proc, kernel, , exec__failure, error, 0, 0, 0, 0); done2: #ifdef MAC mac_execve_exit(imgp); mac_execve_interpreter_exit(interpvplabel); #endif exec_free_args(args); if (error && imgp->vmspace_destroyed) { /* sorry, no more process anymore. exit gracefully */ exit1(td, W_EXITCODE(0, SIGABRT)); /* NOT REACHED */ } #ifdef KTRACE if (error == 0) ktrprocctor(p); #endif return (error); } int exec_map_first_page(imgp) struct image_params *imgp; { int rv, i; int initial_pagein; vm_page_t ma[VM_INITIAL_PAGEIN]; vm_object_t object; if (imgp->firstpage != NULL) exec_unmap_first_page(imgp); object = imgp->vp->v_object; if (object == NULL) return (EACCES); VM_OBJECT_WLOCK(object); #if VM_NRESERVLEVEL > 0 vm_object_color(object, 0); #endif ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL); if (ma[0]->valid != VM_PAGE_BITS_ALL) { initial_pagein = VM_INITIAL_PAGEIN; if (initial_pagein > object->size) initial_pagein = object->size; for (i = 1; i < initial_pagein; i++) { if ((ma[i] = vm_page_next(ma[i - 1])) != NULL) { if (ma[i]->valid) break; if (vm_page_tryxbusy(ma[i])) break; } else { ma[i] = vm_page_alloc(object, i, VM_ALLOC_NORMAL | VM_ALLOC_IFNOTCACHED); if (ma[i] == NULL) break; } } initial_pagein = i; rv = vm_pager_get_pages(object, ma, initial_pagein, 0); ma[0] = vm_page_lookup(object, 0); if ((rv != VM_PAGER_OK) || (ma[0] == NULL)) { if (ma[0] != NULL) { vm_page_lock(ma[0]); vm_page_free(ma[0]); vm_page_unlock(ma[0]); } VM_OBJECT_WUNLOCK(object); return (EIO); } } vm_page_xunbusy(ma[0]); vm_page_lock(ma[0]); vm_page_hold(ma[0]); vm_page_activate(ma[0]); vm_page_unlock(ma[0]); VM_OBJECT_WUNLOCK(object); imgp->firstpage = sf_buf_alloc(ma[0], 0); imgp->image_header = (char *)sf_buf_kva(imgp->firstpage); return (0); } void exec_unmap_first_page(imgp) struct image_params *imgp; { vm_page_t m; if (imgp->firstpage != NULL) { m = sf_buf_page(imgp->firstpage); sf_buf_free(imgp->firstpage); imgp->firstpage = NULL; vm_page_lock(m); vm_page_unhold(m); vm_page_unlock(m); } } /* * Destroy old address space, and allocate a new stack * The new stack is only SGROWSIZ large because it is grown * automatically in trap.c. */ int exec_new_vmspace(imgp, sv) struct image_params *imgp; struct sysentvec *sv; { int error; struct proc *p = imgp->proc; struct vmspace *vmspace = p->p_vmspace; vm_object_t obj; struct rlimit rlim_stack; vm_offset_t sv_minuser, stack_addr; vm_map_t map; u_long ssiz; imgp->vmspace_destroyed = 1; imgp->sysent = sv; /* May be called with Giant held */ EVENTHANDLER_INVOKE(process_exec, p, imgp); /* * Blow away entire process VM, if address space not shared, * otherwise, create a new VM space so that other threads are * not disrupted */ map = &vmspace->vm_map; if (map_at_zero) sv_minuser = sv->sv_minuser; else sv_minuser = MAX(sv->sv_minuser, PAGE_SIZE); if (vmspace->vm_refcnt == 1 && vm_map_min(map) == sv_minuser && vm_map_max(map) == sv->sv_maxuser) { shmexit(vmspace); pmap_remove_pages(vmspace_pmap(vmspace)); vm_map_remove(map, vm_map_min(map), vm_map_max(map)); } else { error = vmspace_exec(p, sv_minuser, sv->sv_maxuser); if (error) return (error); vmspace = p->p_vmspace; map = &vmspace->vm_map; } /* Map a shared page */ obj = sv->sv_shared_page_obj; if (obj != NULL) { vm_object_reference(obj); error = vm_map_fixed(map, obj, 0, sv->sv_shared_page_base, sv->sv_shared_page_len, VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_READ | VM_PROT_EXECUTE, MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE); if (error) { vm_object_deallocate(obj); return (error); } } /* Allocate a new stack */ if (imgp->stack_sz != 0) { ssiz = trunc_page(imgp->stack_sz); PROC_LOCK(p); lim_rlimit(p, RLIMIT_STACK, &rlim_stack); PROC_UNLOCK(p); if (ssiz > rlim_stack.rlim_max) ssiz = rlim_stack.rlim_max; if (ssiz > rlim_stack.rlim_cur) { rlim_stack.rlim_cur = ssiz; kern_setrlimit(curthread, RLIMIT_STACK, &rlim_stack); } } else if (sv->sv_maxssiz != NULL) { ssiz = *sv->sv_maxssiz; } else { ssiz = maxssiz; } stack_addr = sv->sv_usrstack - ssiz; error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz, obj != NULL && imgp->stack_prot != 0 ? imgp->stack_prot : sv->sv_stackprot, VM_PROT_ALL, MAP_STACK_GROWS_DOWN); if (error) return (error); /* * vm_ssize and vm_maxsaddr are somewhat antiquated concepts, but they * are still used to enforce the stack rlimit on the process stack. */ vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT; vmspace->vm_maxsaddr = (char *)sv->sv_usrstack - ssiz; return (0); } /* * Copy out argument and environment strings from the old process address * space into the temporary string buffer. */ int exec_copyin_args(struct image_args *args, char *fname, enum uio_seg segflg, char **argv, char **envv) { u_long argp, envp; int error; size_t length; bzero(args, sizeof(*args)); if (argv == NULL) return (EFAULT); /* * Allocate demand-paged memory for the file name, argument, and * environment strings. */ error = exec_alloc_args(args); if (error != 0) return (error); /* * Copy the file name. */ if (fname != NULL) { args->fname = args->buf; error = (segflg == UIO_SYSSPACE) ? copystr(fname, args->fname, PATH_MAX, &length) : copyinstr(fname, args->fname, PATH_MAX, &length); if (error != 0) goto err_exit; } else length = 0; args->begin_argv = args->buf + length; args->endp = args->begin_argv; args->stringspace = ARG_MAX; /* * extract arguments first */ for (;;) { error = fueword(argv++, &argp); if (error == -1) { error = EFAULT; goto err_exit; } if (argp == 0) break; error = copyinstr((void *)(uintptr_t)argp, args->endp, args->stringspace, &length); if (error != 0) { if (error == ENAMETOOLONG) error = E2BIG; goto err_exit; } args->stringspace -= length; args->endp += length; args->argc++; } args->begin_envv = args->endp; /* * extract environment strings */ if (envv) { for (;;) { error = fueword(envv++, &envp); if (error == -1) { error = EFAULT; goto err_exit; } if (envp == 0) break; error = copyinstr((void *)(uintptr_t)envp, args->endp, args->stringspace, &length); if (error != 0) { if (error == ENAMETOOLONG) error = E2BIG; goto err_exit; } args->stringspace -= length; args->endp += length; args->envc++; } } return (0); err_exit: exec_free_args(args); return (error); } /* * Allocate temporary demand-paged, zero-filled memory for the file name, * argument, and environment strings. Returns zero if the allocation succeeds * and ENOMEM otherwise. */ int exec_alloc_args(struct image_args *args) { args->buf = (char *)kmap_alloc_wait(exec_map, PATH_MAX + ARG_MAX); return (args->buf != NULL ? 0 : ENOMEM); } void exec_free_args(struct image_args *args) { if (args->buf != NULL) { kmap_free_wakeup(exec_map, (vm_offset_t)args->buf, PATH_MAX + ARG_MAX); args->buf = NULL; } if (args->fname_buf != NULL) { free(args->fname_buf, M_TEMP); args->fname_buf = NULL; } } /* * Copy strings out to the new process address space, constructing new arg * and env vector tables. Return a pointer to the base so that it can be used * as the initial stack pointer. */ register_t * exec_copyout_strings(imgp) struct image_params *imgp; { int argc, envc; char **vectp; char *stringp; uintptr_t destp; register_t *stack_base; struct ps_strings *arginfo; struct proc *p; size_t execpath_len; int szsigcode, szps; char canary[sizeof(long) * 8]; szps = sizeof(pagesizes[0]) * MAXPAGESIZES; /* * Calculate string base and vector table pointers. * Also deal with signal trampoline code for this exec type. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; p = imgp->proc; szsigcode = 0; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; if (p->p_sysent->sv_sigcode_base == 0) { if (p->p_sysent->sv_szsigcode != NULL) szsigcode = *(p->p_sysent->sv_szsigcode); } destp = (uintptr_t)arginfo; /* * install sigcode */ if (szsigcode != 0) { destp -= szsigcode; destp = rounddown2(destp, sizeof(void *)); copyout(p->p_sysent->sv_sigcode, (void *)destp, szsigcode); } /* * Copy the image path for the rtld. */ if (execpath_len != 0) { destp -= execpath_len; imgp->execpathp = destp; copyout(imgp->execpath, (void *)destp, execpath_len); } /* * Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); destp -= sizeof(canary); imgp->canary = destp; copyout(canary, (void *)destp, sizeof(canary)); imgp->canarylen = sizeof(canary); /* * Prepare the pagesizes array. */ destp -= szps; destp = rounddown2(destp, sizeof(void *)); imgp->pagesizes = destp; copyout(pagesizes, (void *)destp, szps); imgp->pagesizeslen = szps; destp -= ARG_MAX - imgp->args->stringspace; destp = rounddown2(destp, sizeof(void *)); /* * If we have a valid auxargs ptr, prepare some room * on the stack. */ if (imgp->auxargs) { /* * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for * lower compatibility. */ imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : (AT_COUNT * 2); /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets,and imgp->auxarg_size is room * for argument of Runtime loader. */ vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *)); } else { /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets */ vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(char *)); } /* * vectp also becomes our initial stack base */ stack_base = (register_t *)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* * Copy out strings - arguments and environment. */ copyout(stringp, (void *)destp, ARG_MAX - imgp->args->stringspace); /* * Fill in "ps_strings" struct for ps, w, etc. */ suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp); suword32(&arginfo->ps_nargvstr, argc); /* * Fill in argument portion of vector table. */ for (; argc > 0; --argc) { suword(vectp++, (long)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* a null vector table pointer separates the argp's from the envp's */ suword(vectp++, 0); suword(&arginfo->ps_envstr, (long)(intptr_t)vectp); suword32(&arginfo->ps_nenvstr, envc); /* * Fill in environment portion of vector table. */ for (; envc > 0; --envc) { suword(vectp++, (long)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* end of vector table is a null pointer */ suword(vectp, 0); return (stack_base); } /* * Check permissions of file to execute. * Called with imgp->vp locked. * Return 0 for success or error code on failure. */ int exec_check_permissions(imgp) struct image_params *imgp; { struct vnode *vp = imgp->vp; struct vattr *attr = imgp->attr; struct thread *td; int error, writecount; td = curthread; /* Get file attributes */ error = VOP_GETATTR(vp, attr, td->td_ucred); if (error) return (error); #ifdef MAC error = mac_vnode_check_exec(td->td_ucred, imgp->vp, imgp); if (error) return (error); #endif /* * 1) Check if file execution is disabled for the filesystem that * this file resides on. * 2) Ensure that at least one execute bit is on. Otherwise, a * privileged user will always succeed, and we don't want this * to happen unless the file really is executable. * 3) Ensure that the file is a regular file. */ if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || (attr->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0 || (attr->va_type != VREG)) return (EACCES); /* * Zero length files can't be exec'd */ if (attr->va_size == 0) return (ENOEXEC); /* * Check for execute permission to file based on current credentials. */ error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); if (error) return (error); /* * Check number of open-for-writes on the file and deny execution * if there are any. */ error = VOP_GET_WRITECOUNT(vp, &writecount); if (error != 0) return (error); if (writecount != 0) return (ETXTBSY); /* * Call filesystem specific open routine (which does nothing in the * general case). */ error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); if (error == 0) imgp->opened = 1; return (error); } /* * Exec handler registration */ int exec_register(execsw_arg) const struct execsw *execsw_arg; { const struct execsw **es, **xs, **newexecsw; int count = 2; /* New slot and trailing NULL */ if (execsw) for (es = execsw; *es; es++) count++; newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); if (newexecsw == NULL) return (ENOMEM); xs = newexecsw; if (execsw) for (es = execsw; *es; es++) *xs++ = *es; *xs++ = execsw_arg; *xs = NULL; if (execsw) free(execsw, M_TEMP); execsw = newexecsw; return (0); } int exec_unregister(execsw_arg) const struct execsw *execsw_arg; { const struct execsw **es, **xs, **newexecsw; int count = 1; if (execsw == NULL) panic("unregister with no handlers left?\n"); for (es = execsw; *es; es++) { if (*es == execsw_arg) break; } if (*es == NULL) return (ENOENT); for (es = execsw; *es; es++) if (*es != execsw_arg) count++; newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); if (newexecsw == NULL) return (ENOMEM); xs = newexecsw; for (es = execsw; *es; es++) if (*es != execsw_arg) *xs++ = *es; *xs = NULL; if (execsw) free(execsw, M_TEMP); execsw = newexecsw; return (0); } Index: head/sys/sys/imgact.h =================================================================== --- head/sys/sys/imgact.h (revision 282707) +++ head/sys/sys/imgact.h (revision 282708) @@ -1,103 +1,106 @@ /*- * Copyright (c) 1993, David Greenman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_IMGACT_H_ #define _SYS_IMGACT_H_ #include #include #define MAXSHELLCMDLEN PAGE_SIZE struct image_args { char *buf; /* pointer to string buffer */ char *begin_argv; /* beginning of argv in buf */ char *begin_envv; /* beginning of envv in buf */ char *endp; /* current `end' pointer of arg & env strings */ char *fname; /* pointer to filename of executable (system space) */ char *fname_buf; /* pointer to optional malloc(M_TEMP) buffer */ int stringspace; /* space left in arg & env buffer */ int argc; /* count of argument strings */ int envc; /* count of environment strings */ int fd; /* file descriptor of the executable */ }; struct image_params { struct proc *proc; /* our process struct */ struct label *execlabel; /* optional exec label */ struct vnode *vp; /* pointer to vnode of file to exec */ struct vm_object *object; /* The vm object for this vp */ struct vattr *attr; /* attributes of file */ const char *image_header; /* head of file to exec */ unsigned long entry_addr; /* entry address of target executable */ unsigned long reloc_base; /* load address of image */ char vmspace_destroyed; /* flag - we've blown away original vm space */ #define IMGACT_SHELL 0x1 #define IMGACT_BINMISC 0x2 unsigned char interpreted; /* mask of interpreters that have run */ char opened; /* flag - we have opened executable vnode */ char *interpreter_name; /* name of the interpreter */ void *auxargs; /* ELF Auxinfo structure pointer */ struct sf_buf *firstpage; /* first page that we mapped */ unsigned long ps_strings; /* PS_STRINGS for BSD/OS binaries */ size_t auxarg_size; struct image_args *args; /* system call arguments */ struct sysentvec *sysent; /* system entry vector */ char *execpath; unsigned long execpathp; char *freepath; unsigned long canary; int canarylen; unsigned long pagesizes; int pagesizeslen; vm_prot_t stack_prot; u_long stack_sz; }; #ifdef _KERNEL struct sysentvec; struct thread; +struct vmspace; #define IMGACT_CORE_COMPRESS 0x01 int exec_alloc_args(struct image_args *); int exec_check_permissions(struct image_params *); register_t *exec_copyout_strings(struct image_params *); void exec_free_args(struct image_args *); int exec_new_vmspace(struct image_params *, struct sysentvec *); void exec_setregs(struct thread *, struct image_params *, u_long); int exec_shell_imgact(struct image_params *); int exec_copyin_args(struct image_args *, char *, enum uio_seg, char **, char **); +int pre_execve(struct thread *td, struct vmspace **oldvmspace); +void post_execve(struct thread *td, int error, struct vmspace *oldvmspace); #endif #endif /* !_SYS_IMGACT_H_ */