Index: head/sys/amd64/linux/linux_machdep.c =================================================================== --- head/sys/amd64/linux/linux_machdep.c (revision 364365) +++ head/sys/amd64/linux/linux_machdep.c (revision 364366) @@ -1,335 +1,339 @@ /*- * Copyright (c) 2013 Dmitry Chagin * Copyright (c) 2004 Tim J. Robbins * Copyright (c) 2002 Doug Rabson * Copyright (c) 2000 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include int linux_execve(struct thread *td, struct linux_execve_args *args) { struct image_args eargs; char *path; int error; - LCONVPATHEXIST(td, args->path, &path); - LINUX_CTR(execve); - error = exec_copyin_args(&eargs, path, UIO_SYSSPACE, args->argp, - args->envp); - LFREEPATH(path); + if (!LUSECONVPATH(td)) { + error = exec_copyin_args(&eargs, args->path, UIO_USERSPACE, + args->argp, args->envp); + } else { + LCONVPATHEXIST(td, args->path, &path); + error = exec_copyin_args(&eargs, path, UIO_SYSSPACE, args->argp, + args->envp); + LFREEPATH(path); + } if (error == 0) error = linux_common_execve(td, &eargs); return (error); } int linux_set_upcall_kse(struct thread *td, register_t stack) { if (stack) td->td_frame->tf_rsp = stack; /* * The newly created Linux thread returns * to the user space by the same path that a parent do. 
*/ td->td_frame->tf_rax = 0; return (0); } int linux_mmap2(struct thread *td, struct linux_mmap2_args *args) { return (linux_mmap_common(td, PTROUT(args->addr), args->len, args->prot, args->flags, args->fd, args->pgoff)); } int linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) { return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot)); } int linux_madvise(struct thread *td, struct linux_madvise_args *uap) { return (linux_madvise_common(td, PTROUT(uap->addr), uap->len, uap->behav)); } int linux_iopl(struct thread *td, struct linux_iopl_args *args) { int error; LINUX_CTR(iopl); if (args->level > 3) return (EINVAL); if ((error = priv_check(td, PRIV_IO)) != 0) return (error); if ((error = securelevel_gt(td->td_ucred, 0)) != 0) return (error); td->td_frame->tf_rflags = (td->td_frame->tf_rflags & ~PSL_IOPL) | (args->level * (PSL_IOPL / 3)); return (0); } int linux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap) { l_sigset_t lmask; sigset_t sigmask; int error; LINUX_CTR2(rt_sigsuspend, "%p, %ld", uap->newset, uap->sigsetsize); if (uap->sigsetsize != sizeof(l_sigset_t)) return (EINVAL); error = copyin(uap->newset, &lmask, sizeof(l_sigset_t)); if (error) return (error); linux_to_bsd_sigset(&lmask, &sigmask); return (kern_sigsuspend(td, sigmask)); } int linux_pause(struct thread *td, struct linux_pause_args *args) { struct proc *p = td->td_proc; sigset_t sigmask; LINUX_CTR(pause); PROC_LOCK(p); sigmask = td->td_sigmask; PROC_UNLOCK(p); return (kern_sigsuspend(td, sigmask)); } int linux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap) { stack_t ss, oss; l_stack_t lss; int error; memset(&lss, 0, sizeof(lss)); LINUX_CTR2(sigaltstack, "%p, %p", uap->uss, uap->uoss); if (uap->uss != NULL) { error = copyin(uap->uss, &lss, sizeof(l_stack_t)); if (error) return (error); ss.ss_sp = PTRIN(lss.ss_sp); ss.ss_size = lss.ss_size; ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags); } error = kern_sigaltstack(td, 
(uap->uss != NULL) ? &ss : NULL, (uap->uoss != NULL) ? &oss : NULL); if (!error && uap->uoss != NULL) { lss.ss_sp = PTROUT(oss.ss_sp); lss.ss_size = oss.ss_size; lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags); error = copyout(&lss, uap->uoss, sizeof(l_stack_t)); } return (error); } int linux_arch_prctl(struct thread *td, struct linux_arch_prctl_args *args) { struct pcb *pcb; int error; pcb = td->td_pcb; LINUX_CTR2(arch_prctl, "0x%x, %p", args->code, args->addr); switch (args->code) { case LINUX_ARCH_SET_GS: if (args->addr < VM_MAXUSER_ADDRESS) { set_pcb_flags(pcb, PCB_FULL_IRET); pcb->pcb_gsbase = args->addr; td->td_frame->tf_gs = _ugssel; error = 0; } else error = EPERM; break; case LINUX_ARCH_SET_FS: if (args->addr < VM_MAXUSER_ADDRESS) { set_pcb_flags(pcb, PCB_FULL_IRET); pcb->pcb_fsbase = args->addr; td->td_frame->tf_fs = _ufssel; error = 0; } else error = EPERM; break; case LINUX_ARCH_GET_FS: error = copyout(&pcb->pcb_fsbase, PTRIN(args->addr), sizeof(args->addr)); break; case LINUX_ARCH_GET_GS: error = copyout(&pcb->pcb_gsbase, PTRIN(args->addr), sizeof(args->addr)); break; default: error = EINVAL; } return (error); } int linux_set_cloned_tls(struct thread *td, void *desc) { struct pcb *pcb; if ((uint64_t)desc >= VM_MAXUSER_ADDRESS) return (EPERM); pcb = td->td_pcb; pcb->pcb_fsbase = (register_t)desc; td->td_frame->tf_fs = _ufssel; return (0); } int futex_xchgl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_xchgl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_xchgl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_xchgl_smap : futex_xchgl_nosmap); } int futex_addl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_addl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_addl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? 
futex_addl_smap : futex_addl_nosmap); } int futex_orl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_orl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_orl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_orl_smap : futex_orl_nosmap); } int futex_andl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_andl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_andl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_andl_smap : futex_andl_nosmap); } int futex_xorl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_xorl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_xorl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_xorl_smap : futex_xorl_nosmap); } Index: head/sys/arm64/linux/linux_machdep.c =================================================================== --- head/sys/arm64/linux/linux_machdep.c (revision 364365) +++ head/sys/arm64/linux/linux_machdep.c (revision 364366) @@ -1,139 +1,143 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2018 Turing Robotic Industries Inc. * Copyright (c) 2000 Marcel Moolenaar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include /* DTrace init */ LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); /* DTrace probes */ LIN_SDT_PROBE_DEFINE0(machdep, linux_set_upcall_kse, todo); LIN_SDT_PROBE_DEFINE0(machdep, linux_mmap2, todo); LIN_SDT_PROBE_DEFINE0(machdep, linux_rt_sigsuspend, todo); LIN_SDT_PROBE_DEFINE0(machdep, linux_sigaltstack, todo); LIN_SDT_PROBE_DEFINE0(machdep, linux_set_cloned_tls, todo); /* * LINUXTODO: deduplicate; linux_execve is common across archs, except that on * amd64 compat linuxulator it calls freebsd32_exec_copyin_args. 
*/ int linux_execve(struct thread *td, struct linux_execve_args *uap) { struct image_args eargs; char *path; int error; - LCONVPATHEXIST(td, uap->path, &path); - - error = exec_copyin_args(&eargs, path, UIO_SYSSPACE, uap->argp, - uap->envp); - LFREEPATH(path); + if (!LUSECONVPATH(td)) { + error = exec_copyin_args(&eargs, uap->path, UIO_USERSPACE, + uap->argp, uap->envp); + } else { + LCONVPATHEXIST(td, uap->path, &path); + error = exec_copyin_args(&eargs, path, UIO_SYSSPACE, + uap->argp, uap->envp); + LFREEPATH(path); + } if (error == 0) error = linux_common_execve(td, &eargs); return (error); } /* LINUXTODO: implement (or deduplicate) arm64 linux_set_upcall_kse */ int linux_set_upcall_kse(struct thread *td, register_t stack) { LIN_SDT_PROBE0(machdep, linux_set_upcall_kse, todo); return (EDOOFUS); } /* LINUXTODO: deduplicate arm64 linux_mmap2 */ int linux_mmap2(struct thread *td, struct linux_mmap2_args *uap) { LIN_SDT_PROBE0(machdep, linux_mmap2, todo); return (linux_mmap_common(td, PTROUT(uap->addr), uap->len, uap->prot, uap->flags, uap->fd, uap->pgoff)); } int linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) { return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot)); } int linux_madvise(struct thread *td, struct linux_madvise_args *uap) { return (linux_madvise_common(td, PTROUT(uap->addr), uap->len, uap->behav)); } /* LINUXTODO: implement arm64 linux_rt_sigsuspend */ int linux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap) { LIN_SDT_PROBE0(machdep, linux_rt_sigsuspend, todo); return (EDOOFUS); } /* LINUXTODO: implement arm64 linux_sigaltstack */ int linux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap) { LIN_SDT_PROBE0(machdep, linux_sigaltstack, todo); return (EDOOFUS); } /* LINUXTODO: implement arm64 linux_set_cloned_tls */ int linux_set_cloned_tls(struct thread *td, void *desc) { LIN_SDT_PROBE0(machdep, linux_set_cloned_tls, todo); return (EDOOFUS); } Index: 
head/sys/compat/linux/linux_file.c =================================================================== --- head/sys/compat/linux/linux_file.c (revision 364365) +++ head/sys/compat/linux/linux_file.c (revision 364366) @@ -1,1782 +1,1822 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1994-1995 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_LINUX32 #include #include #include #else #include #include #endif #include #include #include -static int linux_common_open(struct thread *, int, char *, int, int); +static int linux_common_open(struct thread *, int, const char *, int, int, + enum uio_seg); static int linux_getdents_error(struct thread *, int, int); static struct bsd_to_linux_bitmap seal_bitmap[] = { BITMAP_1t1_LINUX(F_SEAL_SEAL), BITMAP_1t1_LINUX(F_SEAL_SHRINK), BITMAP_1t1_LINUX(F_SEAL_GROW), BITMAP_1t1_LINUX(F_SEAL_WRITE), }; #define MFD_HUGETLB_ENTRY(_size) \ { \ .bsd_value = MFD_HUGE_##_size, \ .linux_value = LINUX_HUGETLB_FLAG_ENCODE_##_size \ } static struct bsd_to_linux_bitmap mfd_bitmap[] = { BITMAP_1t1_LINUX(MFD_CLOEXEC), BITMAP_1t1_LINUX(MFD_ALLOW_SEALING), BITMAP_1t1_LINUX(MFD_HUGETLB), MFD_HUGETLB_ENTRY(64KB), MFD_HUGETLB_ENTRY(512KB), MFD_HUGETLB_ENTRY(1MB), MFD_HUGETLB_ENTRY(2MB), MFD_HUGETLB_ENTRY(8MB), MFD_HUGETLB_ENTRY(16MB), MFD_HUGETLB_ENTRY(32MB), MFD_HUGETLB_ENTRY(256MB), MFD_HUGETLB_ENTRY(512MB), MFD_HUGETLB_ENTRY(1GB), MFD_HUGETLB_ENTRY(2GB), MFD_HUGETLB_ENTRY(16GB), }; #undef MFD_HUGETLB_ENTRY #ifdef LINUX_LEGACY_SYSCALLS int linux_creat(struct thread *td, struct linux_creat_args *args) { char *path; int error; - LCONVPATHEXIST(td, args->path, &path); - - error = kern_openat(td, AT_FDCWD, path, UIO_SYSSPACE, - O_WRONLY | O_CREAT | O_TRUNC, args->mode); - LFREEPATH(path); + if (!LUSECONVPATH(td)) { + error = kern_openat(td, AT_FDCWD, args->path, UIO_USERSPACE, + O_WRONLY | O_CREAT | O_TRUNC, args->mode); + } else { + LCONVPATHEXIST(td, args->path, &path); + error = kern_openat(td, AT_FDCWD, path, UIO_SYSSPACE, + O_WRONLY | O_CREAT | O_TRUNC, args->mode); + LFREEPATH(path); + } return (error); } #endif static 
int -linux_common_open(struct thread *td, int dirfd, char *path, int l_flags, int mode) +linux_common_open(struct thread *td, int dirfd, const char *path, int l_flags, + int mode, enum uio_seg seg) { struct proc *p = td->td_proc; struct file *fp; int fd; int bsd_flags, error; bsd_flags = 0; switch (l_flags & LINUX_O_ACCMODE) { case LINUX_O_WRONLY: bsd_flags |= O_WRONLY; break; case LINUX_O_RDWR: bsd_flags |= O_RDWR; break; default: bsd_flags |= O_RDONLY; } if (l_flags & LINUX_O_NDELAY) bsd_flags |= O_NONBLOCK; if (l_flags & LINUX_O_APPEND) bsd_flags |= O_APPEND; if (l_flags & LINUX_O_SYNC) bsd_flags |= O_FSYNC; if (l_flags & LINUX_O_CLOEXEC) bsd_flags |= O_CLOEXEC; if (l_flags & LINUX_O_NONBLOCK) bsd_flags |= O_NONBLOCK; if (l_flags & LINUX_O_ASYNC) bsd_flags |= O_ASYNC; if (l_flags & LINUX_O_CREAT) bsd_flags |= O_CREAT; if (l_flags & LINUX_O_TRUNC) bsd_flags |= O_TRUNC; if (l_flags & LINUX_O_EXCL) bsd_flags |= O_EXCL; if (l_flags & LINUX_O_NOCTTY) bsd_flags |= O_NOCTTY; if (l_flags & LINUX_O_DIRECT) bsd_flags |= O_DIRECT; if (l_flags & LINUX_O_NOFOLLOW) bsd_flags |= O_NOFOLLOW; if (l_flags & LINUX_O_DIRECTORY) bsd_flags |= O_DIRECTORY; /* XXX LINUX_O_NOATIME: unable to be easily implemented. */ - error = kern_openat(td, dirfd, path, UIO_SYSSPACE, bsd_flags, mode); + error = kern_openat(td, dirfd, path, seg, bsd_flags, mode); if (error != 0) { if (error == EMLINK) error = ELOOP; goto done; } if (p->p_flag & P_CONTROLT) goto done; if (bsd_flags & O_NOCTTY) goto done; /* * XXX In between kern_openat() and fget(), another process * having the same filedesc could use that fd without * checking below. */ fd = td->td_retval[0]; if (fget(td, fd, &cap_ioctl_rights, &fp) == 0) { if (fp->f_type != DTYPE_VNODE) { fdrop(fp, td); goto done; } sx_slock(&proctree_lock); PROC_LOCK(p); if (SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) { PROC_UNLOCK(p); sx_sunlock(&proctree_lock); /* XXXPJD: Verify if TIOCSCTTY is allowed. 
*/ (void) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0, td->td_ucred, td); } else { PROC_UNLOCK(p); sx_sunlock(&proctree_lock); } fdrop(fp, td); } done: - LFREEPATH(path); return (error); } int linux_openat(struct thread *td, struct linux_openat_args *args) { char *path; - int dfd; + int dfd, error; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; + if (!LUSECONVPATH(td)) { + return (linux_common_open(td, dfd, args->filename, args->flags, + args->mode, UIO_USERSPACE)); + } if (args->flags & LINUX_O_CREAT) LCONVPATH_AT(td, args->filename, &path, 1, dfd); else LCONVPATH_AT(td, args->filename, &path, 0, dfd); - return (linux_common_open(td, dfd, path, args->flags, args->mode)); + error = linux_common_open(td, dfd, path, args->flags, args->mode, + UIO_SYSSPACE); + LFREEPATH(path); + return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_open(struct thread *td, struct linux_open_args *args) { char *path; + int error; + if (!LUSECONVPATH(td)) { + return (linux_common_open(td, AT_FDCWD, args->path, args->flags, + args->mode, UIO_USERSPACE)); + } if (args->flags & LINUX_O_CREAT) LCONVPATHCREAT(td, args->path, &path); else LCONVPATHEXIST(td, args->path, &path); - return (linux_common_open(td, AT_FDCWD, path, args->flags, args->mode)); + error = linux_common_open(td, AT_FDCWD, path, args->flags, args->mode, + UIO_SYSSPACE); + LFREEPATH(path); + return (error); } #endif int linux_lseek(struct thread *td, struct linux_lseek_args *args) { return (kern_lseek(td, args->fdes, args->off, args->whence)); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_llseek(struct thread *td, struct linux_llseek_args *args) { int error; off_t off; off = (args->olow) | (((off_t) args->ohigh) << 32); error = kern_lseek(td, args->fd, off, args->whence); if (error != 0) return (error); error = copyout(td->td_retval, args->res, sizeof(off_t)); if (error != 0) return (error); td->td_retval[0] = 0; return (0); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) 
*/ /* * Note that linux_getdents(2) and linux_getdents64(2) have the same * arguments. They only differ in the definition of struct dirent they * operate on. * Note that linux_readdir(2) is a special case of linux_getdents(2) * where count is always equals 1, meaning that the buffer is one * dirent-structure in size and that the code can't handle more anyway. * Note that linux_readdir(2) can't be implemented by means of linux_getdents(2) * as in case when the *dent buffer size is equal to 1 linux_getdents(2) will * trash user stack. */ static int linux_getdents_error(struct thread *td, int fd, int err) { struct vnode *vp; struct file *fp; int error; /* Linux return ENOTDIR in case when fd is not a directory. */ error = getvnode(td, fd, &cap_read_rights, &fp); if (error != 0) return (error); vp = fp->f_vnode; if (vp->v_type != VDIR) { fdrop(fp, td); return (ENOTDIR); } fdrop(fp, td); return (err); } struct l_dirent { l_ulong d_ino; l_off_t d_off; l_ushort d_reclen; char d_name[LINUX_NAME_MAX + 1]; }; struct l_dirent64 { uint64_t d_ino; int64_t d_off; l_ushort d_reclen; u_char d_type; char d_name[LINUX_NAME_MAX + 1]; }; /* * Linux uses the last byte in the dirent buffer to store d_type, * at least glibc-2.7 requires it. That is why l_dirent is padded with 2 bytes. 
*/ #define LINUX_RECLEN(namlen) \ roundup(offsetof(struct l_dirent, d_name) + (namlen) + 2, sizeof(l_ulong)) #define LINUX_RECLEN64(namlen) \ roundup(offsetof(struct l_dirent64, d_name) + (namlen) + 1, \ sizeof(uint64_t)) #ifdef LINUX_LEGACY_SYSCALLS int linux_getdents(struct thread *td, struct linux_getdents_args *args) { struct dirent *bdp; caddr_t inp, buf; /* BSD-format */ int len, reclen; /* BSD-format */ caddr_t outp; /* Linux-format */ int resid, linuxreclen; /* Linux-format */ caddr_t lbuf; /* Linux-format */ off_t base; struct l_dirent *linux_dirent; int buflen, error; size_t retval; buflen = min(args->count, MAXBSIZE); buf = malloc(buflen, M_TEMP, M_WAITOK); error = kern_getdirentries(td, args->fd, buf, buflen, &base, NULL, UIO_SYSSPACE); if (error != 0) { error = linux_getdents_error(td, args->fd, error); goto out1; } lbuf = malloc(LINUX_RECLEN(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO); len = td->td_retval[0]; inp = buf; outp = (caddr_t)args->dent; resid = args->count; retval = 0; while (len > 0) { bdp = (struct dirent *) inp; reclen = bdp->d_reclen; linuxreclen = LINUX_RECLEN(bdp->d_namlen); /* * No more space in the user supplied dirent buffer. * Return EINVAL. 
*/ if (resid < linuxreclen) { error = EINVAL; goto out; } linux_dirent = (struct l_dirent*)lbuf; linux_dirent->d_ino = bdp->d_fileno; linux_dirent->d_off = base + reclen; linux_dirent->d_reclen = linuxreclen; /* * Copy d_type to last byte of l_dirent buffer */ lbuf[linuxreclen - 1] = bdp->d_type; strlcpy(linux_dirent->d_name, bdp->d_name, linuxreclen - offsetof(struct l_dirent, d_name)-1); error = copyout(linux_dirent, outp, linuxreclen); if (error != 0) goto out; inp += reclen; base += reclen; len -= reclen; retval += linuxreclen; outp += linuxreclen; resid -= linuxreclen; } td->td_retval[0] = retval; out: free(lbuf, M_TEMP); out1: free(buf, M_TEMP); return (error); } #endif int linux_getdents64(struct thread *td, struct linux_getdents64_args *args) { struct dirent *bdp; caddr_t inp, buf; /* BSD-format */ int len, reclen; /* BSD-format */ caddr_t outp; /* Linux-format */ int resid, linuxreclen; /* Linux-format */ caddr_t lbuf; /* Linux-format */ off_t base; struct l_dirent64 *linux_dirent64; int buflen, error; size_t retval; buflen = min(args->count, MAXBSIZE); buf = malloc(buflen, M_TEMP, M_WAITOK); error = kern_getdirentries(td, args->fd, buf, buflen, &base, NULL, UIO_SYSSPACE); if (error != 0) { error = linux_getdents_error(td, args->fd, error); goto out1; } lbuf = malloc(LINUX_RECLEN64(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO); len = td->td_retval[0]; inp = buf; outp = (caddr_t)args->dirent; resid = args->count; retval = 0; while (len > 0) { bdp = (struct dirent *) inp; reclen = bdp->d_reclen; linuxreclen = LINUX_RECLEN64(bdp->d_namlen); /* * No more space in the user supplied dirent buffer. * Return EINVAL. 
*/ if (resid < linuxreclen) { error = EINVAL; goto out; } linux_dirent64 = (struct l_dirent64*)lbuf; linux_dirent64->d_ino = bdp->d_fileno; linux_dirent64->d_off = base + reclen; linux_dirent64->d_reclen = linuxreclen; linux_dirent64->d_type = bdp->d_type; strlcpy(linux_dirent64->d_name, bdp->d_name, linuxreclen - offsetof(struct l_dirent64, d_name)); error = copyout(linux_dirent64, outp, linuxreclen); if (error != 0) goto out; inp += reclen; base += reclen; len -= reclen; retval += linuxreclen; outp += linuxreclen; resid -= linuxreclen; } td->td_retval[0] = retval; out: free(lbuf, M_TEMP); out1: free(buf, M_TEMP); return (error); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_readdir(struct thread *td, struct linux_readdir_args *args) { struct dirent *bdp; caddr_t buf; /* BSD-format */ int linuxreclen; /* Linux-format */ caddr_t lbuf; /* Linux-format */ off_t base; struct l_dirent *linux_dirent; int buflen, error; buflen = LINUX_RECLEN(LINUX_NAME_MAX); buf = malloc(buflen, M_TEMP, M_WAITOK); error = kern_getdirentries(td, args->fd, buf, buflen, &base, NULL, UIO_SYSSPACE); if (error != 0) { error = linux_getdents_error(td, args->fd, error); goto out; } if (td->td_retval[0] == 0) goto out; lbuf = malloc(LINUX_RECLEN(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO); bdp = (struct dirent *) buf; linuxreclen = LINUX_RECLEN(bdp->d_namlen); linux_dirent = (struct l_dirent*)lbuf; linux_dirent->d_ino = bdp->d_fileno; linux_dirent->d_off = linuxreclen; linux_dirent->d_reclen = bdp->d_namlen; strlcpy(linux_dirent->d_name, bdp->d_name, linuxreclen - offsetof(struct l_dirent, d_name)); error = copyout(linux_dirent, args->dent, linuxreclen); if (error == 0) td->td_retval[0] = linuxreclen; free(lbuf, M_TEMP); out: free(buf, M_TEMP); return (error); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ /* * These exist mainly for hooks for doing /compat/linux translation. 
*/ #ifdef LINUX_LEGACY_SYSCALLS int linux_access(struct thread *td, struct linux_access_args *args) { char *path; int error; /* Linux convention. */ if (args->amode & ~(F_OK | X_OK | W_OK | R_OK)) return (EINVAL); - LCONVPATHEXIST(td, args->path, &path); + if (!LUSECONVPATH(td)) { + error = kern_accessat(td, AT_FDCWD, args->path, UIO_USERSPACE, 0, + args->amode); + } else { + LCONVPATHEXIST(td, args->path, &path); + error = kern_accessat(td, AT_FDCWD, path, UIO_SYSSPACE, 0, + args->amode); + LFREEPATH(path); + } - error = kern_accessat(td, AT_FDCWD, path, UIO_SYSSPACE, 0, - args->amode); - LFREEPATH(path); - return (error); } #endif int linux_faccessat(struct thread *td, struct linux_faccessat_args *args) { char *path; int error, dfd; /* Linux convention. */ if (args->amode & ~(F_OK | X_OK | W_OK | R_OK)) return (EINVAL); dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; - LCONVPATHEXIST_AT(td, args->filename, &path, dfd); + if (!LUSECONVPATH(td)) { + error = kern_accessat(td, dfd, args->filename, UIO_USERSPACE, 0, args->amode); + } else { + LCONVPATHEXIST_AT(td, args->filename, &path, dfd); + error = kern_accessat(td, dfd, path, UIO_SYSSPACE, 0, args->amode); + LFREEPATH(path); + } - error = kern_accessat(td, dfd, path, UIO_SYSSPACE, 0, args->amode); - LFREEPATH(path); - return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_unlink(struct thread *td, struct linux_unlink_args *args) { char *path; int error; struct stat st; - LCONVPATHEXIST(td, args->path, &path); - - error = kern_funlinkat(td, AT_FDCWD, path, FD_NONE, UIO_SYSSPACE, 0, 0); - if (error == EPERM) { - /* Introduce POSIX noncompliant behaviour of Linux */ - if (kern_statat(td, 0, AT_FDCWD, path, UIO_SYSSPACE, &st, - NULL) == 0) { - if (S_ISDIR(st.st_mode)) - error = EISDIR; + if (!LUSECONVPATH(td)) { + error = kern_funlinkat(td, AT_FDCWD, args->path, FD_NONE, + UIO_USERSPACE, 0, 0); + if (error == EPERM) { + /* Introduce POSIX noncompliant behaviour of Linux */ + if (kern_statat(td, 0, 
AT_FDCWD, args->path, + UIO_SYSSPACE, &st, NULL) == 0) { + if (S_ISDIR(st.st_mode)) + error = EISDIR; + } } + } else { + LCONVPATHEXIST(td, args->path, &path); + error = kern_funlinkat(td, AT_FDCWD, path, FD_NONE, UIO_SYSSPACE, 0, 0); + if (error == EPERM) { + /* Introduce POSIX noncompliant behaviour of Linux */ + if (kern_statat(td, 0, AT_FDCWD, path, UIO_SYSSPACE, &st, + NULL) == 0) { + if (S_ISDIR(st.st_mode)) + error = EISDIR; + } + } + LFREEPATH(path); } - LFREEPATH(path); + return (error); } #endif int linux_unlinkat(struct thread *td, struct linux_unlinkat_args *args) { char *path; int error, dfd; struct stat st; if (args->flag & ~LINUX_AT_REMOVEDIR) return (EINVAL); dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHEXIST_AT(td, args->pathname, &path, dfd); if (args->flag & LINUX_AT_REMOVEDIR) error = kern_frmdirat(td, dfd, path, FD_NONE, UIO_SYSSPACE, 0); else error = kern_funlinkat(td, dfd, path, FD_NONE, UIO_SYSSPACE, 0, 0); if (error == EPERM && !(args->flag & LINUX_AT_REMOVEDIR)) { /* Introduce POSIX noncompliant behaviour of Linux */ if (kern_statat(td, AT_SYMLINK_NOFOLLOW, dfd, path, UIO_SYSSPACE, &st, NULL) == 0 && S_ISDIR(st.st_mode)) error = EISDIR; } LFREEPATH(path); return (error); } int linux_chdir(struct thread *td, struct linux_chdir_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); error = kern_chdir(td, path, UIO_SYSSPACE); LFREEPATH(path); return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_chmod(struct thread *td, struct linux_chmod_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); error = kern_fchmodat(td, AT_FDCWD, path, UIO_SYSSPACE, args->mode, 0); LFREEPATH(path); return (error); } #endif int linux_fchmodat(struct thread *td, struct linux_fchmodat_args *args) { char *path; int error, dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? 
AT_FDCWD : args->dfd; LCONVPATHEXIST_AT(td, args->filename, &path, dfd); error = kern_fchmodat(td, dfd, path, UIO_SYSSPACE, args->mode, 0); LFREEPATH(path); return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_mkdir(struct thread *td, struct linux_mkdir_args *args) { char *path; int error; LCONVPATHCREAT(td, args->path, &path); error = kern_mkdirat(td, AT_FDCWD, path, UIO_SYSSPACE, args->mode); LFREEPATH(path); return (error); } #endif int linux_mkdirat(struct thread *td, struct linux_mkdirat_args *args) { char *path; int error, dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHCREAT_AT(td, args->pathname, &path, dfd); error = kern_mkdirat(td, dfd, path, UIO_SYSSPACE, args->mode); LFREEPATH(path); return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_rmdir(struct thread *td, struct linux_rmdir_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); error = kern_frmdirat(td, AT_FDCWD, path, FD_NONE, UIO_SYSSPACE, 0); LFREEPATH(path); return (error); } int linux_rename(struct thread *td, struct linux_rename_args *args) { char *from, *to; int error; LCONVPATHEXIST(td, args->from, &from); /* Expand LCONVPATHCREATE so that `from' can be freed on errors */ error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD); if (to == NULL) { LFREEPATH(from); return (error); } error = kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, UIO_SYSSPACE); LFREEPATH(from); LFREEPATH(to); return (error); } #endif int linux_renameat(struct thread *td, struct linux_renameat_args *args) { struct linux_renameat2_args renameat2_args = { .olddfd = args->olddfd, .oldname = args->oldname, .newdfd = args->newdfd, .newname = args->newname, .flags = 0 }; return (linux_renameat2(td, &renameat2_args)); } int linux_renameat2(struct thread *td, struct linux_renameat2_args *args) { char *from, *to; int error, olddfd, newdfd; if (args->flags != 0) { if (args->flags & ~(LINUX_RENAME_EXCHANGE | LINUX_RENAME_NOREPLACE | LINUX_RENAME_WHITEOUT)) 
return (EINVAL); if (args->flags & LINUX_RENAME_EXCHANGE && args->flags & (LINUX_RENAME_NOREPLACE | LINUX_RENAME_WHITEOUT)) return (EINVAL); linux_msg(td, "renameat2 unsupported flags 0x%x", args->flags); return (EINVAL); } olddfd = (args->olddfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->olddfd; newdfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd; LCONVPATHEXIST_AT(td, args->oldname, &from, olddfd); /* Expand LCONVPATHCREATE so that `from' can be freed on errors */ error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, newdfd); if (to == NULL) { LFREEPATH(from); return (error); } error = kern_renameat(td, olddfd, from, newdfd, to, UIO_SYSSPACE); LFREEPATH(from); LFREEPATH(to); return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_symlink(struct thread *td, struct linux_symlink_args *args) { char *path, *to; int error; LCONVPATHEXIST(td, args->path, &path); /* Expand LCONVPATHCREATE so that `path' can be freed on errors */ error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD); if (to == NULL) { LFREEPATH(path); return (error); } error = kern_symlinkat(td, path, AT_FDCWD, to, UIO_SYSSPACE); LFREEPATH(path); LFREEPATH(to); return (error); } #endif int linux_symlinkat(struct thread *td, struct linux_symlinkat_args *args) { char *path, *to; int error, dfd; dfd = (args->newdfd == LINUX_AT_FDCWD) ? 
AT_FDCWD : args->newdfd; LCONVPATHEXIST(td, args->oldname, &path); /* Expand LCONVPATHCREATE so that `path' can be freed on errors */ error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, dfd); if (to == NULL) { LFREEPATH(path); return (error); } error = kern_symlinkat(td, path, dfd, to, UIO_SYSSPACE); LFREEPATH(path); LFREEPATH(to); return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_readlink(struct thread *td, struct linux_readlink_args *args) { char *name; int error; LCONVPATHEXIST(td, args->name, &name); error = kern_readlinkat(td, AT_FDCWD, name, UIO_SYSSPACE, args->buf, UIO_USERSPACE, args->count); LFREEPATH(name); return (error); } #endif int linux_readlinkat(struct thread *td, struct linux_readlinkat_args *args) { char *name; int error, dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHEXIST_AT(td, args->path, &name, dfd); error = kern_readlinkat(td, dfd, name, UIO_SYSSPACE, args->buf, UIO_USERSPACE, args->bufsiz); LFREEPATH(name); return (error); } int linux_truncate(struct thread *td, struct linux_truncate_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); error = kern_truncate(td, path, UIO_SYSSPACE, args->length); LFREEPATH(path); return (error); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_truncate64(struct thread *td, struct linux_truncate64_args *args) { char *path; off_t length; int error; #if defined(__amd64__) && defined(COMPAT_LINUX32) length = PAIR32TO64(off_t, args->length); #else length = args->length; #endif LCONVPATHEXIST(td, args->path, &path); error = kern_truncate(td, path, UIO_SYSSPACE, length); LFREEPATH(path); return (error); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_ftruncate(struct thread *td, struct linux_ftruncate_args *args) { return (kern_ftruncate(td, args->fd, args->length)); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_ftruncate64(struct thread *td, 
struct linux_ftruncate64_args *args) { off_t length; #if defined(__amd64__) && defined(COMPAT_LINUX32) length = PAIR32TO64(off_t, args->length); #else length = args->length; #endif return (kern_ftruncate(td, args->fd, length)); } #endif #ifdef LINUX_LEGACY_SYSCALLS int linux_link(struct thread *td, struct linux_link_args *args) { char *path, *to; int error; LCONVPATHEXIST(td, args->path, &path); /* Expand LCONVPATHCREATE so that `path' can be freed on errors */ error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD); if (to == NULL) { LFREEPATH(path); return (error); } error = kern_linkat(td, AT_FDCWD, AT_FDCWD, path, to, UIO_SYSSPACE, FOLLOW); LFREEPATH(path); LFREEPATH(to); return (error); } #endif int linux_linkat(struct thread *td, struct linux_linkat_args *args) { char *path, *to; int error, olddfd, newdfd, follow; if (args->flag & ~LINUX_AT_SYMLINK_FOLLOW) return (EINVAL); olddfd = (args->olddfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->olddfd; newdfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd; LCONVPATHEXIST_AT(td, args->oldname, &path, olddfd); /* Expand LCONVPATHCREATE so that `path' can be freed on errors */ error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, newdfd); if (to == NULL) { LFREEPATH(path); return (error); } follow = (args->flag & LINUX_AT_SYMLINK_FOLLOW) == 0 ? 
NOFOLLOW : FOLLOW; error = kern_linkat(td, olddfd, newdfd, path, to, UIO_SYSSPACE, follow); LFREEPATH(path); LFREEPATH(to); return (error); } int linux_fdatasync(struct thread *td, struct linux_fdatasync_args *uap) { return (kern_fsync(td, uap->fd, false)); } int linux_sync_file_range(struct thread *td, struct linux_sync_file_range_args *uap) { off_t nbytes, offset; #if defined(__amd64__) && defined(COMPAT_LINUX32) nbytes = PAIR32TO64(off_t, uap->nbytes); offset = PAIR32TO64(off_t, uap->offset); #else nbytes = uap->nbytes; offset = uap->offset; #endif if (offset < 0 || nbytes < 0 || (uap->flags & ~(LINUX_SYNC_FILE_RANGE_WAIT_BEFORE | LINUX_SYNC_FILE_RANGE_WRITE | LINUX_SYNC_FILE_RANGE_WAIT_AFTER)) != 0) { return (EINVAL); } return (kern_fsync(td, uap->fd, false)); } int linux_pread(struct thread *td, struct linux_pread_args *uap) { struct vnode *vp; off_t offset; int error; #if defined(__amd64__) && defined(COMPAT_LINUX32) offset = PAIR32TO64(off_t, uap->offset); #else offset = uap->offset; #endif error = kern_pread(td, uap->fd, uap->buf, uap->nbyte, offset); if (error == 0) { /* This seems to violate POSIX but Linux does it. */ error = fgetvp(td, uap->fd, &cap_pread_rights, &vp); if (error != 0) return (error); if (vp->v_type == VDIR) error = EISDIR; vrele(vp); } return (error); } int linux_pwrite(struct thread *td, struct linux_pwrite_args *uap) { off_t offset; #if defined(__amd64__) && defined(COMPAT_LINUX32) offset = PAIR32TO64(off_t, uap->offset); #else offset = uap->offset; #endif return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, offset)); } int linux_preadv(struct thread *td, struct linux_preadv_args *uap) { struct uio *auio; int error; off_t offset; /* * According http://man7.org/linux/man-pages/man2/preadv.2.html#NOTES * pos_l and pos_h, respectively, contain the * low order and high order 32 bits of offset. 
*/ offset = (((off_t)uap->pos_h << (sizeof(offset) * 4)) << (sizeof(offset) * 4)) | uap->pos_l; if (offset < 0) return (EINVAL); #ifdef COMPAT_LINUX32 error = linux32_copyinuio(PTRIN(uap->vec), uap->vlen, &auio); #else error = copyinuio(uap->vec, uap->vlen, &auio); #endif if (error != 0) return (error); error = kern_preadv(td, uap->fd, auio, offset); free(auio, M_IOV); return (error); } int linux_pwritev(struct thread *td, struct linux_pwritev_args *uap) { struct uio *auio; int error; off_t offset; /* * According http://man7.org/linux/man-pages/man2/pwritev.2.html#NOTES * pos_l and pos_h, respectively, contain the * low order and high order 32 bits of offset. */ offset = (((off_t)uap->pos_h << (sizeof(offset) * 4)) << (sizeof(offset) * 4)) | uap->pos_l; if (offset < 0) return (EINVAL); #ifdef COMPAT_LINUX32 error = linux32_copyinuio(PTRIN(uap->vec), uap->vlen, &auio); #else error = copyinuio(uap->vec, uap->vlen, &auio); #endif if (error != 0) return (error); error = kern_pwritev(td, uap->fd, auio, offset); free(auio, M_IOV); return (error); } int linux_mount(struct thread *td, struct linux_mount_args *args) { char fstypename[MFSNAMELEN]; char *mntonname, *mntfromname; int error, fsflags; mntonname = malloc(MNAMELEN, M_TEMP, M_WAITOK); mntfromname = malloc(MNAMELEN, M_TEMP, M_WAITOK); error = copyinstr(args->filesystemtype, fstypename, MFSNAMELEN - 1, NULL); if (error != 0) goto out; if (args->specialfile != NULL) { error = copyinstr(args->specialfile, mntfromname, MNAMELEN - 1, NULL); if (error != 0) goto out; } else { mntfromname[0] = '\0'; } error = copyinstr(args->dir, mntonname, MNAMELEN - 1, NULL); if (error != 0) goto out; if (strcmp(fstypename, "ext2") == 0) { strcpy(fstypename, "ext2fs"); } else if (strcmp(fstypename, "proc") == 0) { strcpy(fstypename, "linprocfs"); } else if (strcmp(fstypename, "vfat") == 0) { strcpy(fstypename, "msdosfs"); } fsflags = 0; /* * Linux SYNC flag is not included; the closest equivalent * FreeBSD has is !ASYNC, which is our 
default. */ if (args->rwflag & LINUX_MS_RDONLY) fsflags |= MNT_RDONLY; if (args->rwflag & LINUX_MS_NOSUID) fsflags |= MNT_NOSUID; if (args->rwflag & LINUX_MS_NOEXEC) fsflags |= MNT_NOEXEC; if (args->rwflag & LINUX_MS_REMOUNT) fsflags |= MNT_UPDATE; error = kernel_vmount(fsflags, "fstype", fstypename, "fspath", mntonname, "from", mntfromname, NULL); out: free(mntonname, M_TEMP); free(mntfromname, M_TEMP); return (error); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_oldumount(struct thread *td, struct linux_oldumount_args *args) { return (kern_unmount(td, args->path, 0)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ #ifdef LINUX_LEGACY_SYSCALLS int linux_umount(struct thread *td, struct linux_umount_args *args) { int flags; flags = 0; if ((args->flags & LINUX_MNT_FORCE) != 0) { args->flags &= ~LINUX_MNT_FORCE; flags |= MNT_FORCE; } if (args->flags != 0) { linux_msg(td, "unsupported umount2 flags %#x", args->flags); return (EINVAL); } return (kern_unmount(td, args->path, flags)); } #endif /* * fcntl family of syscalls */ struct l_flock { l_short l_type; l_short l_whence; l_off_t l_start; l_off_t l_len; l_pid_t l_pid; } #if defined(__amd64__) && defined(COMPAT_LINUX32) __packed #endif ; static void linux_to_bsd_flock(struct l_flock *linux_flock, struct flock *bsd_flock) { switch (linux_flock->l_type) { case LINUX_F_RDLCK: bsd_flock->l_type = F_RDLCK; break; case LINUX_F_WRLCK: bsd_flock->l_type = F_WRLCK; break; case LINUX_F_UNLCK: bsd_flock->l_type = F_UNLCK; break; default: bsd_flock->l_type = -1; break; } bsd_flock->l_whence = linux_flock->l_whence; bsd_flock->l_start = (off_t)linux_flock->l_start; bsd_flock->l_len = (off_t)linux_flock->l_len; bsd_flock->l_pid = (pid_t)linux_flock->l_pid; bsd_flock->l_sysid = 0; } static void bsd_to_linux_flock(struct flock *bsd_flock, struct l_flock *linux_flock) { switch (bsd_flock->l_type) { case F_RDLCK: linux_flock->l_type = LINUX_F_RDLCK; break; case F_WRLCK: 
linux_flock->l_type = LINUX_F_WRLCK; break; case F_UNLCK: linux_flock->l_type = LINUX_F_UNLCK; break; } linux_flock->l_whence = bsd_flock->l_whence; linux_flock->l_start = (l_off_t)bsd_flock->l_start; linux_flock->l_len = (l_off_t)bsd_flock->l_len; linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid; } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) struct l_flock64 { l_short l_type; l_short l_whence; l_loff_t l_start; l_loff_t l_len; l_pid_t l_pid; } #if defined(__amd64__) && defined(COMPAT_LINUX32) __packed #endif ; static void linux_to_bsd_flock64(struct l_flock64 *linux_flock, struct flock *bsd_flock) { switch (linux_flock->l_type) { case LINUX_F_RDLCK: bsd_flock->l_type = F_RDLCK; break; case LINUX_F_WRLCK: bsd_flock->l_type = F_WRLCK; break; case LINUX_F_UNLCK: bsd_flock->l_type = F_UNLCK; break; default: bsd_flock->l_type = -1; break; } bsd_flock->l_whence = linux_flock->l_whence; bsd_flock->l_start = (off_t)linux_flock->l_start; bsd_flock->l_len = (off_t)linux_flock->l_len; bsd_flock->l_pid = (pid_t)linux_flock->l_pid; bsd_flock->l_sysid = 0; } static void bsd_to_linux_flock64(struct flock *bsd_flock, struct l_flock64 *linux_flock) { switch (bsd_flock->l_type) { case F_RDLCK: linux_flock->l_type = LINUX_F_RDLCK; break; case F_WRLCK: linux_flock->l_type = LINUX_F_WRLCK; break; case F_UNLCK: linux_flock->l_type = LINUX_F_UNLCK; break; } linux_flock->l_whence = bsd_flock->l_whence; linux_flock->l_start = (l_loff_t)bsd_flock->l_start; linux_flock->l_len = (l_loff_t)bsd_flock->l_len; linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid; } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ static int fcntl_common(struct thread *td, struct linux_fcntl_args *args) { struct l_flock linux_flock; struct flock bsd_flock; struct file *fp; long arg; int error, result; switch (args->cmd) { case LINUX_F_DUPFD: return (kern_fcntl(td, args->fd, F_DUPFD, args->arg)); case LINUX_F_GETFD: return (kern_fcntl(td, args->fd, F_GETFD, 0)); case LINUX_F_SETFD: 
return (kern_fcntl(td, args->fd, F_SETFD, args->arg));

	case LINUX_F_GETFL:
		/*
		 * Fetch the native status flags, then rebuild the return
		 * value bit by bit using the Linux flag encoding.
		 */
		error = kern_fcntl(td, args->fd, F_GETFL, 0);
		result = td->td_retval[0];
		td->td_retval[0] = 0;
		if (result & O_RDONLY)
			td->td_retval[0] |= LINUX_O_RDONLY;
		if (result & O_WRONLY)
			td->td_retval[0] |= LINUX_O_WRONLY;
		if (result & O_RDWR)
			td->td_retval[0] |= LINUX_O_RDWR;
		if (result & O_NDELAY)
			td->td_retval[0] |= LINUX_O_NONBLOCK;
		if (result & O_APPEND)
			td->td_retval[0] |= LINUX_O_APPEND;
		if (result & O_FSYNC)
			td->td_retval[0] |= LINUX_O_SYNC;
		if (result & O_ASYNC)
			td->td_retval[0] |= LINUX_O_ASYNC;
#ifdef LINUX_O_NOFOLLOW
		if (result & O_NOFOLLOW)
			td->td_retval[0] |= LINUX_O_NOFOLLOW;
#endif
#ifdef LINUX_O_DIRECT
		if (result & O_DIRECT)
			td->td_retval[0] |= LINUX_O_DIRECT;
#endif
		return (error);

	case LINUX_F_SETFL:
		/* Translate the Linux status flags to native ones. */
		arg = 0;
		if (args->arg & LINUX_O_NDELAY)
			arg |= O_NONBLOCK;
		if (args->arg & LINUX_O_APPEND)
			arg |= O_APPEND;
		if (args->arg & LINUX_O_SYNC)
			arg |= O_FSYNC;
		if (args->arg & LINUX_O_ASYNC)
			arg |= O_ASYNC;
#ifdef LINUX_O_NOFOLLOW
		if (args->arg & LINUX_O_NOFOLLOW)
			arg |= O_NOFOLLOW;
#endif
#ifdef LINUX_O_DIRECT
		if (args->arg & LINUX_O_DIRECT)
			arg |= O_DIRECT;
#endif
		return (kern_fcntl(td, args->fd, F_SETFL, arg));

	case LINUX_F_GETLK:
		/*
		 * Copy the Linux lock record in, query the native lock and
		 * copy the converted answer back to the same user address.
		 */
		error = copyin((void *)args->arg, &linux_flock,
		    sizeof(linux_flock));
		if (error)
			return (error);
		linux_to_bsd_flock(&linux_flock, &bsd_flock);
		error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock);
		if (error)
			return (error);
		bsd_to_linux_flock(&bsd_flock, &linux_flock);
		return (copyout(&linux_flock, (void *)args->arg,
		    sizeof(linux_flock)));

	case LINUX_F_SETLK:
		error = copyin((void *)args->arg, &linux_flock,
		    sizeof(linux_flock));
		if (error)
			return (error);
		linux_to_bsd_flock(&linux_flock, &bsd_flock);
		return (kern_fcntl(td, args->fd, F_SETLK,
		    (intptr_t)&bsd_flock));

	case LINUX_F_SETLKW:
		error = copyin((void *)args->arg, &linux_flock,
		    sizeof(linux_flock));
		if (error)
			return (error);
		linux_to_bsd_flock(&linux_flock, &bsd_flock);
		return (kern_fcntl(td,
args->fd, F_SETLKW, (intptr_t)&bsd_flock)); case LINUX_F_GETOWN: return (kern_fcntl(td, args->fd, F_GETOWN, 0)); case LINUX_F_SETOWN: /* * XXX some Linux applications depend on F_SETOWN having no * significant effect for pipes (SIGIO is not delivered for * pipes under Linux-2.2.35 at least). */ error = fget(td, args->fd, &cap_fcntl_rights, &fp); if (error) return (error); if (fp->f_type == DTYPE_PIPE) { fdrop(fp, td); return (EINVAL); } fdrop(fp, td); return (kern_fcntl(td, args->fd, F_SETOWN, args->arg)); case LINUX_F_DUPFD_CLOEXEC: return (kern_fcntl(td, args->fd, F_DUPFD_CLOEXEC, args->arg)); /* * Our F_SEAL_* values match Linux one for maximum compatibility. So we * only needed to account for different values for fcntl(2) commands. */ case LINUX_F_GET_SEALS: error = kern_fcntl(td, args->fd, F_GET_SEALS, 0); if (error != 0) return (error); td->td_retval[0] = bsd_to_linux_bits(td->td_retval[0], seal_bitmap, 0); return (0); case LINUX_F_ADD_SEALS: return (kern_fcntl(td, args->fd, F_ADD_SEALS, linux_to_bsd_bits(args->arg, seal_bitmap, 0))); default: linux_msg(td, "unsupported fcntl cmd %d\n", args->cmd); return (EINVAL); } } int linux_fcntl(struct thread *td, struct linux_fcntl_args *args) { return (fcntl_common(td, args)); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_fcntl64(struct thread *td, struct linux_fcntl64_args *args) { struct l_flock64 linux_flock; struct flock bsd_flock; struct linux_fcntl_args fcntl_args; int error; switch (args->cmd) { case LINUX_F_GETLK64: error = copyin((void *)args->arg, &linux_flock, sizeof(linux_flock)); if (error) return (error); linux_to_bsd_flock64(&linux_flock, &bsd_flock); error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock); if (error) return (error); bsd_to_linux_flock64(&bsd_flock, &linux_flock); return (copyout(&linux_flock, (void *)args->arg, sizeof(linux_flock))); case LINUX_F_SETLK64: error = copyin((void *)args->arg, &linux_flock, sizeof(linux_flock)); if (error) 
return (error); linux_to_bsd_flock64(&linux_flock, &bsd_flock); return (kern_fcntl(td, args->fd, F_SETLK, (intptr_t)&bsd_flock)); case LINUX_F_SETLKW64: error = copyin((void *)args->arg, &linux_flock, sizeof(linux_flock)); if (error) return (error); linux_to_bsd_flock64(&linux_flock, &bsd_flock); return (kern_fcntl(td, args->fd, F_SETLKW, (intptr_t)&bsd_flock)); } fcntl_args.fd = args->fd; fcntl_args.cmd = args->cmd; fcntl_args.arg = args->arg; return (fcntl_common(td, &fcntl_args)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ #ifdef LINUX_LEGACY_SYSCALLS int linux_chown(struct thread *td, struct linux_chown_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, args->uid, args->gid, 0); LFREEPATH(path); return (error); } #endif int linux_fchownat(struct thread *td, struct linux_fchownat_args *args) { char *path; int error, dfd, flag; if (args->flag & ~LINUX_AT_SYMLINK_NOFOLLOW) return (EINVAL); dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHEXIST_AT(td, args->filename, &path, dfd); flag = (args->flag & LINUX_AT_SYMLINK_NOFOLLOW) == 0 ? 
0 : AT_SYMLINK_NOFOLLOW; error = kern_fchownat(td, dfd, path, UIO_SYSSPACE, args->uid, args->gid, flag); LFREEPATH(path); return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_lchown(struct thread *td, struct linux_lchown_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, args->uid, args->gid, AT_SYMLINK_NOFOLLOW); LFREEPATH(path); return (error); } #endif static int convert_fadvice(int advice) { switch (advice) { case LINUX_POSIX_FADV_NORMAL: return (POSIX_FADV_NORMAL); case LINUX_POSIX_FADV_RANDOM: return (POSIX_FADV_RANDOM); case LINUX_POSIX_FADV_SEQUENTIAL: return (POSIX_FADV_SEQUENTIAL); case LINUX_POSIX_FADV_WILLNEED: return (POSIX_FADV_WILLNEED); case LINUX_POSIX_FADV_DONTNEED: return (POSIX_FADV_DONTNEED); case LINUX_POSIX_FADV_NOREUSE: return (POSIX_FADV_NOREUSE); default: return (-1); } } int linux_fadvise64(struct thread *td, struct linux_fadvise64_args *args) { off_t offset; int advice; #if defined(__amd64__) && defined(COMPAT_LINUX32) offset = PAIR32TO64(off_t, args->offset); #else offset = args->offset; #endif advice = convert_fadvice(args->advice); if (advice == -1) return (EINVAL); return (kern_posix_fadvise(td, args->fd, offset, args->len, advice)); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_fadvise64_64(struct thread *td, struct linux_fadvise64_64_args *args) { off_t len, offset; int advice; #if defined(__amd64__) && defined(COMPAT_LINUX32) len = PAIR32TO64(off_t, args->len); offset = PAIR32TO64(off_t, args->offset); #else len = args->len; offset = args->offset; #endif advice = convert_fadvice(args->advice); if (advice == -1) return (EINVAL); return (kern_posix_fadvise(td, args->fd, offset, len, advice)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ #ifdef LINUX_LEGACY_SYSCALLS int linux_pipe(struct thread *td, struct linux_pipe_args *args) { int fildes[2]; int error; error = kern_pipe(td, fildes, 0, NULL, 
NULL); if (error != 0) return (error); error = copyout(fildes, args->pipefds, sizeof(fildes)); if (error != 0) { (void)kern_close(td, fildes[0]); (void)kern_close(td, fildes[1]); } return (error); } #endif int linux_pipe2(struct thread *td, struct linux_pipe2_args *args) { int fildes[2]; int error, flags; if ((args->flags & ~(LINUX_O_NONBLOCK | LINUX_O_CLOEXEC)) != 0) return (EINVAL); flags = 0; if ((args->flags & LINUX_O_NONBLOCK) != 0) flags |= O_NONBLOCK; if ((args->flags & LINUX_O_CLOEXEC) != 0) flags |= O_CLOEXEC; error = kern_pipe(td, fildes, flags, NULL, NULL); if (error != 0) return (error); error = copyout(fildes, args->pipefds, sizeof(fildes)); if (error != 0) { (void)kern_close(td, fildes[0]); (void)kern_close(td, fildes[1]); } return (error); } int linux_dup3(struct thread *td, struct linux_dup3_args *args) { int cmd; intptr_t newfd; if (args->oldfd == args->newfd) return (EINVAL); if ((args->flags & ~LINUX_O_CLOEXEC) != 0) return (EINVAL); if (args->flags & LINUX_O_CLOEXEC) cmd = F_DUP2FD_CLOEXEC; else cmd = F_DUP2FD; newfd = args->newfd; return (kern_fcntl(td, args->oldfd, cmd, newfd)); } int linux_fallocate(struct thread *td, struct linux_fallocate_args *args) { off_t len, offset; /* * We emulate only posix_fallocate system call for which * mode should be 0. */ if (args->mode != 0) return (EOPNOTSUPP); #if defined(__amd64__) && defined(COMPAT_LINUX32) len = PAIR32TO64(off_t, args->len); offset = PAIR32TO64(off_t, args->offset); #else len = args->len; offset = args->offset; #endif return (kern_posix_fallocate(td, args->fd, offset, len)); } int linux_copy_file_range(struct thread *td, struct linux_copy_file_range_args *args) { l_loff_t inoff, outoff, *inoffp, *outoffp; int error, flags; /* * copy_file_range(2) on Linux doesn't define any flags (yet), so is * the native implementation. Enforce it. 
*/
	if (args->flags != 0) {
		linux_msg(td, "copy_file_range unsupported flags 0x%x",
		    args->flags);
		return (EINVAL);
	}
	flags = 0;
	/* A NULL offset pointer from the caller is passed through as NULL. */
	inoffp = outoffp = NULL;
	if (args->off_in != NULL) {
		error = copyin(args->off_in, &inoff, sizeof(l_loff_t));
		if (error != 0)
			return (error);
		inoffp = &inoff;
	}
	if (args->off_out != NULL) {
		error = copyin(args->off_out, &outoff, sizeof(l_loff_t));
		if (error != 0)
			return (error);
		outoffp = &outoff;
	}

	error = kern_copy_file_range(td, args->fd_in, inoffp, args->fd_out,
	    outoffp, args->len, flags);
	/* On success, write the offsets back to whichever pointers were given. */
	if (error == 0 && args->off_in != NULL)
		error = copyout(inoffp, args->off_in, sizeof(l_loff_t));
	if (error == 0 && args->off_out != NULL)
		error = copyout(outoffp, args->off_out, sizeof(l_loff_t));
	return (error);
}

/* Prefix placed in front of the user-supplied memfd name. */
#define	LINUX_MEMFD_PREFIX	"memfd:"

int
linux_memfd_create(struct thread *td, struct linux_memfd_create_args *args)
{
	char memfd_name[LINUX_NAME_MAX + 1];
	int error, flags, shmflags, oflags;

	/*
	 * This is our clever trick to avoid the heap allocation to copy in the
	 * uname.  We don't really need to go this far out of our way, but it
	 * does keep the rest of this function fairly clean as they don't have
	 * to worry about cleanup on the way out.
	 */
	/* The user name is copied in just past the space kept for the prefix. */
	error = copyinstr(args->uname_ptr,
	    memfd_name + sizeof(LINUX_MEMFD_PREFIX) - 1,
	    LINUX_NAME_MAX - sizeof(LINUX_MEMFD_PREFIX) - 1, NULL);
	if (error != 0) {
		/* An over-long name is reported as EINVAL. */
		if (error == ENAMETOOLONG)
			error = EINVAL;
		return (error);
	}

	/* Fill in the prefix without its terminating NUL. */
	memcpy(memfd_name, LINUX_MEMFD_PREFIX, sizeof(LINUX_MEMFD_PREFIX) - 1);
	flags = linux_to_bsd_bits(args->flags, mfd_bitmap, 0);
	if ((flags & ~(MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB |
	    MFD_HUGE_MASK)) != 0)
		return (EINVAL);
	/* Size specified but no HUGETLB. */
	if ((flags & MFD_HUGE_MASK) != 0 && (flags & MFD_HUGETLB) == 0)
		return (EINVAL);
	/* We don't actually support HUGETLB.
*/ if ((flags & MFD_HUGETLB) != 0) return (ENOSYS); oflags = O_RDWR; shmflags = SHM_GROW_ON_WRITE; if ((flags & MFD_CLOEXEC) != 0) oflags |= O_CLOEXEC; if ((flags & MFD_ALLOW_SEALING) != 0) shmflags |= SHM_ALLOW_SEALING; return (kern_shm_open2(td, SHM_ANON, oflags, 0, shmflags, NULL, memfd_name)); } int linux_splice(struct thread *td, struct linux_splice_args *args) { linux_msg(td, "syscall splice not really implemented"); /* * splice(2) is documented to return EINVAL in various circumstances; * returning it instead of ENOSYS should hint the caller to use fallback * instead. */ return (EINVAL); } Index: head/sys/compat/linux/linux_mib.c =================================================================== --- head/sys/compat/linux/linux_mib.c (revision 364365) +++ head/sys/compat/linux/linux_mib.c (revision 364366) @@ -1,565 +1,569 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1999 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include struct linux_prison { char pr_osname[LINUX_MAX_UTSNAME]; char pr_osrelease[LINUX_MAX_UTSNAME]; int pr_oss_version; int pr_osrel; }; static struct linux_prison lprison0 = { .pr_osname = "Linux", .pr_osrelease = LINUX_VERSION_STR, .pr_oss_version = 0x030600, .pr_osrel = LINUX_VERSION_CODE }; static unsigned linux_osd_jail_slot; SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Linux mode"); int linux_debug = 1; SYSCTL_INT(_compat_linux, OID_AUTO, debug, CTLFLAG_RWTUN, &linux_debug, 0, "Log warnings from linux(4); or 0 to disable"); int linux_default_openfiles = 1024; SYSCTL_INT(_compat_linux, OID_AUTO, default_openfiles, CTLFLAG_RWTUN, &linux_default_openfiles, 0, "Default soft openfiles resource limit, or -1 for unlimited"); int linux_ignore_ip_recverr = 1; SYSCTL_INT(_compat_linux, OID_AUTO, ignore_ip_recverr, CTLFLAG_RWTUN, &linux_ignore_ip_recverr, 0, "Ignore enabling IP_RECVERR"); int linux_preserve_vstatus = 0; SYSCTL_INT(_compat_linux, OID_AUTO, preserve_vstatus, CTLFLAG_RWTUN, &linux_preserve_vstatus, 0, "Preserve VSTATUS termios(4) flag"); bool linux_map_sched_prio = true; SYSCTL_BOOL(_compat_linux, OID_AUTO, map_sched_prio, CTLFLAG_RDTUN, &linux_map_sched_prio, 0, "Map scheduler priorities to Linux priorities " "(not POSIX compliant)"); +int linux_use_emul_path = 1; 
+SYSCTL_INT(_compat_linux, OID_AUTO, use_emul_path, CTLFLAG_RWTUN, + &linux_use_emul_path, 0, "Use linux.compat.emul_path"); + static int linux_set_osname(struct thread *td, char *osname); static int linux_set_osrelease(struct thread *td, char *osrelease); static int linux_set_oss_version(struct thread *td, int oss_version); static int linux_sysctl_osname(SYSCTL_HANDLER_ARGS) { char osname[LINUX_MAX_UTSNAME]; int error; linux_get_osname(req->td, osname); error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req); if (error != 0 || req->newptr == NULL) return (error); error = linux_set_osname(req->td, osname); return (error); } SYSCTL_PROC(_compat_linux, OID_AUTO, osname, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 0, 0, linux_sysctl_osname, "A", "Linux kernel OS name"); static int linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS) { char osrelease[LINUX_MAX_UTSNAME]; int error; linux_get_osrelease(req->td, osrelease); error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req); if (error != 0 || req->newptr == NULL) return (error); error = linux_set_osrelease(req->td, osrelease); return (error); } SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 0, 0, linux_sysctl_osrelease, "A", "Linux kernel OS release"); static int linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS) { int oss_version; int error; oss_version = linux_get_oss_version(req->td); error = sysctl_handle_int(oidp, &oss_version, 0, req); if (error != 0 || req->newptr == NULL) return (error); error = linux_set_oss_version(req->td, oss_version); return (error); } SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 0, 0, linux_sysctl_oss_version, "I", "Linux OSS version"); /* * Map the osrelease into integer */ static int linux_map_osrel(char *osrelease, int *osrel) { char *sep, *eosrelease; int len, v0, v1, v2, v; len = strlen(osrelease); eosrelease = osrelease + 
len;
	/* First component: digits followed by '.' and at least one more char. */
	v0 = strtol(osrelease, &sep, 10);
	if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.')
		return (EINVAL);
	osrelease = sep + 1;
	/* Second component, same rules. */
	v1 = strtol(osrelease, &sep, 10);
	if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.')
		return (EINVAL);
	osrelease = sep + 1;
	/*
	 * Third component: must either end the string or be followed by
	 * '-' and at least one more character.
	 */
	v2 = strtol(osrelease, &sep, 10);
	if (osrelease == sep ||
	    (sep != eosrelease && (sep + 1 >= eosrelease || *sep != '-')))
		return (EINVAL);

	v = LINUX_KERNVER(v0, v1, v2);
	/* Anything below version 1.0.0 is rejected. */
	if (v < LINUX_KERNVER(1, 0, 0))
		return (EINVAL);
	/* osrel may be NULL when the caller only wants validation. */
	if (osrel != NULL)
		*osrel = v;

	return (0);
}

/*
 * Find a prison with Linux info.
 * Return the Linux info and the (locked) prison.
 */
static struct linux_prison *
linux_find_prison(struct prison *spr, struct prison **prp)
{
	struct prison *pr;
	struct linux_prison *lpr;

	/*
	 * Walk up from spr through pr_parent.  Each prison is locked while
	 * it is examined and unlocked again if it has no Linux info; the
	 * loop ends with the matching prison still locked.
	 */
	for (pr = spr;; pr = pr->pr_parent) {
		mtx_lock(&pr->pr_mtx);
		/* prison0 always resolves to the static lprison0 record. */
		lpr = (pr == &prison0)
		    ? &lprison0
		    : osd_jail_get(pr, linux_osd_jail_slot);
		if (lpr != NULL)
			break;
		mtx_unlock(&pr->pr_mtx);
	}
	*prp = pr;

	return (lpr);
}

/*
 * Ensure a prison has its own Linux info.  If lprp is non-null, point it to
 * the Linux info and lock the prison.
 */
static void
linux_alloc_prison(struct prison *pr, struct linux_prison **lprp)
{
	struct prison *ppr;
	struct linux_prison *lpr, *nlpr;
	void **rsv;

	/* If this prison already has Linux info, return that. */
	lpr = linux_find_prison(pr, &ppr);
	if (ppr == pr)
		goto done;
	/*
	 * Allocate a new info record.  Then check again, in case something
	 * changed during the allocation.
	 */
	/* The lock is dropped before the M_WAITOK allocation below. */
	mtx_unlock(&ppr->pr_mtx);
	nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK);
	rsv = osd_reserve(linux_osd_jail_slot);
	lpr = linux_find_prison(pr, &ppr);
	if (ppr == pr) {
		/* The prison gained its own record meanwhile: discard ours. */
		free(nlpr, M_PRISON);
		osd_free_reserved(rsv);
		goto done;
	}
	/* Inherit the initial values from the ancestor.
*/
	/* ppr (the ancestor) is still locked here; now lock pr as well. */
	mtx_lock(&pr->pr_mtx);
	(void)osd_jail_set_reserved(pr, linux_osd_jail_slot, rsv, nlpr);
	bcopy(lpr, nlpr, sizeof(*lpr));
	lpr = nlpr;
	mtx_unlock(&ppr->pr_mtx);
 done:
	/* With lprp set the prison stays locked for the caller. */
	if (lprp != NULL)
		*lprp = lpr;
	else
		mtx_unlock(&pr->pr_mtx);
}

/*
 * Jail OSD methods for Linux prison data.
 */

/*
 * Create method: unless the "linux" option is JAIL_SYS_INHERIT, give the
 * new prison its own Linux info record.  Always returns 0.
 */
static int
linux_prison_create(void *obj, void *data)
{
	struct prison *pr = obj;
	struct vfsoptlist *opts = data;
	int jsys;

	if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 &&
	    jsys == JAIL_SYS_INHERIT)
		return (0);
	/*
	 * Inherit a prison's initial values from its parent
	 * (different from JAIL_SYS_INHERIT which also inherits changes).
	 */
	linux_alloc_prison(pr, NULL);
	return (0);
}

/*
 * Check method: validate the "linux" and "linux.*" options.  ENOENT from
 * a lookup means the option was not supplied and is not an error.
 */
static int
linux_prison_check(void *obj __unused, void *data)
{
	struct vfsoptlist *opts = data;
	char *osname, *osrelease;
	int error, jsys, len, oss_version;

	/* Check that the parameters are correct. */
	error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
	if (error != ENOENT) {
		if (error != 0)
			return (error);
		if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT)
			return (EINVAL);
	}
	error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
	if (error != ENOENT) {
		if (error != 0)
			return (error);
		/* The value must be a non-empty, NUL-terminated string. */
		if (len == 0 || osname[len - 1] != '\0')
			return (EINVAL);
		if (len > LINUX_MAX_UTSNAME) {
			vfs_opterror(opts, "linux.osname too long");
			return (ENAMETOOLONG);
		}
	}
	error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
	if (error != ENOENT) {
		if (error != 0)
			return (error);
		if (len == 0 || osrelease[len - 1] != '\0')
			return (EINVAL);
		if (len > LINUX_MAX_UTSNAME) {
			vfs_opterror(opts, "linux.osrelease too long");
			return (ENAMETOOLONG);
		}
		/* NULL output pointer: only the format is checked here. */
		error = linux_map_osrel(osrelease, NULL);
		if (error != 0) {
			vfs_opterror(opts, "linux.osrelease format error");
			return (error);
		}
	}
	error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
	    sizeof(oss_version));

	if (error == ENOENT)
		error = 0;
	return (error);
}

static int
linux_prison_set(void *obj, void *data)
{
	struct linux_prison *lpr;
	struct prison *pr = obj;
struct vfsoptlist *opts = data; char *osname, *osrelease; int error, gotversion, jsys, len, oss_version; /* Set the parameters, which should be correct. */ error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)); if (error == ENOENT) jsys = -1; error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); if (error == ENOENT) osname = NULL; else jsys = JAIL_SYS_NEW; error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); if (error == ENOENT) osrelease = NULL; else jsys = JAIL_SYS_NEW; error = vfs_copyopt(opts, "linux.oss_version", &oss_version, sizeof(oss_version)); if (error == ENOENT) gotversion = 0; else { gotversion = 1; jsys = JAIL_SYS_NEW; } switch (jsys) { case JAIL_SYS_INHERIT: /* "linux=inherit": inherit the parent's Linux info. */ mtx_lock(&pr->pr_mtx); osd_jail_del(pr, linux_osd_jail_slot); mtx_unlock(&pr->pr_mtx); break; case JAIL_SYS_NEW: /* * "linux=new" or "linux.*": * the prison gets its own Linux info. */ linux_alloc_prison(pr, &lpr); if (osrelease) { (void)linux_map_osrel(osrelease, &lpr->pr_osrel); strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME); } if (osname) strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); if (gotversion) lpr->pr_oss_version = oss_version; mtx_unlock(&pr->pr_mtx); } return (0); } SYSCTL_JAIL_PARAM_SYS_NODE(linux, CTLFLAG_RW, "Jail Linux parameters"); SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME, "Jail Linux kernel OS name"); SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME, "Jail Linux kernel OS release"); SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW, "I", "Jail Linux OSS version"); static int linux_prison_get(void *obj, void *data) { struct linux_prison *lpr; struct prison *ppr; struct prison *pr = obj; struct vfsoptlist *opts = data; int error, i; static int version0; /* See if this prison is the one with the Linux info. */ lpr = linux_find_prison(pr, &ppr); i = (ppr == pr) ? 
JAIL_SYS_NEW : JAIL_SYS_INHERIT; error = vfs_setopt(opts, "linux", &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done; if (i) { error = vfs_setopts(opts, "linux.osname", lpr->pr_osname); if (error != 0 && error != ENOENT) goto done; error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease); if (error != 0 && error != ENOENT) goto done; error = vfs_setopt(opts, "linux.oss_version", &lpr->pr_oss_version, sizeof(lpr->pr_oss_version)); if (error != 0 && error != ENOENT) goto done; } else { /* * If this prison is inheriting its Linux info, report * empty/zero parameters. */ error = vfs_setopts(opts, "linux.osname", ""); if (error != 0 && error != ENOENT) goto done; error = vfs_setopts(opts, "linux.osrelease", ""); if (error != 0 && error != ENOENT) goto done; error = vfs_setopt(opts, "linux.oss_version", &version0, sizeof(lpr->pr_oss_version)); if (error != 0 && error != ENOENT) goto done; } error = 0; done: mtx_unlock(&ppr->pr_mtx); return (error); } static void linux_prison_destructor(void *data) { free(data, M_PRISON); } void linux_osd_jail_register(void) { struct prison *pr; osd_method_t methods[PR_MAXMETHOD] = { [PR_METHOD_CREATE] = linux_prison_create, [PR_METHOD_GET] = linux_prison_get, [PR_METHOD_SET] = linux_prison_set, [PR_METHOD_CHECK] = linux_prison_check }; linux_osd_jail_slot = osd_jail_register(linux_prison_destructor, methods); /* Copy the system Linux info to any current prisons. 
*/ sx_slock(&allprison_lock); TAILQ_FOREACH(pr, &allprison, pr_list) linux_alloc_prison(pr, NULL); sx_sunlock(&allprison_lock); } void linux_osd_jail_deregister(void) { osd_jail_deregister(linux_osd_jail_slot); } void linux_get_osname(struct thread *td, char *dst) { struct prison *pr; struct linux_prison *lpr; lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME); mtx_unlock(&pr->pr_mtx); } static int linux_set_osname(struct thread *td, char *osname) { struct prison *pr; struct linux_prison *lpr; lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); mtx_unlock(&pr->pr_mtx); return (0); } void linux_get_osrelease(struct thread *td, char *dst) { struct prison *pr; struct linux_prison *lpr; lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME); mtx_unlock(&pr->pr_mtx); } int linux_kernver(struct thread *td) { struct prison *pr; struct linux_prison *lpr; int osrel; lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); osrel = lpr->pr_osrel; mtx_unlock(&pr->pr_mtx); return (osrel); } static int linux_set_osrelease(struct thread *td, char *osrelease) { struct prison *pr; struct linux_prison *lpr; int error; lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); error = linux_map_osrel(osrelease, &lpr->pr_osrel); if (error == 0) strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME); mtx_unlock(&pr->pr_mtx); return (error); } int linux_get_oss_version(struct thread *td) { struct prison *pr; struct linux_prison *lpr; int version; lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); version = lpr->pr_oss_version; mtx_unlock(&pr->pr_mtx); return (version); } static int linux_set_oss_version(struct thread *td, int oss_version) { struct prison *pr; struct linux_prison *lpr; lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); lpr->pr_oss_version = oss_version; mtx_unlock(&pr->pr_mtx); return (0); } Index: 
head/sys/compat/linux/linux_misc.c =================================================================== --- head/sys/compat/linux/linux_misc.c (revision 364365) +++ head/sys/compat/linux/linux_misc.c (revision 364366) @@ -1,2491 +1,2543 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2002 Doug Rabson * Copyright (c) 1994-1995 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #if defined(__i386__) #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_LINUX32 #include #include #else #include #include #endif #include #include #include #include #include #include #include #include #include /** * Special DTrace provider for the linuxulator. * * In this file we define the provider for the entire linuxulator. All * modules (= files of the linuxulator) use it. * * We define a different name depending on the emulated bitsize, see * ../..//linux{,32}/linux.h, e.g.: * native bitsize = linuxulator * amd64, 32bit emulation = linuxulator32 */ LIN_SDT_PROVIDER_DEFINE(LINUX_DTRACE); int stclohz; /* Statistics clock frequency */ static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, RLIMIT_MEMLOCK, RLIMIT_AS }; struct l_sysinfo { l_long uptime; /* Seconds since boot */ l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ #define LINUX_SYSINFO_LOADS_SCALE 65536 l_ulong totalram; /* Total usable main memory size */ l_ulong freeram; /* Available memory size */ l_ulong sharedram; /* Amount of shared memory */ l_ulong bufferram; /* Memory used by buffers */ l_ulong totalswap; /* Total swap space size */ l_ulong freeswap; /* swap space still available */ l_ushort procs; /* Number of current processes */ l_ushort pads; l_ulong totalhigh; l_ulong freehigh; l_uint mem_unit; char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ }; struct l_pselect6arg { l_uintptr_t ss; l_size_t ss_len; }; static int linux_utimensat_nsec_valid(l_long); int 
linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) { struct l_sysinfo sysinfo; int i, j; struct timespec ts; bzero(&sysinfo, sizeof(sysinfo)); getnanouptime(&ts); if (ts.tv_nsec != 0) ts.tv_sec++; sysinfo.uptime = ts.tv_sec; /* Use the information from the mib to get our load averages */ for (i = 0; i < 3; i++) sysinfo.loads[i] = averunnable.ldavg[i] * LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; sysinfo.totalram = physmem * PAGE_SIZE; sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE; /* * sharedram counts pages allocated to named, swap-backed objects such * as shared memory segments and tmpfs files. There is no cheap way to * compute this, so just leave the field unpopulated. Linux itself only * started setting this field in the 3.x timeframe. */ sysinfo.sharedram = 0; sysinfo.bufferram = 0; swap_pager_status(&i, &j); sysinfo.totalswap = i * PAGE_SIZE; sysinfo.freeswap = (i - j) * PAGE_SIZE; sysinfo.procs = nprocs; /* * Platforms supported by the emulation layer do not have a notion of * high memory. */ sysinfo.totalhigh = 0; sysinfo.freehigh = 0; sysinfo.mem_unit = 1; return (copyout(&sysinfo, args->info, sizeof(sysinfo))); } #ifdef LINUX_LEGACY_SYSCALLS int linux_alarm(struct thread *td, struct linux_alarm_args *args) { struct itimerval it, old_it; u_int secs; int error; secs = args->secs; /* * Linux alarm() is always successful. Limit secs to INT32_MAX / 2 * to match kern_setitimer()'s limit to avoid error from it. * * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit * platforms. 
*/ if (secs > INT32_MAX / 2) secs = INT32_MAX / 2; it.it_value.tv_sec = secs; it.it_value.tv_usec = 0; timevalclear(&it.it_interval); error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); KASSERT(error == 0, ("kern_setitimer returns %d", error)); if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) || old_it.it_value.tv_usec >= 500000) old_it.it_value.tv_sec++; td->td_retval[0] = old_it.it_value.tv_sec; return (0); } #endif int linux_brk(struct thread *td, struct linux_brk_args *args) { struct vmspace *vm = td->td_proc->p_vmspace; uintptr_t new, old; old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize); new = (uintptr_t)args->dsend; if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new)) td->td_retval[0] = (register_t)new; else td->td_retval[0] = (register_t)old; return (0); } #if defined(__i386__) /* XXX: what about amd64/linux32? */ int linux_uselib(struct thread *td, struct linux_uselib_args *args) { struct nameidata ni; struct vnode *vp; struct exec *a_out; vm_map_t map; vm_map_entry_t entry; struct vattr attr; vm_offset_t vmaddr; unsigned long file_offset; unsigned long bss_size; char *library; ssize_t aresid; int error; bool locked, opened, textset; - LCONVPATHEXIST(td, args->library, &library); - a_out = NULL; vp = NULL; locked = false; textset = false; opened = false; - NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1, - UIO_SYSSPACE, library, td); - error = namei(&ni); - LFREEPATH(library); + if (!LUSECONVPATH(td)) { + NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1, + UIO_USERSPACE, args->library, td); + error = namei(&ni); + } else { + LCONVPATHEXIST(td, args->library, &library); + NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1, + UIO_SYSSPACE, library, td); + error = namei(&ni); + LFREEPATH(library); + } if (error) goto cleanup; vp = ni.ni_vp; NDFREE(&ni, NDF_ONLY_PNBUF); /* * From here on down, we have a locked vnode that must be unlocked. 
* XXX: The code below largely duplicates exec_check_permissions(). */ locked = true; /* Executable? */ error = VOP_GETATTR(vp, &attr, td->td_ucred); if (error) goto cleanup; if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) { /* EACCESS is what exec(2) returns. */ error = ENOEXEC; goto cleanup; } /* Sensible size? */ if (attr.va_size == 0) { error = ENOEXEC; goto cleanup; } /* Can we access it? */ error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); if (error) goto cleanup; /* * XXX: This should use vn_open() so that it is properly authorized, * and to reduce code redundancy all over the place here. * XXX: Not really, it duplicates far more of exec_check_permissions() * than vn_open(). */ #ifdef MAC error = mac_vnode_check_open(td->td_ucred, vp, VREAD); if (error) goto cleanup; #endif error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); if (error) goto cleanup; opened = true; /* Pull in executable header into exec_map */ error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE, VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0); if (error) goto cleanup; /* Is it a Linux binary ? */ if (((a_out->a_magic >> 16) & 0xff) != 0x64) { error = ENOEXEC; goto cleanup; } /* * While we are here, we should REALLY do some more checks */ /* Set file/virtual offset based on a.out variant. */ switch ((int)(a_out->a_magic & 0xffff)) { case 0413: /* ZMAGIC */ file_offset = 1024; break; case 0314: /* QMAGIC */ file_offset = 0; break; default: error = ENOEXEC; goto cleanup; } bss_size = round_page(a_out->a_bss); /* Check various fields in header for validity/bounds. */ if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { error = ENOEXEC; goto cleanup; } /* text + data can't exceed file size */ if (a_out->a_data + a_out->a_text > attr.va_size) { error = EFAULT; goto cleanup; } /* * text/data/bss must not exceed limits * XXX - this is not complete. it should check current usage PLUS * the resources needed by this library. 
*/ PROC_LOCK(td->td_proc); if (a_out->a_text > maxtsiz || a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) || racct_set(td->td_proc, RACCT_DATA, a_out->a_data + bss_size) != 0) { PROC_UNLOCK(td->td_proc); error = ENOMEM; goto cleanup; } PROC_UNLOCK(td->td_proc); /* * Prevent more writers. */ error = VOP_SET_TEXT(vp); if (error != 0) goto cleanup; textset = true; /* * Lock no longer needed */ locked = false; VOP_UNLOCK(vp); /* * Check if file_offset page aligned. Currently we cannot handle * misalinged file offsets, and so we read in the entire image * (what a waste). */ if (file_offset & PAGE_MASK) { /* Map text+data read/write/execute */ /* a_entry is the load address and is page aligned */ vmaddr = trunc_page(a_out->a_entry); /* get anon user mapping, read+write+execute */ error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) goto cleanup; error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset, a_out->a_text + a_out->a_data, UIO_USERSPACE, 0, td->td_ucred, NOCRED, &aresid, td); if (error != 0) goto cleanup; if (aresid != 0) { error = ENOEXEC; goto cleanup; } } else { /* * for QMAGIC, a_entry is 20 bytes beyond the load address * to skip the executable header */ vmaddr = trunc_page(a_out->a_entry); /* * Map it all into the process's space as a single * copy-on-write "data" segment. 
*/ map = &td->td_proc->p_vmspace->vm_map; error = vm_mmap(map, &vmaddr, a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset); if (error) goto cleanup; vm_map_lock(map); if (!vm_map_lookup_entry(map, vmaddr, &entry)) { vm_map_unlock(map); error = EDOOFUS; goto cleanup; } entry->eflags |= MAP_ENTRY_VN_EXEC; vm_map_unlock(map); textset = false; } if (bss_size != 0) { /* Calculate BSS start address */ vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data; /* allocate some 'anon' space */ error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) goto cleanup; } cleanup: if (opened) { if (locked) VOP_UNLOCK(vp); locked = false; VOP_CLOSE(vp, FREAD, td->td_ucred, td); } if (textset) { if (!locked) { locked = true; VOP_LOCK(vp, LK_SHARED | LK_RETRY); } VOP_UNSET_TEXT_CHECKED(vp); } if (locked) VOP_UNLOCK(vp); /* Release the temporary mapping. */ if (a_out) kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE); return (error); } #endif /* __i386__ */ #ifdef LINUX_LEGACY_SYSCALLS int linux_select(struct thread *td, struct linux_select_args *args) { l_timeval ltv; struct timeval tv0, tv1, utv, *tvp; int error; /* * Store current time for computation of the amount of * time left. */ if (args->timeout) { if ((error = copyin(args->timeout, &ltv, sizeof(ltv)))) goto select_out; utv.tv_sec = ltv.tv_sec; utv.tv_usec = ltv.tv_usec; if (itimerfix(&utv)) { /* * The timeval was invalid. Convert it to something * valid that will act as it does under Linux.
*/ utv.tv_sec += utv.tv_usec / 1000000; utv.tv_usec %= 1000000; if (utv.tv_usec < 0) { utv.tv_sec -= 1; utv.tv_usec += 1000000; } if (utv.tv_sec < 0) timevalclear(&utv); } microtime(&tv0); tvp = &utv; } else tvp = NULL; error = kern_select(td, args->nfds, args->readfds, args->writefds, args->exceptfds, tvp, LINUX_NFDBITS); if (error) goto select_out; if (args->timeout) { if (td->td_retval[0]) { /* * Compute how much time was left of the timeout, * by subtracting the current time and the time * before we started the call, and subtracting * that result from the user-supplied value. */ microtime(&tv1); timevalsub(&tv1, &tv0); timevalsub(&utv, &tv1); if (utv.tv_sec < 0) timevalclear(&utv); } else timevalclear(&utv); ltv.tv_sec = utv.tv_sec; ltv.tv_usec = utv.tv_usec; if ((error = copyout(&ltv, args->timeout, sizeof(ltv)))) goto select_out; } select_out: return (error); } #endif int linux_mremap(struct thread *td, struct linux_mremap_args *args) { uintptr_t addr; size_t len; int error = 0; if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { td->td_retval[0] = 0; return (EINVAL); } /* * Check for the page alignment. * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. */ if (args->addr & PAGE_MASK) { td->td_retval[0] = 0; return (EINVAL); } args->new_len = round_page(args->new_len); args->old_len = round_page(args->old_len); if (args->new_len > args->old_len) { td->td_retval[0] = 0; return (ENOMEM); } if (args->new_len < args->old_len) { addr = args->addr + args->new_len; len = args->old_len - args->new_len; error = kern_munmap(td, addr, len); } td->td_retval[0] = error ?
0 : (uintptr_t)args->addr; return (error); } #define LINUX_MS_ASYNC 0x0001 #define LINUX_MS_INVALIDATE 0x0002 #define LINUX_MS_SYNC 0x0004 int linux_msync(struct thread *td, struct linux_msync_args *args) { return (kern_msync(td, args->addr, args->len, args->fl & ~LINUX_MS_SYNC)); } #ifdef LINUX_LEGACY_SYSCALLS int linux_time(struct thread *td, struct linux_time_args *args) { struct timeval tv; l_time_t tm; int error; microtime(&tv); tm = tv.tv_sec; if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) return (error); td->td_retval[0] = tm; return (0); } #endif struct l_times_argv { l_clock_t tms_utime; l_clock_t tms_stime; l_clock_t tms_cutime; l_clock_t tms_cstime; }; /* * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value. * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK * auxiliary vector entry. */ #define CLK_TCK 100 #define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) #define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) #define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER_2004000 ? 
\ CONVNTCK(r) : CONVOTCK(r)) int linux_times(struct thread *td, struct linux_times_args *args) { struct timeval tv, utime, stime, cutime, cstime; struct l_times_argv tms; struct proc *p; int error; if (args->buf != NULL) { p = td->td_proc; PROC_LOCK(p); PROC_STATLOCK(p); calcru(p, &utime, &stime); PROC_STATUNLOCK(p); calccru(p, &cutime, &cstime); PROC_UNLOCK(p); tms.tms_utime = CONVTCK(utime); tms.tms_stime = CONVTCK(stime); tms.tms_cutime = CONVTCK(cutime); tms.tms_cstime = CONVTCK(cstime); if ((error = copyout(&tms, args->buf, sizeof(tms)))) return (error); } microuptime(&tv); td->td_retval[0] = (int)CONVTCK(tv); return (0); } int linux_newuname(struct thread *td, struct linux_newuname_args *args) { struct l_new_utsname utsname; char osname[LINUX_MAX_UTSNAME]; char osrelease[LINUX_MAX_UTSNAME]; char *p; linux_get_osname(td, osname); linux_get_osrelease(td, osrelease); bzero(&utsname, sizeof(utsname)); strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); for (p = utsname.version; *p != '\0'; ++p) if (*p == '\n') { *p = '\0'; break; } #if defined(__amd64__) /* * On amd64, Linux uname(2) needs to return "x86_64" * for both 64-bit and 32-bit applications. On 32-bit, * the string returned by getauxval(AT_PLATFORM) needs * to remain "i686", though. 
*/ strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME); #else strlcpy(utsname.machine, linux_kplatform, LINUX_MAX_UTSNAME); #endif return (copyout(&utsname, args->buf, sizeof(utsname))); } struct l_utimbuf { l_time_t l_actime; l_time_t l_modtime; }; #ifdef LINUX_LEGACY_SYSCALLS int linux_utime(struct thread *td, struct linux_utime_args *args) { struct timeval tv[2], *tvp; struct l_utimbuf lut; char *fname; int error; + bool convpath; - LCONVPATHEXIST(td, args->fname, &fname); + convpath = LUSECONVPATH(td); + if (convpath) + LCONVPATHEXIST(td, args->fname, &fname); if (args->times) { if ((error = copyin(args->times, &lut, sizeof lut))) { - LFREEPATH(fname); + if (convpath) + LFREEPATH(fname); return (error); } tv[0].tv_sec = lut.l_actime; tv[0].tv_usec = 0; tv[1].tv_sec = lut.l_modtime; tv[1].tv_usec = 0; tvp = tv; } else tvp = NULL; - error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp, - UIO_SYSSPACE); - LFREEPATH(fname); + if (!convpath) { + error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, + tvp, UIO_SYSSPACE); + } else { + error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp, + UIO_SYSSPACE); + LFREEPATH(fname); + } return (error); } #endif #ifdef LINUX_LEGACY_SYSCALLS int linux_utimes(struct thread *td, struct linux_utimes_args *args) { l_timeval ltv[2]; struct timeval tv[2], *tvp = NULL; char *fname; int error; + bool convpath; - LCONVPATHEXIST(td, args->fname, &fname); + convpath = LUSECONVPATH(td); + if (convpath) + LCONVPATHEXIST(td, args->fname, &fname); if (args->tptr != NULL) { if ((error = copyin(args->tptr, ltv, sizeof ltv))) { - LFREEPATH(fname); + /* fname is only allocated when the path was converted. */ + if (convpath) + LFREEPATH(fname); return (error); } tv[0].tv_sec = ltv[0].tv_sec; tv[0].tv_usec = ltv[0].tv_usec; tv[1].tv_sec = ltv[1].tv_sec; tv[1].tv_usec = ltv[1].tv_usec; tvp = tv; } - error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, - tvp, UIO_SYSSPACE); - LFREEPATH(fname); + if (!convpath) { + error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, + tvp, UIO_SYSSPACE); + } else { + error
= kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, + tvp, UIO_SYSSPACE); + LFREEPATH(fname); + } return (error); } #endif static int linux_utimensat_nsec_valid(l_long nsec) { if (nsec == LINUX_UTIME_OMIT || nsec == LINUX_UTIME_NOW) return (0); if (nsec >= 0 && nsec <= 999999999) return (0); return (1); } int linux_utimensat(struct thread *td, struct linux_utimensat_args *args) { struct l_timespec l_times[2]; struct timespec times[2], *timesp = NULL; char *path = NULL; int error, dfd, flags = 0; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; if (args->flags & ~LINUX_AT_SYMLINK_NOFOLLOW) return (EINVAL); if (args->times != NULL) { error = copyin(args->times, l_times, sizeof(l_times)); if (error != 0) return (error); if (linux_utimensat_nsec_valid(l_times[0].tv_nsec) != 0 || linux_utimensat_nsec_valid(l_times[1].tv_nsec) != 0) return (EINVAL); times[0].tv_sec = l_times[0].tv_sec; switch (l_times[0].tv_nsec) { case LINUX_UTIME_OMIT: times[0].tv_nsec = UTIME_OMIT; break; case LINUX_UTIME_NOW: times[0].tv_nsec = UTIME_NOW; break; default: times[0].tv_nsec = l_times[0].tv_nsec; } times[1].tv_sec = l_times[1].tv_sec; switch (l_times[1].tv_nsec) { case LINUX_UTIME_OMIT: times[1].tv_nsec = UTIME_OMIT; break; case LINUX_UTIME_NOW: times[1].tv_nsec = UTIME_NOW; break; default: times[1].tv_nsec = l_times[1].tv_nsec; break; } timesp = times; /* This breaks POSIX, but is what the Linux kernel does * _on purpose_ (documented in the man page for utimensat(2)), * so we must follow that behaviour.
*/ if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) return (0); } if (args->pathname != NULL) LCONVPATHEXIST_AT(td, args->pathname, &path, dfd); else if (args->flags != 0) return (EINVAL); if (args->flags & LINUX_AT_SYMLINK_NOFOLLOW) flags |= AT_SYMLINK_NOFOLLOW; if (path == NULL) error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE); else { error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp, UIO_SYSSPACE, flags); LFREEPATH(path); } return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_futimesat(struct thread *td, struct linux_futimesat_args *args) { l_timeval ltv[2]; struct timeval tv[2], *tvp = NULL; char *fname; int error, dfd; + bool convpath; + convpath = LUSECONVPATH(td); dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; - LCONVPATHEXIST_AT(td, args->filename, &fname, dfd); + if (convpath) + LCONVPATHEXIST_AT(td, args->filename, &fname, dfd); if (args->utimes != NULL) { if ((error = copyin(args->utimes, ltv, sizeof ltv))) { - LFREEPATH(fname); + if (convpath) + LFREEPATH(fname); return (error); } tv[0].tv_sec = ltv[0].tv_sec; tv[0].tv_usec = ltv[0].tv_usec; tv[1].tv_sec = ltv[1].tv_sec; tv[1].tv_usec = ltv[1].tv_usec; tvp = tv; } - error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); - LFREEPATH(fname); + if (!convpath) { + error = kern_utimesat(td, dfd, args->filename, UIO_USERSPACE, + tvp, UIO_SYSSPACE); + } else { + error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); + LFREEPATH(fname); + } return (error); } #endif static int linux_common_wait(struct thread *td, int pid, int *statusp, int options, struct __wrusage *wrup) { siginfo_t siginfo; idtype_t idtype; id_t id; int error, status, tmpstat; if (pid == WAIT_ANY) { idtype = P_ALL; id = 0; } else if (pid < 0) { idtype = P_PGID; id = (id_t)-pid; } else { idtype = P_PID; id = (id_t)pid; } /* * For backward compatibility we implicitly add flags WEXITED * and WTRAPPED here. 
*/ options |= WEXITED | WTRAPPED; error = kern_wait6(td, idtype, id, &status, options, wrup, &siginfo); if (error) return (error); if (statusp) { tmpstat = status & 0xffff; if (WIFSIGNALED(tmpstat)) { tmpstat = (tmpstat & 0xffffff80) | bsd_to_linux_signal(WTERMSIG(tmpstat)); } else if (WIFSTOPPED(tmpstat)) { tmpstat = (tmpstat & 0xffff00ff) | (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8); #if defined(__amd64__) && !defined(COMPAT_LINUX32) if (WSTOPSIG(status) == SIGTRAP) { tmpstat = linux_ptrace_status(td, siginfo.si_pid, tmpstat); } #endif } else if (WIFCONTINUED(tmpstat)) { tmpstat = 0xffff; } error = copyout(&tmpstat, statusp, sizeof(int)); } return (error); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_waitpid(struct thread *td, struct linux_waitpid_args *args) { struct linux_wait4_args wait4_args; wait4_args.pid = args->pid; wait4_args.status = args->status; wait4_args.options = args->options; wait4_args.rusage = NULL; return (linux_wait4(td, &wait4_args)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_wait4(struct thread *td, struct linux_wait4_args *args) { int error, options; struct __wrusage wru, *wrup; if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG | LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) return (EINVAL); options = WEXITED; linux_to_bsd_waitopts(args->options, &options); if (args->rusage != NULL) wrup = &wru; else wrup = NULL; error = linux_common_wait(td, args->pid, args->status, options, wrup); if (error != 0) return (error); if (args->rusage != NULL) error = linux_copyout_rusage(&wru.wru_self, args->rusage); return (error); } int linux_waitid(struct thread *td, struct linux_waitid_args *args) { int status, options, sig; struct __wrusage wru; siginfo_t siginfo; l_siginfo_t lsi; idtype_t idtype; struct proc *p; int error; options = 0; linux_to_bsd_waitopts(args->options, &options); if (options & ~(WNOHANG | WNOWAIT | WEXITED | WUNTRACED | WCONTINUED)) return (EINVAL); if 
(!(options & (WEXITED | WUNTRACED | WCONTINUED))) return (EINVAL); switch (args->idtype) { case LINUX_P_ALL: idtype = P_ALL; break; case LINUX_P_PID: if (args->id <= 0) return (EINVAL); idtype = P_PID; break; case LINUX_P_PGID: if (args->id <= 0) return (EINVAL); idtype = P_PGID; break; default: return (EINVAL); } error = kern_wait6(td, idtype, args->id, &status, options, &wru, &siginfo); if (error != 0) return (error); if (args->rusage != NULL) { error = linux_copyout_rusage(&wru.wru_children, args->rusage); if (error != 0) return (error); } if (args->info != NULL) { p = td->td_proc; bzero(&lsi, sizeof(lsi)); if (td->td_retval[0] != 0) { sig = bsd_to_linux_signal(siginfo.si_signo); siginfo_to_lsiginfo(&siginfo, &lsi, sig); } error = copyout(&lsi, args->info, sizeof(lsi)); } td->td_retval[0] = 0; return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_mknod(struct thread *td, struct linux_mknod_args *args) { char *path; int error; + enum uio_seg seg; + bool convpath; - LCONVPATHCREAT(td, args->path, &path); + convpath = LUSECONVPATH(td); + if (!convpath) { + path = args->path; + seg = UIO_USERSPACE; + } else { + LCONVPATHCREAT(td, args->path, &path); + seg = UIO_SYSSPACE; + } switch (args->mode & S_IFMT) { case S_IFIFO: case S_IFSOCK: - error = kern_mkfifoat(td, AT_FDCWD, path, UIO_SYSSPACE, + error = kern_mkfifoat(td, AT_FDCWD, path, seg, args->mode); break; case S_IFCHR: case S_IFBLK: - error = kern_mknodat(td, AT_FDCWD, path, UIO_SYSSPACE, + error = kern_mknodat(td, AT_FDCWD, path, seg, args->mode, args->dev); break; case S_IFDIR: error = EPERM; break; case 0: args->mode |= S_IFREG; /* FALLTHROUGH */ case S_IFREG: - error = kern_openat(td, AT_FDCWD, path, UIO_SYSSPACE, + error = kern_openat(td, AT_FDCWD, path, seg, O_WRONLY | O_CREAT | O_TRUNC, args->mode); if (error == 0) kern_close(td, td->td_retval[0]); break; default: error = EINVAL; break; } - LFREEPATH(path); + if (convpath) + LFREEPATH(path); return (error); } #endif int linux_mknodat(struct thread *td, 
struct linux_mknodat_args *args) { char *path; int error, dfd; + enum uio_seg seg; + bool convpath; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; - LCONVPATHCREAT_AT(td, args->filename, &path, dfd); + convpath = LUSECONVPATH(td); + if (!convpath) { + path = __DECONST(char *, args->filename); + seg = UIO_USERSPACE; + } else { + LCONVPATHCREAT_AT(td, args->filename, &path, dfd); + seg = UIO_SYSSPACE; + } + switch (args->mode & S_IFMT) { case S_IFIFO: case S_IFSOCK: - error = kern_mkfifoat(td, dfd, path, UIO_SYSSPACE, args->mode); + error = kern_mkfifoat(td, dfd, path, seg, args->mode); break; case S_IFCHR: case S_IFBLK: - error = kern_mknodat(td, dfd, path, UIO_SYSSPACE, args->mode, + error = kern_mknodat(td, dfd, path, seg, args->mode, args->dev); break; case S_IFDIR: error = EPERM; break; case 0: args->mode |= S_IFREG; /* FALLTHROUGH */ case S_IFREG: - error = kern_openat(td, dfd, path, UIO_SYSSPACE, + error = kern_openat(td, dfd, path, seg, O_WRONLY | O_CREAT | O_TRUNC, args->mode); if (error == 0) kern_close(td, td->td_retval[0]); break; default: error = EINVAL; break; } - LFREEPATH(path); + if (convpath) + LFREEPATH(path); return (error); } /* * UGH! This is just about the dumbest idea I've ever heard!! 
*/ int linux_personality(struct thread *td, struct linux_personality_args *args) { struct linux_pemuldata *pem; struct proc *p = td->td_proc; uint32_t old; PROC_LOCK(p); pem = pem_find(p); old = pem->persona; if (args->per != 0xffffffff) pem->persona = args->per; PROC_UNLOCK(p); td->td_retval[0] = old; return (0); } struct l_itimerval { l_timeval it_interval; l_timeval it_value; }; #define B2L_ITIMERVAL(bip, lip) \ (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; int linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) { int error; struct l_itimerval ls; struct itimerval aitv, oitv; if (uap->itv == NULL) { uap->itv = uap->oitv; return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); } error = copyin(uap->itv, &ls, sizeof(ls)); if (error != 0) return (error); B2L_ITIMERVAL(&aitv, &ls); error = kern_setitimer(td, uap->which, &aitv, &oitv); if (error != 0 || uap->oitv == NULL) return (error); B2L_ITIMERVAL(&ls, &oitv); return (copyout(&ls, uap->oitv, sizeof(ls))); } int linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) { int error; struct l_itimerval ls; struct itimerval aitv; error = kern_getitimer(td, uap->which, &aitv); if (error != 0) return (error); B2L_ITIMERVAL(&ls, &aitv); return (copyout(&ls, uap->itv, sizeof(ls))); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_nice(struct thread *td, struct linux_nice_args *args) { return (kern_setpriority(td, PRIO_PROCESS, 0, args->inc)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_setgroups(struct thread *td, struct linux_setgroups_args *args) { struct ucred *newcred, *oldcred; l_gid_t *linux_gidset; gid_t *bsd_gidset; int ngrp, error; struct proc *p; ngrp = args->gidsetsize; if (ngrp < 0 || ngrp >= ngroups_max + 1) return (EINVAL); linux_gidset = 
malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); if (error) goto out; newcred = crget(); crextend(newcred, ngrp + 1); p = td->td_proc; PROC_LOCK(p); oldcred = p->p_ucred; crcopy(newcred, oldcred); /* * cr_groups[0] holds egid. Setting the whole set from * the supplied set will cause egid to be changed too. * Keep cr_groups[0] unchanged to prevent that. */ if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) { PROC_UNLOCK(p); crfree(newcred); goto out; } if (ngrp > 0) { newcred->cr_ngroups = ngrp + 1; bsd_gidset = newcred->cr_groups; ngrp--; while (ngrp >= 0) { bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; ngrp--; } } else newcred->cr_ngroups = 1; setsugid(p); proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); error = 0; out: free(linux_gidset, M_LINUX); return (error); } int linux_getgroups(struct thread *td, struct linux_getgroups_args *args) { struct ucred *cred; l_gid_t *linux_gidset; gid_t *bsd_gidset; int bsd_gidsetsz, ngrp, error; cred = td->td_ucred; bsd_gidset = cred->cr_groups; bsd_gidsetsz = cred->cr_ngroups - 1; /* * cr_groups[0] holds egid. Returning the whole set * here will cause a duplicate. Exclude cr_groups[0] * to prevent that. 
*/ if ((ngrp = args->gidsetsize) == 0) { td->td_retval[0] = bsd_gidsetsz; return (0); } if (ngrp < bsd_gidsetsz) return (EINVAL); ngrp = 0; linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), M_LINUX, M_WAITOK); while (ngrp < bsd_gidsetsz) { linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; ngrp++; } error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); free(linux_gidset, M_LINUX); if (error) return (error); td->td_retval[0] = ngrp; return (0); } int linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) { struct rlimit bsd_rlim; struct l_rlimit rlim; u_int which; int error; if (args->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); which = linux_to_bsd_resource[args->resource]; if (which == -1) return (EINVAL); error = copyin(args->rlim, &rlim, sizeof(rlim)); if (error) return (error); bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; return (kern_setrlimit(td, which, &bsd_rlim)); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) { struct l_rlimit rlim; struct rlimit bsd_rlim; u_int which; if (args->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); which = linux_to_bsd_resource[args->resource]; if (which == -1) return (EINVAL); lim_rlimit(td, which, &bsd_rlim); #ifdef COMPAT_LINUX32 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; if (rlim.rlim_cur == UINT_MAX) rlim.rlim_cur = INT_MAX; rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; if (rlim.rlim_max == UINT_MAX) rlim.rlim_max = INT_MAX; #else rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; if (rlim.rlim_cur == ULONG_MAX) rlim.rlim_cur = LONG_MAX; rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; if (rlim.rlim_max == ULONG_MAX) rlim.rlim_max = LONG_MAX; #endif return (copyout(&rlim, args->rlim, sizeof(rlim))); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) 
{ struct l_rlimit rlim; struct rlimit bsd_rlim; u_int which; if (args->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); which = linux_to_bsd_resource[args->resource]; if (which == -1) return (EINVAL); lim_rlimit(td, which, &bsd_rlim); rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; return (copyout(&rlim, args->rlim, sizeof(rlim))); } int linux_sched_setscheduler(struct thread *td, struct linux_sched_setscheduler_args *args) { struct sched_param sched_param; struct thread *tdt; int error, policy; switch (args->policy) { case LINUX_SCHED_OTHER: policy = SCHED_OTHER; break; case LINUX_SCHED_FIFO: policy = SCHED_FIFO; break; case LINUX_SCHED_RR: policy = SCHED_RR; break; default: return (EINVAL); } error = copyin(args->param, &sched_param, sizeof(sched_param)); if (error) return (error); if (linux_map_sched_prio) { switch (policy) { case SCHED_OTHER: if (sched_param.sched_priority != 0) return (EINVAL); sched_param.sched_priority = PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; break; case SCHED_FIFO: case SCHED_RR: if (sched_param.sched_priority < 1 || sched_param.sched_priority >= LINUX_MAX_RT_PRIO) return (EINVAL); /* * Map [1, LINUX_MAX_RT_PRIO - 1] to * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). 
*/ sched_param.sched_priority = (sched_param.sched_priority - 1) * (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / (LINUX_MAX_RT_PRIO - 1); break; } } tdt = linux_tdfind(td, args->pid, -1); if (tdt == NULL) return (ESRCH); error = kern_sched_setscheduler(td, tdt, policy, &sched_param); PROC_UNLOCK(tdt->td_proc); return (error); } int linux_sched_getscheduler(struct thread *td, struct linux_sched_getscheduler_args *args) { struct thread *tdt; int error, policy; tdt = linux_tdfind(td, args->pid, -1); if (tdt == NULL) return (ESRCH); error = kern_sched_getscheduler(td, tdt, &policy); PROC_UNLOCK(tdt->td_proc); switch (policy) { case SCHED_OTHER: td->td_retval[0] = LINUX_SCHED_OTHER; break; case SCHED_FIFO: td->td_retval[0] = LINUX_SCHED_FIFO; break; case SCHED_RR: td->td_retval[0] = LINUX_SCHED_RR; break; } return (error); } int linux_sched_get_priority_max(struct thread *td, struct linux_sched_get_priority_max_args *args) { struct sched_get_priority_max_args bsd; if (linux_map_sched_prio) { switch (args->policy) { case LINUX_SCHED_OTHER: td->td_retval[0] = 0; return (0); case LINUX_SCHED_FIFO: case LINUX_SCHED_RR: td->td_retval[0] = LINUX_MAX_RT_PRIO - 1; return (0); default: return (EINVAL); } } switch (args->policy) { case LINUX_SCHED_OTHER: bsd.policy = SCHED_OTHER; break; case LINUX_SCHED_FIFO: bsd.policy = SCHED_FIFO; break; case LINUX_SCHED_RR: bsd.policy = SCHED_RR; break; default: return (EINVAL); } return (sys_sched_get_priority_max(td, &bsd)); } int linux_sched_get_priority_min(struct thread *td, struct linux_sched_get_priority_min_args *args) { struct sched_get_priority_min_args bsd; if (linux_map_sched_prio) { switch (args->policy) { case LINUX_SCHED_OTHER: td->td_retval[0] = 0; return (0); case LINUX_SCHED_FIFO: case LINUX_SCHED_RR: td->td_retval[0] = 1; return (0); default: return (EINVAL); } } switch (args->policy) { case LINUX_SCHED_OTHER: bsd.policy = SCHED_OTHER; break; case LINUX_SCHED_FIFO: bsd.policy = SCHED_FIFO; break; case LINUX_SCHED_RR: bsd.policy = 
SCHED_RR; break; default: return (EINVAL); } return (sys_sched_get_priority_min(td, &bsd)); } #define REBOOT_CAD_ON 0x89abcdef #define REBOOT_CAD_OFF 0 #define REBOOT_HALT 0xcdef0123 #define REBOOT_RESTART 0x01234567 #define REBOOT_RESTART2 0xA1B2C3D4 #define REBOOT_POWEROFF 0x4321FEDC #define REBOOT_MAGIC1 0xfee1dead #define REBOOT_MAGIC2 0x28121969 #define REBOOT_MAGIC2A 0x05121996 #define REBOOT_MAGIC2B 0x16041998 int linux_reboot(struct thread *td, struct linux_reboot_args *args) { struct reboot_args bsd_args; if (args->magic1 != REBOOT_MAGIC1) return (EINVAL); switch (args->magic2) { case REBOOT_MAGIC2: case REBOOT_MAGIC2A: case REBOOT_MAGIC2B: break; default: return (EINVAL); } switch (args->cmd) { case REBOOT_CAD_ON: case REBOOT_CAD_OFF: return (priv_check(td, PRIV_REBOOT)); case REBOOT_HALT: bsd_args.opt = RB_HALT; break; case REBOOT_RESTART: case REBOOT_RESTART2: bsd_args.opt = 0; break; case REBOOT_POWEROFF: bsd_args.opt = RB_POWEROFF; break; default: return (EINVAL); } return (sys_reboot(td, &bsd_args)); } int linux_getpid(struct thread *td, struct linux_getpid_args *args) { td->td_retval[0] = td->td_proc->p_pid; return (0); } int linux_gettid(struct thread *td, struct linux_gettid_args *args) { struct linux_emuldata *em; em = em_find(td); KASSERT(em != NULL, ("gettid: emuldata not found.\n")); td->td_retval[0] = em->em_tid; return (0); } int linux_getppid(struct thread *td, struct linux_getppid_args *args) { td->td_retval[0] = kern_getppid(td); return (0); } int linux_getgid(struct thread *td, struct linux_getgid_args *args) { td->td_retval[0] = td->td_ucred->cr_rgid; return (0); } int linux_getuid(struct thread *td, struct linux_getuid_args *args) { td->td_retval[0] = td->td_ucred->cr_ruid; return (0); } int linux_getsid(struct thread *td, struct linux_getsid_args *args) { return (kern_getsid(td, args->pid)); } int linux_nosys(struct thread *td, struct nosys_args *ignore) { return (ENOSYS); } int linux_getpriority(struct thread *td, struct 
linux_getpriority_args *args) { int error; error = kern_getpriority(td, args->which, args->who); td->td_retval[0] = 20 - td->td_retval[0]; return (error); } int linux_sethostname(struct thread *td, struct linux_sethostname_args *args) { int name[2]; name[0] = CTL_KERN; name[1] = KERN_HOSTNAME; return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, args->len, 0, 0)); } int linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) { int name[2]; name[0] = CTL_KERN; name[1] = KERN_NISDOMAINNAME; return (userland_sysctl(td, name, 2, 0, 0, 0, args->name, args->len, 0, 0)); } int linux_exit_group(struct thread *td, struct linux_exit_group_args *args) { LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, args->error_code); /* * XXX: we should send a signal to the parent if * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) * as it doesnt occur often. */ exit1(td, args->error_code, 0); /* NOTREACHED */ } #define _LINUX_CAPABILITY_VERSION_1 0x19980330 #define _LINUX_CAPABILITY_VERSION_2 0x20071026 #define _LINUX_CAPABILITY_VERSION_3 0x20080522 struct l_user_cap_header { l_int version; l_int pid; }; struct l_user_cap_data { l_int effective; l_int permitted; l_int inheritable; }; int linux_capget(struct thread *td, struct linux_capget_args *uap) { struct l_user_cap_header luch; struct l_user_cap_data lucd[2]; int error, u32s; if (uap->hdrp == NULL) return (EFAULT); error = copyin(uap->hdrp, &luch, sizeof(luch)); if (error != 0) return (error); switch (luch.version) { case _LINUX_CAPABILITY_VERSION_1: u32s = 1; break; case _LINUX_CAPABILITY_VERSION_2: case _LINUX_CAPABILITY_VERSION_3: u32s = 2; break; default: luch.version = _LINUX_CAPABILITY_VERSION_1; error = copyout(&luch, uap->hdrp, sizeof(luch)); if (error) return (error); return (EINVAL); } if (luch.pid) return (EPERM); if (uap->datap) { /* * The current implementation doesn't support setting * a capability (it's essentially a stub) so indicate * that no capabilities are currently set 
or available * to request. */ memset(&lucd, 0, u32s * sizeof(lucd[0])); error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0])); } return (error); } int linux_capset(struct thread *td, struct linux_capset_args *uap) { struct l_user_cap_header luch; struct l_user_cap_data lucd[2]; int error, i, u32s; if (uap->hdrp == NULL || uap->datap == NULL) return (EFAULT); error = copyin(uap->hdrp, &luch, sizeof(luch)); if (error != 0) return (error); switch (luch.version) { case _LINUX_CAPABILITY_VERSION_1: u32s = 1; break; case _LINUX_CAPABILITY_VERSION_2: case _LINUX_CAPABILITY_VERSION_3: u32s = 2; break; default: luch.version = _LINUX_CAPABILITY_VERSION_1; error = copyout(&luch, uap->hdrp, sizeof(luch)); if (error) return (error); return (EINVAL); } if (luch.pid) return (EPERM); error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0])); if (error != 0) return (error); /* We currently don't support setting any capabilities. */ for (i = 0; i < u32s; i++) { if (lucd[i].effective || lucd[i].permitted || lucd[i].inheritable) { linux_msg(td, "capset[%d] effective=0x%x, permitted=0x%x, " "inheritable=0x%x is not implemented", i, (int)lucd[i].effective, (int)lucd[i].permitted, (int)lucd[i].inheritable); return (EPERM); } } return (0); } int linux_prctl(struct thread *td, struct linux_prctl_args *args) { int error = 0, max_size; struct proc *p = td->td_proc; char comm[LINUX_MAX_COMM_LEN]; int pdeath_signal; switch (args->option) { case LINUX_PR_SET_PDEATHSIG: if (!LINUX_SIG_VALID(args->arg2)) return (EINVAL); pdeath_signal = linux_to_bsd_signal(args->arg2); return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL, &pdeath_signal)); case LINUX_PR_GET_PDEATHSIG: error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS, &pdeath_signal); if (error != 0) return (error); pdeath_signal = bsd_to_linux_signal(pdeath_signal); return (copyout(&pdeath_signal, (void *)(register_t)args->arg2, sizeof(pdeath_signal))); break; case LINUX_PR_GET_KEEPCAPS: /* * Indicate that we always clear the 
effective and * permitted capability sets when the user id becomes * non-zero (actually the capability sets are simply * always zero in the current implementation). */ td->td_retval[0] = 0; break; case LINUX_PR_SET_KEEPCAPS: /* * Ignore requests to keep the effective and permitted * capability sets when the user id becomes non-zero. */ break; case LINUX_PR_SET_NAME: /* * To be on the safe side we need to make sure to not * overflow the size a Linux program expects. We already * do this here in the copyin, so that we don't need to * check on copyout. */ max_size = MIN(sizeof(comm), sizeof(p->p_comm)); error = copyinstr((void *)(register_t)args->arg2, comm, max_size, NULL); /* Linux silently truncates the name if it is too long. */ if (error == ENAMETOOLONG) { /* * XXX: copyinstr() isn't documented to populate the * array completely, so do a copyin() to be on the * safe side. This should be changed in case * copyinstr() is changed to guarantee this. */ error = copyin((void *)(register_t)args->arg2, comm, max_size - 1); comm[max_size - 1] = '\0'; } if (error) return (error); PROC_LOCK(p); strlcpy(p->p_comm, comm, sizeof(p->p_comm)); PROC_UNLOCK(p); break; case LINUX_PR_GET_NAME: PROC_LOCK(p); strlcpy(comm, p->p_comm, sizeof(comm)); PROC_UNLOCK(p); error = copyout(comm, (void *)(register_t)args->arg2, strlen(comm) + 1); break; default: error = EINVAL; break; } return (error); } int linux_sched_setparam(struct thread *td, struct linux_sched_setparam_args *uap) { struct sched_param sched_param; struct thread *tdt; int error, policy; error = copyin(uap->param, &sched_param, sizeof(sched_param)); if (error) return (error); tdt = linux_tdfind(td, uap->pid, -1); if (tdt == NULL) return (ESRCH); if (linux_map_sched_prio) { error = kern_sched_getscheduler(td, tdt, &policy); if (error) goto out; switch (policy) { case SCHED_OTHER: if (sched_param.sched_priority != 0) { error = EINVAL; goto out; } sched_param.sched_priority = PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; break; case 
SCHED_FIFO: case SCHED_RR: if (sched_param.sched_priority < 1 || sched_param.sched_priority >= LINUX_MAX_RT_PRIO) { error = EINVAL; goto out; } /* * Map [1, LINUX_MAX_RT_PRIO - 1] to * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). */ sched_param.sched_priority = (sched_param.sched_priority - 1) * (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / (LINUX_MAX_RT_PRIO - 1); break; } } error = kern_sched_setparam(td, tdt, &sched_param); out: PROC_UNLOCK(tdt->td_proc); return (error); } int linux_sched_getparam(struct thread *td, struct linux_sched_getparam_args *uap) { struct sched_param sched_param; struct thread *tdt; int error, policy; tdt = linux_tdfind(td, uap->pid, -1); if (tdt == NULL) return (ESRCH); error = kern_sched_getparam(td, tdt, &sched_param); if (error) { PROC_UNLOCK(tdt->td_proc); return (error); } if (linux_map_sched_prio) { error = kern_sched_getscheduler(td, tdt, &policy); PROC_UNLOCK(tdt->td_proc); if (error) return (error); switch (policy) { case SCHED_OTHER: sched_param.sched_priority = 0; break; case SCHED_FIFO: case SCHED_RR: /* * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to * [1, LINUX_MAX_RT_PRIO - 1] (rounding up). */ sched_param.sched_priority = (sched_param.sched_priority * (LINUX_MAX_RT_PRIO - 1) + (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) / (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1; break; } } else PROC_UNLOCK(tdt->td_proc); error = copyout(&sched_param, uap->param, sizeof(sched_param)); return (error); } /* * Get affinity of a process. */ int linux_sched_getaffinity(struct thread *td, struct linux_sched_getaffinity_args *args) { int error; struct thread *tdt; if (args->len < sizeof(cpuset_t)) return (EINVAL); tdt = linux_tdfind(td, args->pid, -1); if (tdt == NULL) return (ESRCH); PROC_UNLOCK(tdt->td_proc); error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, tdt->td_tid, sizeof(cpuset_t), (cpuset_t *)args->user_mask_ptr); if (error == 0) td->td_retval[0] = sizeof(cpuset_t); return (error); } /* * Set affinity of a process. 
*/ int linux_sched_setaffinity(struct thread *td, struct linux_sched_setaffinity_args *args) { struct thread *tdt; if (args->len < sizeof(cpuset_t)) return (EINVAL); tdt = linux_tdfind(td, args->pid, -1); if (tdt == NULL) return (ESRCH); PROC_UNLOCK(tdt->td_proc); return (kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, tdt->td_tid, sizeof(cpuset_t), (cpuset_t *) args->user_mask_ptr)); } struct linux_rlimit64 { uint64_t rlim_cur; uint64_t rlim_max; }; int linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) { struct rlimit rlim, nrlim; struct linux_rlimit64 lrlim; struct proc *p; u_int which; int flags; int error; if (args->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); which = linux_to_bsd_resource[args->resource]; if (which == -1) return (EINVAL); if (args->new != NULL) { /* * Note. Unlike FreeBSD where rlim is signed 64-bit Linux * rlim is unsigned 64-bit. FreeBSD treats negative limits * as INFINITY so we do not need a conversion even. */ error = copyin(args->new, &nrlim, sizeof(nrlim)); if (error != 0) return (error); } flags = PGET_HOLD | PGET_NOTWEXIT; if (args->new != NULL) flags |= PGET_CANDEBUG; else flags |= PGET_CANSEE; if (args->pid == 0) { p = td->td_proc; PHOLD(p); } else { error = pget(args->pid, flags, &p); if (error != 0) return (error); } if (args->old != NULL) { PROC_LOCK(p); lim_rlimit_proc(p, which, &rlim); PROC_UNLOCK(p); if (rlim.rlim_cur == RLIM_INFINITY) lrlim.rlim_cur = LINUX_RLIM_INFINITY; else lrlim.rlim_cur = rlim.rlim_cur; if (rlim.rlim_max == RLIM_INFINITY) lrlim.rlim_max = LINUX_RLIM_INFINITY; else lrlim.rlim_max = rlim.rlim_max; error = copyout(&lrlim, args->old, sizeof(lrlim)); if (error != 0) goto out; } if (args->new != NULL) error = kern_proc_setrlimit(td, p, which, &nrlim); out: PRELE(p); return (error); } int linux_pselect6(struct thread *td, struct linux_pselect6_args *args) { struct timeval utv, tv0, tv1, *tvp; struct l_pselect6arg lpse6; struct l_timespec lts; struct timespec uts; l_sigset_t 
l_ss; sigset_t *ssp; sigset_t ss; int error; ssp = NULL; if (args->sig != NULL) { error = copyin(args->sig, &lpse6, sizeof(lpse6)); if (error != 0) return (error); if (lpse6.ss_len != sizeof(l_ss)) return (EINVAL); if (lpse6.ss != 0) { error = copyin(PTRIN(lpse6.ss), &l_ss, sizeof(l_ss)); if (error != 0) return (error); linux_to_bsd_sigset(&l_ss, &ss); ssp = &ss; } } /* * Currently glibc changes nanosecond number to microsecond. * This mean losing precision but for now it is hardly seen. */ if (args->tsp != NULL) { error = copyin(args->tsp, <s, sizeof(lts)); if (error != 0) return (error); error = linux_to_native_timespec(&uts, <s); if (error != 0) return (error); TIMESPEC_TO_TIMEVAL(&utv, &uts); if (itimerfix(&utv)) return (EINVAL); microtime(&tv0); tvp = &utv; } else tvp = NULL; error = kern_pselect(td, args->nfds, args->readfds, args->writefds, args->exceptfds, tvp, ssp, LINUX_NFDBITS); if (error == 0 && args->tsp != NULL) { if (td->td_retval[0] != 0) { /* * Compute how much time was left of the timeout, * by subtracting the current time and the time * before we started the call, and subtracting * that result from the user-supplied value. 
*/ microtime(&tv1); timevalsub(&tv1, &tv0); timevalsub(&utv, &tv1); if (utv.tv_sec < 0) timevalclear(&utv); } else timevalclear(&utv); TIMEVAL_TO_TIMESPEC(&utv, &uts); error = native_to_linux_timespec(<s, &uts); if (error == 0) error = copyout(<s, args->tsp, sizeof(lts)); } return (error); } int linux_ppoll(struct thread *td, struct linux_ppoll_args *args) { struct timespec ts0, ts1; struct l_timespec lts; struct timespec uts, *tsp; l_sigset_t l_ss; sigset_t *ssp; sigset_t ss; int error; if (args->sset != NULL) { if (args->ssize != sizeof(l_ss)) return (EINVAL); error = copyin(args->sset, &l_ss, sizeof(l_ss)); if (error) return (error); linux_to_bsd_sigset(&l_ss, &ss); ssp = &ss; } else ssp = NULL; if (args->tsp != NULL) { error = copyin(args->tsp, <s, sizeof(lts)); if (error) return (error); error = linux_to_native_timespec(&uts, <s); if (error != 0) return (error); nanotime(&ts0); tsp = &uts; } else tsp = NULL; error = kern_poll(td, args->fds, args->nfds, tsp, ssp); if (error == 0 && args->tsp != NULL) { if (td->td_retval[0]) { nanotime(&ts1); timespecsub(&ts1, &ts0, &ts1); timespecsub(&uts, &ts1, &uts); if (uts.tv_sec < 0) timespecclear(&uts); } else timespecclear(&uts); error = native_to_linux_timespec(<s, &uts); if (error == 0) error = copyout(<s, args->tsp, sizeof(lts)); } return (error); } int linux_sched_rr_get_interval(struct thread *td, struct linux_sched_rr_get_interval_args *uap) { struct timespec ts; struct l_timespec lts; struct thread *tdt; int error; /* * According to man in case the invalid pid specified * EINVAL should be returned. 
*/ if (uap->pid < 0) return (EINVAL); tdt = linux_tdfind(td, uap->pid, -1); if (tdt == NULL) return (ESRCH); error = kern_sched_rr_get_interval_td(td, tdt, &ts); PROC_UNLOCK(tdt->td_proc); if (error != 0) return (error); error = native_to_linux_timespec(<s, &ts); if (error != 0) return (error); return (copyout(<s, uap->interval, sizeof(lts))); } /* * In case when the Linux thread is the initial thread in * the thread group thread id is equal to the process id. * Glibc depends on this magic (assert in pthread_getattr_np.c). */ struct thread * linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) { struct linux_emuldata *em; struct thread *tdt; struct proc *p; tdt = NULL; if (tid == 0 || tid == td->td_tid) { tdt = td; PROC_LOCK(tdt->td_proc); } else if (tid > PID_MAX) tdt = tdfind(tid, pid); else { /* * Initial thread where the tid equal to the pid. */ p = pfind(tid); if (p != NULL) { if (SV_PROC_ABI(p) != SV_ABI_LINUX) { /* * p is not a Linuxulator process. */ PROC_UNLOCK(p); return (NULL); } FOREACH_THREAD_IN_PROC(p, tdt) { em = em_find(tdt); if (tid == em->em_tid) return (tdt); } PROC_UNLOCK(p); } return (NULL); } return (tdt); } void linux_to_bsd_waitopts(int options, int *bsdopts) { if (options & LINUX_WNOHANG) *bsdopts |= WNOHANG; if (options & LINUX_WUNTRACED) *bsdopts |= WUNTRACED; if (options & LINUX_WEXITED) *bsdopts |= WEXITED; if (options & LINUX_WCONTINUED) *bsdopts |= WCONTINUED; if (options & LINUX_WNOWAIT) *bsdopts |= WNOWAIT; if (options & __WCLONE) *bsdopts |= WLINUXCLONE; } int linux_getrandom(struct thread *td, struct linux_getrandom_args *args) { struct uio uio; struct iovec iov; int error; if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM)) return (EINVAL); if (args->count > INT_MAX) args->count = INT_MAX; iov.iov_base = args->buf; iov.iov_len = args->count; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_resid = iov.iov_len; uio.uio_segflg = UIO_USERSPACE; uio.uio_rw = UIO_READ; uio.uio_td = td; error = read_random_uio(&uio, 
args->flags & LINUX_GRND_NONBLOCK); if (error == 0) td->td_retval[0] = args->count - uio.uio_resid; return (error); } int linux_mincore(struct thread *td, struct linux_mincore_args *args) { /* Needs to be page-aligned */ if (args->start & PAGE_MASK) return (EINVAL); return (kern_mincore(td, args->start, args->len, args->vec)); } #define SYSLOG_TAG "<6>" int linux_syslog(struct thread *td, struct linux_syslog_args *args) { char buf[128], *src, *dst; u_int seq; int buflen, error; if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) { linux_msg(td, "syslog unsupported type 0x%x", args->type); return (EINVAL); } if (args->len < 6) { td->td_retval[0] = 0; return (0); } error = priv_check(td, PRIV_MSGBUF); if (error) return (error); mtx_lock(&msgbuf_lock); msgbuf_peekbytes(msgbufp, NULL, 0, &seq); mtx_unlock(&msgbuf_lock); dst = args->buf; error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG)); /* The -1 is to skip the trailing '\0'. */ dst += sizeof(SYSLOG_TAG) - 1; while (error == 0) { mtx_lock(&msgbuf_lock); buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq); mtx_unlock(&msgbuf_lock); if (buflen == 0) break; for (src = buf; src < buf + buflen && error == 0; src++) { if (*src == '\0') continue; if (dst >= args->buf + args->len) goto out; error = copyout(src, dst, 1); dst++; if (*src == '\n' && *(src + 1) != '<' && dst + sizeof(SYSLOG_TAG) < args->buf + args->len) { error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG)); dst += sizeof(SYSLOG_TAG) - 1; } } } out: td->td_retval[0] = dst - args->buf; return (error); } int linux_getcpu(struct thread *td, struct linux_getcpu_args *args) { int cpu, error, node; cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9) */ error = 0; node = cpuid_to_pcpu[cpu]->pc_domain; if (args->cpu != NULL) error = copyout(&cpu, args->cpu, sizeof(l_int)); if (args->node != NULL) error = copyout(&node, args->node, sizeof(l_int)); return (error); } Index: head/sys/compat/linux/linux_stats.c 
=================================================================== --- head/sys/compat/linux/linux_stats.c (revision 364365) +++ head/sys/compat/linux/linux_stats.c (revision 364366) @@ -1,697 +1,729 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1994-1995 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_LINUX32 #include #include #else #include #include #endif #include #include static void translate_vnhook_major_minor(struct vnode *vp, struct stat *sb) { int major, minor; if (vn_isdisk(vp, NULL)) { sb->st_mode &= ~S_IFMT; sb->st_mode |= S_IFBLK; } /* * Return the same st_dev for every devfs instance. The reason * for this is to work around an idiosyncrasy of glibc getttynam() * implementation: it checks whether st_dev returned for fd 0 * is the same as st_dev returned for the target of /proc/self/fd/0 * symlink, and with linux chroots having their own devfs instance, * the check will fail if you chroot into it. */ if (rootdevmp != NULL && vp->v_mount->mnt_vfc == rootdevmp->mnt_vfc) sb->st_dev = rootdevmp->mnt_stat.f_fsid.val[0]; if (vp->v_type == VCHR && vp->v_rdev != NULL && linux_driver_get_major_minor(devtoname(vp->v_rdev), &major, &minor) == 0) { sb->st_rdev = (major << 8 | minor); } } static int -linux_kern_statat(struct thread *td, int flag, int fd, char *path, +linux_kern_statat(struct thread *td, int flag, int fd, const char *path, enum uio_seg pathseg, struct stat *sbp) { return (kern_statat(td, flag, fd, path, pathseg, sbp, translate_vnhook_major_minor)); } #ifdef LINUX_LEGACY_SYSCALLS static int -linux_kern_stat(struct thread *td, char *path, enum uio_seg pathseg, +linux_kern_stat(struct thread *td, const char *path, enum uio_seg pathseg, struct stat *sbp) { return (linux_kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp)); } static int -linux_kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, +linux_kern_lstat(struct thread *td, const char *path, enum uio_seg pathseg, struct stat *sbp) { return (linux_kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg, sbp)); } #endif static void translate_fd_major_minor(struct thread 
*td, int fd, struct stat *buf) { struct file *fp; struct vnode *vp; struct mount *mp; int major, minor; /* * No capability rights required here. */ if ((!S_ISCHR(buf->st_mode) && !S_ISBLK(buf->st_mode)) || fget(td, fd, &cap_no_rights, &fp) != 0) return; vp = fp->f_vnode; if (vp != NULL && vn_isdisk(vp, NULL)) { buf->st_mode &= ~S_IFMT; buf->st_mode |= S_IFBLK; } if (vp != NULL && rootdevmp != NULL) { mp = vp->v_mount; __compiler_membar(); if (mp != NULL && mp->mnt_vfc == rootdevmp->mnt_vfc) buf->st_dev = rootdevmp->mnt_stat.f_fsid.val[0]; } if (vp != NULL && vp->v_rdev != NULL && linux_driver_get_major_minor(devtoname(vp->v_rdev), &major, &minor) == 0) { buf->st_rdev = (major << 8 | minor); } else if (fp->f_type == DTYPE_PTS) { struct tty *tp = fp->f_data; /* Convert the numbers for the slave device. */ if (linux_driver_get_major_minor(devtoname(tp->t_dev), &major, &minor) == 0) { buf->st_rdev = (major << 8 | minor); } } fdrop(fp, td); } /* * l_dev_t has the same encoding as dev_t in the latter's low 16 bits, so * truncation of a dev_t to 16 bits gives the same result as unpacking * using major() and minor() and repacking in the l_dev_t format. This * detail is hidden in dev_to_ldev(). Overflow in conversions of dev_t's * are not checked for, as for other fields. * * dev_to_ldev() is only used for translating st_dev. When we convert * st_rdev for copying it out, it isn't really a dev_t, but has already * been translated to an l_dev_t in a nontrivial way. Translating it * again would be illogical but would have no effect since the low 16 * bits have the same encoding. * * The nontrivial translation for st_rdev renumbers some devices, but not * ones that can be mounted on, so it is consistent with the translation * for st_dev except when the renumbering or truncation causes conflicts. 
*/ #define dev_to_ldev(d) ((uint16_t)(d)) /* Builds a zeroed struct l_newstat from buf (st_dev truncated through dev_to_ldev) and copies it out to ubuf; returns the copyout() result. */ static int newstat_copyout(struct stat *buf, void *ubuf) { struct l_newstat tbuf; bzero(&tbuf, sizeof(tbuf)); tbuf.st_dev = dev_to_ldev(buf->st_dev); tbuf.st_ino = buf->st_ino; tbuf.st_mode = buf->st_mode; tbuf.st_nlink = buf->st_nlink; tbuf.st_uid = buf->st_uid; tbuf.st_gid = buf->st_gid; tbuf.st_rdev = buf->st_rdev; tbuf.st_size = buf->st_size; tbuf.st_atim.tv_sec = buf->st_atim.tv_sec; tbuf.st_atim.tv_nsec = buf->st_atim.tv_nsec; tbuf.st_mtim.tv_sec = buf->st_mtim.tv_sec; tbuf.st_mtim.tv_nsec = buf->st_mtim.tv_nsec; tbuf.st_ctim.tv_sec = buf->st_ctim.tv_sec; tbuf.st_ctim.tv_nsec = buf->st_ctim.tv_nsec; tbuf.st_blksize = buf->st_blksize; tbuf.st_blocks = buf->st_blocks; return (copyout(&tbuf, ubuf, sizeof(tbuf))); } #ifdef LINUX_LEGACY_SYSCALLS /* Stats args->path and copies the result out to args->buf in l_newstat layout. In the added lines, a zero LUSECONVPATH(td) passes the user pointer straight through as UIO_USERSPACE; otherwise the path is converted with LCONVPATHEXIST and freed after the lookup. */ int linux_newstat(struct thread *td, struct linux_newstat_args *args) { struct stat buf; char *path; int error; - LCONVPATHEXIST(td, args->path, &path); - - error = linux_kern_stat(td, path, UIO_SYSSPACE, &buf); - LFREEPATH(path); + if (!LUSECONVPATH(td)) { + error = linux_kern_stat(td, args->path, UIO_USERSPACE, &buf); + } else { + LCONVPATHEXIST(td, args->path, &path); + error = linux_kern_stat(td, path, UIO_SYSSPACE, &buf); + LFREEPATH(path); + } if (error) return (error); return (newstat_copyout(&buf, args->buf)); } /* Same as linux_newstat but uses linux_kern_lstat, which passes AT_SYMLINK_NOFOLLOW. */ int linux_newlstat(struct thread *td, struct linux_newlstat_args *args) { struct stat sb; char *path; int error; - LCONVPATHEXIST(td, args->path, &path); - - error = linux_kern_lstat(td, path, UIO_SYSSPACE, &sb); - LFREEPATH(path); + if (!LUSECONVPATH(td)) { + error = linux_kern_lstat(td, args->path, UIO_USERSPACE, &sb); + } else { + LCONVPATHEXIST(td, args->path, &path); + error = linux_kern_lstat(td, path, UIO_SYSSPACE, &sb); + LFREEPATH(path); + } if (error) return (error); return (newstat_copyout(&sb, args->buf)); } #endif /* Stats the descriptor args->fd with kern_fstat() and copies the result out to args->buf in l_newstat layout. */ int linux_newfstat(struct thread *td, struct linux_newfstat_args *args) { struct stat buf; int error; error = kern_fstat(td, args->fd, &buf); 
/* NOTE(review): this call runs before error is checked, so buf is read even when the preceding kern_fstat() failed - confirm this is intended. */ translate_fd_major_minor(td, args->fd, &buf); if (!error) error = newstat_copyout(&buf, args->buf); return (error); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) /* Builds a zeroed struct l_stat from buf (st_dev truncated through dev_to_ldev, st_size clamped to INT32_MAX) and copies it out to ubuf; returns the copyout() result. */ static int stat_copyout(struct stat *buf, void *ubuf) { struct l_stat lbuf; bzero(&lbuf, sizeof(lbuf)); lbuf.st_dev = dev_to_ldev(buf->st_dev); lbuf.st_ino = buf->st_ino; lbuf.st_mode = buf->st_mode; lbuf.st_nlink = buf->st_nlink; lbuf.st_uid = buf->st_uid; lbuf.st_gid = buf->st_gid; lbuf.st_rdev = buf->st_rdev; lbuf.st_size = MIN(buf->st_size, INT32_MAX); lbuf.st_atim.tv_sec = buf->st_atim.tv_sec; lbuf.st_atim.tv_nsec = buf->st_atim.tv_nsec; lbuf.st_mtim.tv_sec = buf->st_mtim.tv_sec; lbuf.st_mtim.tv_nsec = buf->st_mtim.tv_nsec; lbuf.st_ctim.tv_sec = buf->st_ctim.tv_sec; lbuf.st_ctim.tv_nsec = buf->st_ctim.tv_nsec; lbuf.st_blksize = buf->st_blksize; lbuf.st_blocks = buf->st_blocks; lbuf.st_flags = buf->st_flags; lbuf.st_gen = buf->st_gen; return (copyout(&lbuf, ubuf, sizeof(lbuf))); } /* Stats args->path and copies the result out to args->up in l_stat layout. In the added lines the converted path is freed right after the lookup, so a single error check follows both branches. */ int linux_stat(struct thread *td, struct linux_stat_args *args) { struct stat buf; char *path; int error; - LCONVPATHEXIST(td, args->path, &path); - - error = linux_kern_stat(td, path, UIO_SYSSPACE, &buf); - if (error) { + if (!LUSECONVPATH(td)) { + error = linux_kern_stat(td, args->path, UIO_USERSPACE, &buf); + } else { + LCONVPATHEXIST(td, args->path, &path); + error = linux_kern_stat(td, path, UIO_SYSSPACE, &buf); LFREEPATH(path); + } + if (error) { return (error); } - LFREEPATH(path); return (stat_copyout(&buf, args->up)); } /* Same as linux_stat but uses linux_kern_lstat, which passes AT_SYMLINK_NOFOLLOW. */ int linux_lstat(struct thread *td, struct linux_lstat_args *args) { struct stat buf; char *path; int error; - LCONVPATHEXIST(td, args->path, &path); - - error = linux_kern_lstat(td, path, UIO_SYSSPACE, &buf); - if (error) { + if (!LUSECONVPATH(td)) { + error = linux_kern_lstat(td, args->path, UIO_USERSPACE, &buf); + } else { + LCONVPATHEXIST(td, args->path, &path); + error = linux_kern_lstat(td, path, UIO_SYSSPACE, &buf); LFREEPATH(path); + } + if (error) { return (error); 
} - LFREEPATH(path); return (stat_copyout(&buf, args->up)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ /* Linux-layout statfs record; filled in by bsd_to_linux_statfs() and copied out by linux_statfs() and linux_fstatfs(). */ struct l_statfs { l_long f_type; l_long f_bsize; l_long f_blocks; l_long f_bfree; l_long f_bavail; l_long f_files; l_long f_ffree; l_fsid_t f_fsid; l_long f_namelen; l_long f_frsize; l_long f_flags; l_long f_spare[4]; }; /* Values reported in f_type; selected by filesystem type name in bsd_to_linux_ftype(). */ #define LINUX_CODA_SUPER_MAGIC 0x73757245L #define LINUX_EXT2_SUPER_MAGIC 0xEF53L #define LINUX_HPFS_SUPER_MAGIC 0xf995e849L #define LINUX_ISOFS_SUPER_MAGIC 0x9660L #define LINUX_MSDOS_SUPER_MAGIC 0x4d44L #define LINUX_NCP_SUPER_MAGIC 0x564cL #define LINUX_NFS_SUPER_MAGIC 0x6969L #define LINUX_NTFS_SUPER_MAGIC 0x5346544EL #define LINUX_PROC_SUPER_MAGIC 0x9fa0L #define LINUX_UFS_SUPER_MAGIC 0x00011954L /* XXX - UFS_MAGIC in Linux */ #define LINUX_ZFS_SUPER_MAGIC 0x2FC12FC1 #define LINUX_DEVFS_SUPER_MAGIC 0x1373L #define LINUX_SHMFS_MAGIC 0x01021994 /* Looks fstypename up in a static name-to-magic table with strcmp(); returns the matching magic value, or 0 when the name is not in the table. */ static long bsd_to_linux_ftype(const char *fstypename) { int i; static struct {const char *bsd_name; long linux_type;} b2l_tbl[] = { {"ufs", LINUX_UFS_SUPER_MAGIC}, {"zfs", LINUX_ZFS_SUPER_MAGIC}, {"cd9660", LINUX_ISOFS_SUPER_MAGIC}, {"nfs", LINUX_NFS_SUPER_MAGIC}, {"ext2fs", LINUX_EXT2_SUPER_MAGIC}, {"procfs", LINUX_PROC_SUPER_MAGIC}, {"msdosfs", LINUX_MSDOS_SUPER_MAGIC}, {"ntfs", LINUX_NTFS_SUPER_MAGIC}, {"nwfs", LINUX_NCP_SUPER_MAGIC}, {"hpfs", LINUX_HPFS_SUPER_MAGIC}, {"coda", LINUX_CODA_SUPER_MAGIC}, {"devfs", LINUX_DEVFS_SUPER_MAGIC}, {"tmpfs", LINUX_SHMFS_MAGIC}, {NULL, 0L}}; for (i = 0; b2l_tbl[i].bsd_name != NULL; i++) if (strcmp(b2l_tbl[i].bsd_name, fstypename) == 0) return (b2l_tbl[i].linux_type); return (0L); } /* Fills *linux_statfs from *bsd_statfs and returns 0. On i386 and COMPAT_LINUX32 builds it first returns EOVERFLOW when a checked field has bits set above the low 32; f_bavail and f_ffree equal to -1 are exempt from that check. */ static int bsd_to_linux_statfs(struct statfs *bsd_statfs, struct l_statfs *linux_statfs) { #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) uint64_t tmp; #define LINUX_HIBITS 0xffffffff00000000ULL tmp = bsd_statfs->f_blocks | bsd_statfs->f_bfree | bsd_statfs->f_files | bsd_statfs->f_bsize; if ((bsd_statfs->f_bavail != -1 && (bsd_statfs->f_bavail & 
LINUX_HIBITS)) || (bsd_statfs->f_ffree != -1 && (bsd_statfs->f_ffree & LINUX_HIBITS)) || (tmp & LINUX_HIBITS)) return (EOVERFLOW); #undef LINUX_HIBITS #endif /* f_namelen is always MAXNAMLEN, f_frsize mirrors f_bsize, and f_flags and f_spare are zeroed. */ linux_statfs->f_type = bsd_to_linux_ftype(bsd_statfs->f_fstypename); linux_statfs->f_bsize = bsd_statfs->f_bsize; linux_statfs->f_blocks = bsd_statfs->f_blocks; linux_statfs->f_bfree = bsd_statfs->f_bfree; linux_statfs->f_bavail = bsd_statfs->f_bavail; linux_statfs->f_ffree = bsd_statfs->f_ffree; linux_statfs->f_files = bsd_statfs->f_files; linux_statfs->f_fsid.val[0] = bsd_statfs->f_fsid.val[0]; linux_statfs->f_fsid.val[1] = bsd_statfs->f_fsid.val[1]; linux_statfs->f_namelen = MAXNAMLEN; linux_statfs->f_frsize = bsd_statfs->f_bsize; linux_statfs->f_flags = 0; memset(linux_statfs->f_spare, 0, sizeof(linux_statfs->f_spare)); return (0); } /* Runs kern_statfs() on args->path, converts the result with bsd_to_linux_statfs() and copies it out to args->buf. The temporary struct statfs is allocated with M_WAITOK and freed before the error check; in the added else branch the allocation happens after LCONVPATHEXIST, which may return early. */ int linux_statfs(struct thread *td, struct linux_statfs_args *args) { struct l_statfs linux_statfs; struct statfs *bsd_statfs; char *path; int error; - LCONVPATHEXIST(td, args->path, &path); - - bsd_statfs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); - error = kern_statfs(td, path, UIO_SYSSPACE, bsd_statfs); - LFREEPATH(path); + if (!LUSECONVPATH(td)) { + bsd_statfs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); + error = kern_statfs(td, args->path, UIO_USERSPACE, bsd_statfs); + } else { + LCONVPATHEXIST(td, args->path, &path); + bsd_statfs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); + error = kern_statfs(td, path, UIO_SYSSPACE, bsd_statfs); + LFREEPATH(path); + } if (error == 0) error = bsd_to_linux_statfs(bsd_statfs, &linux_statfs); free(bsd_statfs, M_STATFS); if (error != 0) return (error); return (copyout(&linux_statfs, args->buf, sizeof(linux_statfs))); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) /* Fills a struct l_statfs64 from *bsd_statfs; unlike bsd_to_linux_statfs() it has no overflow check and returns nothing. */ static void bsd_to_linux_statfs64(struct statfs *bsd_statfs, struct l_statfs64 *linux_statfs) { linux_statfs->f_type = bsd_to_linux_ftype(bsd_statfs->f_fstypename); linux_statfs->f_bsize = bsd_statfs->f_bsize; 
linux_statfs->f_blocks = bsd_statfs->f_blocks; linux_statfs->f_bfree = bsd_statfs->f_bfree; linux_statfs->f_bavail = bsd_statfs->f_bavail; linux_statfs->f_ffree = bsd_statfs->f_ffree; linux_statfs->f_files = bsd_statfs->f_files; linux_statfs->f_fsid.val[0] = bsd_statfs->f_fsid.val[0]; linux_statfs->f_fsid.val[1] = bsd_statfs->f_fsid.val[1]; linux_statfs->f_namelen = MAXNAMLEN; linux_statfs->f_frsize = bsd_statfs->f_bsize; linux_statfs->f_flags = 0; memset(linux_statfs->f_spare, 0, sizeof(linux_statfs->f_spare)); } /* Runs kern_statfs() on args->path and copies the result out to args->buf in l_statfs64 layout. Returns EINVAL unless args->bufsize equals sizeof(struct l_statfs64). */ int linux_statfs64(struct thread *td, struct linux_statfs64_args *args) { struct l_statfs64 linux_statfs; struct statfs *bsd_statfs; char *path; int error; if (args->bufsize != sizeof(struct l_statfs64)) return (EINVAL); - LCONVPATHEXIST(td, args->path, &path); - - bsd_statfs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); - error = kern_statfs(td, path, UIO_SYSSPACE, bsd_statfs); - LFREEPATH(path); + if (!LUSECONVPATH(td)) { + bsd_statfs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); + error = kern_statfs(td, args->path, UIO_USERSPACE, bsd_statfs); + } else { + LCONVPATHEXIST(td, args->path, &path); + bsd_statfs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); + error = kern_statfs(td, path, UIO_SYSSPACE, bsd_statfs); + LFREEPATH(path); + } if (error == 0) bsd_to_linux_statfs64(bsd_statfs, &linux_statfs); free(bsd_statfs, M_STATFS); if (error != 0) return (error); return (copyout(&linux_statfs, args->buf, sizeof(linux_statfs))); } /* Descriptor variant of linux_statfs64: runs kern_fstatfs() on args->fd, with the same bufsize check and copyout. */ int linux_fstatfs64(struct thread *td, struct linux_fstatfs64_args *args) { struct l_statfs64 linux_statfs; struct statfs *bsd_statfs; int error; if (args->bufsize != sizeof(struct l_statfs64)) return (EINVAL); bsd_statfs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fstatfs(td, args->fd, bsd_statfs); if (error == 0) bsd_to_linux_statfs64(bsd_statfs, &linux_statfs); free(bsd_statfs, M_STATFS); if (error != 0) return (error); return (copyout(&linux_statfs, args->buf, sizeof(linux_statfs))); } 
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ /* Runs kern_fstatfs() on args->fd, converts the result with bsd_to_linux_statfs() (whose error is propagated) and copies it out to args->buf. */ int linux_fstatfs(struct thread *td, struct linux_fstatfs_args *args) { struct l_statfs linux_statfs; struct statfs *bsd_statfs; int error; bsd_statfs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fstatfs(td, args->fd, bsd_statfs); if (error == 0) error = bsd_to_linux_statfs(bsd_statfs, &linux_statfs); free(bsd_statfs, M_STATFS); if (error != 0) return (error); return (copyout(&linux_statfs, args->buf, sizeof(linux_statfs))); } /* NOTE(review): not referenced by the linux_ustat() stub that follows; presumably kept for a future implementation - confirm. */ struct l_ustat { l_daddr_t f_tfree; l_ino_t f_tinode; char f_fname[6]; char f_fpack[6]; }; #ifdef LINUX_LEGACY_SYSCALLS /* Always returns EOPNOTSUPP. */ int linux_ustat(struct thread *td, struct linux_ustat_args *args) { return (EOPNOTSUPP); } #endif #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) /* Builds a zeroed struct l_stat64 from buf (st_dev truncated through dev_to_ldev, st_ino also stored in __st_ino) and copies it out to ubuf; returns the copyout() result. */ static int stat64_copyout(struct stat *buf, void *ubuf) { struct l_stat64 lbuf; bzero(&lbuf, sizeof(lbuf)); lbuf.st_dev = dev_to_ldev(buf->st_dev); lbuf.st_ino = buf->st_ino; lbuf.st_mode = buf->st_mode; lbuf.st_nlink = buf->st_nlink; lbuf.st_uid = buf->st_uid; lbuf.st_gid = buf->st_gid; lbuf.st_rdev = buf->st_rdev; lbuf.st_size = buf->st_size; lbuf.st_atim.tv_sec = buf->st_atim.tv_sec; lbuf.st_atim.tv_nsec = buf->st_atim.tv_nsec; lbuf.st_mtim.tv_sec = buf->st_mtim.tv_sec; lbuf.st_mtim.tv_nsec = buf->st_mtim.tv_nsec; lbuf.st_ctim.tv_sec = buf->st_ctim.tv_sec; lbuf.st_ctim.tv_nsec = buf->st_ctim.tv_nsec; lbuf.st_blksize = buf->st_blksize; lbuf.st_blocks = buf->st_blocks; /* * The __st_ino field makes all the difference. In the Linux kernel * it is conditionally compiled based on STAT64_HAS_BROKEN_ST_INO, * but without the assignment to __st_ino the runtime linker refuses * to mmap(2) any shared libraries. 
I guess it's broken alright :-) */ lbuf.__st_ino = buf->st_ino; return (copyout(&lbuf, ubuf, sizeof(lbuf))); } /* Stats args->filename and copies the result out to args->statbuf in l_stat64 layout. In the added lines, a zero LUSECONVPATH(td) passes the user pointer straight through as UIO_USERSPACE; otherwise the path is converted with LCONVPATHEXIST and freed after the lookup. */ int linux_stat64(struct thread *td, struct linux_stat64_args *args) { struct stat buf; char *filename; int error; - LCONVPATHEXIST(td, args->filename, &filename); - - error = linux_kern_stat(td, filename, UIO_SYSSPACE, &buf); - LFREEPATH(filename); + if (!LUSECONVPATH(td)) { + error = linux_kern_stat(td, args->filename, UIO_USERSPACE, &buf); + } else { + LCONVPATHEXIST(td, args->filename, &filename); + error = linux_kern_stat(td, filename, UIO_SYSSPACE, &buf); + LFREEPATH(filename); + } if (error) return (error); return (stat64_copyout(&buf, args->statbuf)); } /* Same as linux_stat64 but uses linux_kern_lstat, which passes AT_SYMLINK_NOFOLLOW. */ int linux_lstat64(struct thread *td, struct linux_lstat64_args *args) { struct stat sb; char *filename; int error; - LCONVPATHEXIST(td, args->filename, &filename); - - error = linux_kern_lstat(td, filename, UIO_SYSSPACE, &sb); - LFREEPATH(filename); + if (!LUSECONVPATH(td)) { + error = linux_kern_lstat(td, args->filename, UIO_USERSPACE, &sb); + } else { + LCONVPATHEXIST(td, args->filename, &filename); + error = linux_kern_lstat(td, filename, UIO_SYSSPACE, &sb); + LFREEPATH(filename); + } if (error) return (error); return (stat64_copyout(&sb, args->statbuf)); } /* Stats the descriptor args->fd and copies the result out in l_stat64 layout. NOTE(review): translate_fd_major_minor() runs before error is checked, so buf is read even when kern_fstat() failed - confirm this is intended. */ int linux_fstat64(struct thread *td, struct linux_fstat64_args *args) { struct stat buf; int error; error = kern_fstat(td, args->fd, &buf); translate_fd_major_minor(td, args->fd, &buf); if (!error) error = stat64_copyout(&buf, args->statbuf); return (error); } /* Stats args->pathname relative to args->dfd and copies the result out in l_stat64 layout. Returns EINVAL for any flag bit other than LINUX_AT_SYMLINK_NOFOLLOW; LINUX_AT_FDCWD is mapped to AT_FDCWD. */ int linux_fstatat64(struct thread *td, struct linux_fstatat64_args *args) { char *path; int error, dfd, flag; struct stat buf; if (args->flag & ~LINUX_AT_SYMLINK_NOFOLLOW) return (EINVAL); flag = (args->flag & LINUX_AT_SYMLINK_NOFOLLOW) ? AT_SYMLINK_NOFOLLOW : 0; dfd = (args->dfd == LINUX_AT_FDCWD) ? 
AT_FDCWD : args->dfd; - LCONVPATHEXIST_AT(td, args->pathname, &path, dfd); - - error = linux_kern_statat(td, flag, dfd, path, UIO_SYSSPACE, &buf); - if (!error) + if (!LUSECONVPATH(td)) { + error = linux_kern_statat(td, flag, dfd, args->pathname, + UIO_USERSPACE, &buf); + } else { + LCONVPATHEXIST_AT(td, args->pathname, &path, dfd); + error = linux_kern_statat(td, flag, dfd, path, UIO_SYSSPACE, &buf); + LFREEPATH(path); + } + if (error == 0) error = stat64_copyout(&buf, args->statbuf); - LFREEPATH(path); return (error); } #else /* __amd64__ && !COMPAT_LINUX32 */ /* Stats args->pathname relative to args->dfd and copies the result out in l_newstat layout. Returns EINVAL for any flag bit other than LINUX_AT_SYMLINK_NOFOLLOW; LINUX_AT_FDCWD is mapped to AT_FDCWD. */ int linux_newfstatat(struct thread *td, struct linux_newfstatat_args *args) { char *path; int error, dfd, flag; struct stat buf; if (args->flag & ~LINUX_AT_SYMLINK_NOFOLLOW) return (EINVAL); flag = (args->flag & LINUX_AT_SYMLINK_NOFOLLOW) ? AT_SYMLINK_NOFOLLOW : 0; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; - LCONVPATHEXIST_AT(td, args->pathname, &path, dfd); - - error = linux_kern_statat(td, flag, dfd, path, UIO_SYSSPACE, &buf); + if (!LUSECONVPATH(td)) { + error = linux_kern_statat(td, flag, dfd, args->pathname, + UIO_USERSPACE, &buf); + } else { + LCONVPATHEXIST_AT(td, args->pathname, &path, dfd); + error = linux_kern_statat(td, flag, dfd, path, UIO_SYSSPACE, &buf); + LFREEPATH(path); + } if (error == 0) error = newstat_copyout(&buf, args->statbuf); - LFREEPATH(path); return (error); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ /* Syncs the mount that holds the vnode behind args->fd. Returns the fgetvp() or vfs_busy() error, otherwise 0; the sync calls themselves do not affect the return value. */ int linux_syncfs(struct thread *td, struct linux_syncfs_args *args) { struct mount *mp; struct vnode *vp; int error, save; error = fgetvp(td, args->fd, &cap_fsync_rights, &vp); if (error != 0) /* * Linux syncfs() returns only EBADF, however fgetvp() * can return EINVAL in case of file descriptor does * not represent a vnode. XXX. */ return (error); mp = vp->v_mount; mtx_lock(&mountlist_mtx); error = vfs_busy(mp, MBF_MNTLSTLOCK); if (error != 0) { /* See comment above. 
*/ mtx_unlock(&mountlist_mtx); goto out; } /* Sync only when the mount is not read-only and a write can be started without waiting (V_NOWAIT); TDP_SYNCIO is set for the duration and restored afterwards. */ if ((mp->mnt_flag & MNT_RDONLY) == 0 && vn_start_write(NULL, &mp, V_NOWAIT) == 0) { save = curthread_pflags_set(TDP_SYNCIO); vfs_periodic(mp, MNT_NOWAIT); VFS_SYNC(mp, MNT_NOWAIT); curthread_pflags_restore(save); vn_finished_write(mp); } vfs_unbusy(mp); out: /* Drop the vnode reference obtained from fgetvp(). */ vrele(vp); return (error); } Index: head/sys/compat/linux/linux_uid16.c =================================================================== --- head/sys/compat/linux/linux_uid16.c (revision 364365) +++ head/sys/compat/linux/linux_uid16.c (revision 364366) @@ -1,432 +1,439 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2001 The FreeBSD Project * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_LINUX32 #include #include #else #include #include #endif #include #include /* DTrace init */ LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); /** * DTrace probes in this module. */ /* Every syscall listed here gets an entry probe and a return probe; chown16, lchown16, setgroups16 and getgroups16 also define probes for an intermediate step or an error path. */ LIN_SDT_PROBE_DEFINE3(uid16, linux_chown16, entry, "char *", "l_uid16_t", "l_gid16_t"); LIN_SDT_PROBE_DEFINE1(uid16, linux_chown16, conv_path, "char *"); LIN_SDT_PROBE_DEFINE1(uid16, linux_chown16, return, "int"); LIN_SDT_PROBE_DEFINE3(uid16, linux_lchown16, entry, "char *", "l_uid16_t", "l_gid16_t"); LIN_SDT_PROBE_DEFINE1(uid16, linux_lchown16, conv_path, "char *"); LIN_SDT_PROBE_DEFINE1(uid16, linux_lchown16, return, "int"); LIN_SDT_PROBE_DEFINE2(uid16, linux_setgroups16, entry, "l_uint", "l_gid16_t *"); LIN_SDT_PROBE_DEFINE1(uid16, linux_setgroups16, copyin_error, "int"); LIN_SDT_PROBE_DEFINE1(uid16, linux_setgroups16, priv_check_cred_error, "int"); LIN_SDT_PROBE_DEFINE1(uid16, linux_setgroups16, return, "int"); LIN_SDT_PROBE_DEFINE2(uid16, linux_getgroups16, entry, "l_uint", "l_gid16_t *"); LIN_SDT_PROBE_DEFINE1(uid16, linux_getgroups16, copyout_error, "int"); LIN_SDT_PROBE_DEFINE1(uid16, linux_getgroups16, return, "int"); LIN_SDT_PROBE_DEFINE0(uid16, linux_getgid16, entry); LIN_SDT_PROBE_DEFINE1(uid16, linux_getgid16, return, "int"); LIN_SDT_PROBE_DEFINE0(uid16, linux_getuid16, entry); LIN_SDT_PROBE_DEFINE1(uid16, linux_getuid16, return, "int"); LIN_SDT_PROBE_DEFINE0(uid16, linux_getegid16, entry); LIN_SDT_PROBE_DEFINE1(uid16, linux_getegid16, return, "int"); LIN_SDT_PROBE_DEFINE0(uid16, linux_geteuid16, entry); LIN_SDT_PROBE_DEFINE1(uid16, linux_geteuid16, return, "int"); LIN_SDT_PROBE_DEFINE1(uid16, linux_setgid16, entry, "l_gid16_t"); LIN_SDT_PROBE_DEFINE1(uid16, linux_setgid16, return, "int"); LIN_SDT_PROBE_DEFINE1(uid16, linux_setuid16, entry, "l_uid16_t"); LIN_SDT_PROBE_DEFINE1(uid16, linux_setuid16, return, 
"int"); LIN_SDT_PROBE_DEFINE2(uid16, linux_setregid16, entry, "l_gid16_t", "l_gid16_t"); LIN_SDT_PROBE_DEFINE1(uid16, linux_setregid16, return, "int"); LIN_SDT_PROBE_DEFINE2(uid16, linux_setreuid16, entry, "l_uid16_t", "l_uid16_t"); LIN_SDT_PROBE_DEFINE1(uid16, linux_setreuid16, return, "int"); LIN_SDT_PROBE_DEFINE3(uid16, linux_setresgid16, entry, "l_gid16_t", "l_gid16_t", "l_gid16_t"); LIN_SDT_PROBE_DEFINE1(uid16, linux_setresgid16, return, "int"); LIN_SDT_PROBE_DEFINE3(uid16, linux_setresuid16, entry, "l_uid16_t", "l_uid16_t", "l_uid16_t"); LIN_SDT_PROBE_DEFINE1(uid16, linux_setresuid16, return, "int"); /* Stub handlers generated by the DUMMY() macro. */ DUMMY(setfsuid16); DUMMY(setfsgid16); DUMMY(getresuid16); DUMMY(getresgid16); /* Maps the 16-bit value 0xFFFF to -1 and passes every other value through unchanged. */ #define CAST_NOCHG(x) ((x == 0xFFFF) ? -1 : x) /* Changes owner and group of args->path through kern_fchownat() relative to AT_FDCWD with no flags, after mapping uid and gid through CAST_NOCHG. In the added lines the user pointer is passed straight through only when LUSECONVPATH(td) is zero and SDT_PROBES_ENABLED() is false; that path fires no probes. */ int linux_chown16(struct thread *td, struct linux_chown16_args *args) { char *path; int error; - LCONVPATHEXIST(td, args->path, &path); + if (!LUSECONVPATH(td) && !SDT_PROBES_ENABLED()) { + error = kern_fchownat(td, AT_FDCWD, args->path, UIO_USERSPACE, + CAST_NOCHG(args->uid), CAST_NOCHG(args->gid), 0); + } else { + LCONVPATHEXIST(td, args->path, &path); + /* + * The DTrace probes have to be after the LCONVPATHEXIST, as + * LCONVPATHEXIST may return on its own and we do not want to + * have a stray entry without the corresponding return. + */ + LIN_SDT_PROBE3(uid16, linux_chown16, entry, args->path, args->uid, + args->gid); + LIN_SDT_PROBE1(uid16, linux_chown16, conv_path, path); - /* - * The DTrace probes have to be after the LCONVPATHEXIST, as - * LCONVPATHEXIST may return on its own and we do not want to - * have a stray entry without the corresponding return. 
- */ - LIN_SDT_PROBE3(uid16, linux_chown16, entry, args->path, args->uid, - args->gid); - LIN_SDT_PROBE1(uid16, linux_chown16, conv_path, path); - - error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, - CAST_NOCHG(args->uid), CAST_NOCHG(args->gid), 0); - LFREEPATH(path); - - LIN_SDT_PROBE1(uid16, linux_chown16, return, error); + error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, + CAST_NOCHG(args->uid), CAST_NOCHG(args->gid), 0); + LFREEPATH(path); + LIN_SDT_PROBE1(uid16, linux_chown16, return, error); + } return (error); } /* Same as linux_chown16 but passes AT_SYMLINK_NOFOLLOW to kern_fchownat(). */ int linux_lchown16(struct thread *td, struct linux_lchown16_args *args) { char *path; int error; - LCONVPATHEXIST(td, args->path, &path); + if (!LUSECONVPATH(td) && !SDT_PROBES_ENABLED()) { + error = kern_fchownat(td, AT_FDCWD, args->path, UIO_USERSPACE, + CAST_NOCHG(args->uid), CAST_NOCHG(args->gid), AT_SYMLINK_NOFOLLOW); + } else { + LCONVPATHEXIST(td, args->path, &path); - /* - * The DTrace probes have to be after the LCONVPATHEXIST, as - * LCONVPATHEXIST may return on its own and we do not want to - * have a stray entry without the corresponding return. - */ - LIN_SDT_PROBE3(uid16, linux_lchown16, entry, args->path, args->uid, - args->gid); - LIN_SDT_PROBE1(uid16, linux_lchown16, conv_path, path); + /* + * The DTrace probes have to be after the LCONVPATHEXIST, as + * LCONVPATHEXIST may return on its own and we do not want to + * have a stray entry without the corresponding return. 
+ */ + LIN_SDT_PROBE3(uid16, linux_lchown16, entry, args->path, args->uid, + args->gid); + LIN_SDT_PROBE1(uid16, linux_lchown16, conv_path, path); - error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, - CAST_NOCHG(args->uid), CAST_NOCHG(args->gid), AT_SYMLINK_NOFOLLOW); - LFREEPATH(path); - - LIN_SDT_PROBE1(uid16, linux_lchown16, return, error); + error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, + CAST_NOCHG(args->uid), CAST_NOCHG(args->gid), AT_SYMLINK_NOFOLLOW); + LFREEPATH(path); + LIN_SDT_PROBE1(uid16, linux_lchown16, return, error); + } return (error); } /* Replaces the supplementary groups of the process with the 16-bit gids copied in from args->gidset. Returns EINVAL when gidsetsize is negative or above ngroups_max, the copyin() error, or the priv_check_cred() error; 0 on success. */ int linux_setgroups16(struct thread *td, struct linux_setgroups16_args *args) { struct ucred *newcred, *oldcred; l_gid16_t *linux_gidset; gid_t *bsd_gidset; int ngrp, error; struct proc *p; LIN_SDT_PROBE2(uid16, linux_setgroups16, entry, args->gidsetsize, args->gidset); ngrp = args->gidsetsize; if (ngrp < 0 || ngrp >= ngroups_max + 1) { LIN_SDT_PROBE1(uid16, linux_setgroups16, return, EINVAL); return (EINVAL); } linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); error = copyin(args->gidset, linux_gidset, ngrp * sizeof(l_gid16_t)); if (error) { LIN_SDT_PROBE1(uid16, linux_setgroups16, copyin_error, error); LIN_SDT_PROBE1(uid16, linux_setgroups16, return, error); free(linux_gidset, M_LINUX); return (error); } /* The new credential is allocated before taking the process lock and filled from the current one by crcopysafe(). */ newcred = crget(); p = td->td_proc; PROC_LOCK(p); oldcred = crcopysafe(p, newcred); /* * cr_groups[0] holds egid. Setting the whole set from * the supplied set will cause egid to be changed too. * Keep cr_groups[0] unchanged to prevent that. 
*/ if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) { PROC_UNLOCK(p); crfree(newcred); LIN_SDT_PROBE1(uid16, linux_setgroups16, priv_check_cred_error, error); goto out; } /* Supplied gids are stored from cr_groups[1] upward, filled from the last entry down; slot 0 is not written. */ if (ngrp > 0) { newcred->cr_ngroups = ngrp + 1; bsd_gidset = newcred->cr_groups; ngrp--; while (ngrp >= 0) { bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; ngrp--; } } else newcred->cr_ngroups = 1; setsugid(td->td_proc); proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); error = 0; out: free(linux_gidset, M_LINUX); LIN_SDT_PROBE1(uid16, linux_setgroups16, return, error); return (error); } /* Copies the groups of the calling thread credential, excluding cr_groups[0], out to args->gidset as 16-bit gids. A gidsetsize of 0 only reports the count in td_retval[0]; a gidsetsize smaller than the count returns EINVAL. */ int linux_getgroups16(struct thread *td, struct linux_getgroups16_args *args) { struct ucred *cred; l_gid16_t *linux_gidset; gid_t *bsd_gidset; int bsd_gidsetsz, ngrp, error; LIN_SDT_PROBE2(uid16, linux_getgroups16, entry, args->gidsetsize, args->gidset); cred = td->td_ucred; bsd_gidset = cred->cr_groups; bsd_gidsetsz = cred->cr_ngroups - 1; /* * cr_groups[0] holds egid. Returning the whole set * here will cause a duplicate. Exclude cr_groups[0] * to prevent that. 
*/ if ((ngrp = args->gidsetsize) == 0) { td->td_retval[0] = bsd_gidsetsz; LIN_SDT_PROBE1(uid16, linux_getgroups16, return, 0); return (0); } if (ngrp < bsd_gidsetsz) { LIN_SDT_PROBE1(uid16, linux_getgroups16, return, EINVAL); return (EINVAL); } /* Copy cr_groups[1..] into a temporary l_gid16_t array; the assignment narrows each gid_t to l_gid16_t. */ ngrp = 0; linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), M_LINUX, M_WAITOK); while (ngrp < bsd_gidsetsz) { linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; ngrp++; } error = copyout(linux_gidset, args->gidset, ngrp * sizeof(l_gid16_t)); free(linux_gidset, M_LINUX); if (error) { LIN_SDT_PROBE1(uid16, linux_getgroups16, copyout_error, error); LIN_SDT_PROBE1(uid16, linux_getgroups16, return, error); return (error); } td->td_retval[0] = ngrp; LIN_SDT_PROBE1(uid16, linux_getgroups16, return, 0); return (0); } /* Stores cr_rgid of the thread credential in td_retval[0]; always returns 0. */ int linux_getgid16(struct thread *td, struct linux_getgid16_args *args) { LIN_SDT_PROBE0(uid16, linux_getgid16, entry); td->td_retval[0] = td->td_ucred->cr_rgid; LIN_SDT_PROBE1(uid16, linux_getgid16, return, 0); return (0); } /* Stores cr_ruid of the thread credential in td_retval[0]; always returns 0. */ int linux_getuid16(struct thread *td, struct linux_getuid16_args *args) { LIN_SDT_PROBE0(uid16, linux_getuid16, entry); td->td_retval[0] = td->td_ucred->cr_ruid; LIN_SDT_PROBE1(uid16, linux_getuid16, return, 0); return (0); } /* Forwards to sys_getegid(); bsd is passed without being initialised. */ int linux_getegid16(struct thread *td, struct linux_getegid16_args *args) { struct getegid_args bsd; int error; LIN_SDT_PROBE0(uid16, linux_getegid16, entry); error = sys_getegid(td, &bsd); LIN_SDT_PROBE1(uid16, linux_getegid16, return, error); return (error); } /* Forwards to sys_geteuid(); bsd is passed without being initialised. */ int linux_geteuid16(struct thread *td, struct linux_geteuid16_args *args) { struct geteuid_args bsd; int error; LIN_SDT_PROBE0(uid16, linux_geteuid16, entry); error = sys_geteuid(td, &bsd); LIN_SDT_PROBE1(uid16, linux_geteuid16, return, error); return (error); } /* Forwards args->gid unchanged to sys_setgid(). */ int linux_setgid16(struct thread *td, struct linux_setgid16_args *args) { struct setgid_args bsd; int error; LIN_SDT_PROBE1(uid16, linux_setgid16, entry, args->gid); bsd.gid = args->gid; error = sys_setgid(td, &bsd); LIN_SDT_PROBE1(uid16, linux_setgid16, 
return, error); return (error); } /* Forwards args->uid unchanged to sys_setuid(). */ int linux_setuid16(struct thread *td, struct linux_setuid16_args *args) { struct setuid_args bsd; int error; LIN_SDT_PROBE1(uid16, linux_setuid16, entry, args->uid); bsd.uid = args->uid; error = sys_setuid(td, &bsd); LIN_SDT_PROBE1(uid16, linux_setuid16, return, error); return (error); } /* Forwards to sys_setregid(); each id goes through CAST_NOCHG, so 0xFFFF becomes -1. */ int linux_setregid16(struct thread *td, struct linux_setregid16_args *args) { struct setregid_args bsd; int error; LIN_SDT_PROBE2(uid16, linux_setregid16, entry, args->rgid, args->egid); bsd.rgid = CAST_NOCHG(args->rgid); bsd.egid = CAST_NOCHG(args->egid); error = sys_setregid(td, &bsd); LIN_SDT_PROBE1(uid16, linux_setregid16, return, error); return (error); } /* Forwards to sys_setreuid(); each id goes through CAST_NOCHG, so 0xFFFF becomes -1. */ int linux_setreuid16(struct thread *td, struct linux_setreuid16_args *args) { struct setreuid_args bsd; int error; LIN_SDT_PROBE2(uid16, linux_setreuid16, entry, args->ruid, args->euid); bsd.ruid = CAST_NOCHG(args->ruid); bsd.euid = CAST_NOCHG(args->euid); error = sys_setreuid(td, &bsd); LIN_SDT_PROBE1(uid16, linux_setreuid16, return, error); return (error); } /* Forwards to sys_setresgid(); each id goes through CAST_NOCHG, so 0xFFFF becomes -1. */ int linux_setresgid16(struct thread *td, struct linux_setresgid16_args *args) { struct setresgid_args bsd; int error; LIN_SDT_PROBE3(uid16, linux_setresgid16, entry, args->rgid, args->egid, args->sgid); bsd.rgid = CAST_NOCHG(args->rgid); bsd.egid = CAST_NOCHG(args->egid); bsd.sgid = CAST_NOCHG(args->sgid); error = sys_setresgid(td, &bsd); LIN_SDT_PROBE1(uid16, linux_setresgid16, return, error); return (error); } /* Forwards to sys_setresuid(); each id goes through CAST_NOCHG, so 0xFFFF becomes -1. */ int linux_setresuid16(struct thread *td, struct linux_setresuid16_args *args) { struct setresuid_args bsd; int error; LIN_SDT_PROBE3(uid16, linux_setresuid16, entry, args->ruid, args->euid, args->suid); bsd.ruid = CAST_NOCHG(args->ruid); bsd.euid = CAST_NOCHG(args->euid); bsd.suid = CAST_NOCHG(args->suid); error = sys_setresuid(td, &bsd); LIN_SDT_PROBE1(uid16, linux_setresuid16, return, error); return (error); } Index: head/sys/compat/linux/linux_util.h =================================================================== --- 
head/sys/compat/linux/linux_util.h (revision 364365) +++ head/sys/compat/linux/linux_util.h (revision 364366) @@ -1,154 +1,157 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1994 Christos Zoulas * Copyright (c) 1995 Frank van der Linden * Copyright (c) 1995 Scott Bartram * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * from: svr4_util.h,v 1.5 1994/11/18 02:54:31 christos Exp * from: linux_util.h,v 1.2 1995/03/05 23:23:50 fvdl Exp * $FreeBSD$ */ #ifndef _LINUX_UTIL_H_ #define _LINUX_UTIL_H_ #include #include #include #include #include #include #include #include MALLOC_DECLARE(M_LINUX); MALLOC_DECLARE(M_EPOLL); MALLOC_DECLARE(M_FUTEX); MALLOC_DECLARE(M_FUTEX_WP); extern char linux_emul_path[]; +extern int linux_use_emul_path; int linux_emul_convpath(struct thread *, const char *, enum uio_seg, char **, int, int); /* Atomically loads linux_use_emul_path; the td argument is not used by the expansion. */ + +#define LUSECONVPATH(td) atomic_load_int(&linux_use_emul_path) /* Converts the user-space path upath with linux_emul_convpath(). When *pathp is NULL afterwards, the macro returns _error from the ENCLOSING function, so callers must not hold resources across it. */ #define LCONVPATH_AT(td, upath, pathp, i, dfd) \ do { \ int _error; \ \ _error = linux_emul_convpath(td, upath, UIO_USERSPACE, \ pathp, i, dfd); \ if (*(pathp) == NULL) \ return (_error); \ } while (0) /* Convenience forms: the EXIST variants pass 0 and the CREAT variants pass 1 as the i argument; forms without _AT use AT_FDCWD. */ #define LCONVPATH(td, upath, pathp, i) \ LCONVPATH_AT(td, upath, pathp, i, AT_FDCWD) #define LCONVPATHEXIST(td, upath, pathp) LCONVPATH(td, upath, pathp, 0) #define LCONVPATHEXIST_AT(td, upath, pathp, dfd) LCONVPATH_AT(td, upath, pathp, 0, dfd) #define LCONVPATHCREAT(td, upath, pathp) LCONVPATH(td, upath, pathp, 1) #define LCONVPATHCREAT_AT(td, upath, pathp, dfd) LCONVPATH_AT(td, upath, pathp, 1, dfd) /* Frees a path buffer with malloc type M_TEMP. */ #define LFREEPATH(path) free(path, M_TEMP) /* Defines linux_<s>() as a stub that always returns ENOSYS. It logs a not-implemented message only when the calling pid differs from the pid remembered in its static variable. */ #define DUMMY(s) \ LIN_SDT_PROBE_DEFINE0(dummy, s, entry); \ LIN_SDT_PROBE_DEFINE0(dummy, s, not_implemented); \ LIN_SDT_PROBE_DEFINE1(dummy, s, return, "int"); \ int \ linux_ ## s(struct thread *td, struct linux_ ## s ## _args *args) \ { \ static pid_t pid; \ \ LIN_SDT_PROBE0(dummy, s, entry); \ \ if (pid != td->td_proc->p_pid) { \ linux_msg(td, "syscall %s not implemented", #s); \ LIN_SDT_PROBE0(dummy, s, not_implemented); \ pid = td->td_proc->p_pid; \ }; \ \ LIN_SDT_PROBE1(dummy, s, return, ENOSYS); \ return (ENOSYS); \ } \ struct __hack /* * This is for the syscalls that are not even yet implemented in Linux. * * They're marked as UNIMPL in syscall.master so it will * have nosys record in linux_sysent[]. 
*/ #define UNIMPLEMENTED(s) void linux_msg(const struct thread *td, const char *fmt, ...) __printflike(2, 3); struct linux_device_handler { char *bsd_driver_name; char *linux_driver_name; char *bsd_device_name; char *linux_device_name; int linux_major; int linux_minor; int linux_char_device; }; int linux_device_register_handler(struct linux_device_handler *h); int linux_device_unregister_handler(struct linux_device_handler *h); char *linux_driver_get_name_dev(device_t dev); int linux_driver_get_major_minor(const char *node, int *major, int *minor); char *linux_get_char_devices(void); void linux_free_get_char_devices(char *string); #if defined(KTR) #define KTR_LINUX KTR_SUBSYS #define LINUX_CTRFMT(nm, fmt) #nm"("fmt")" #define LINUX_CTR6(f, m, p1, p2, p3, p4, p5, p6) do { \ CTR6(KTR_LINUX, LINUX_CTRFMT(f, m), \ p1, p2, p3, p4, p5, p6); \ } while (0) #define LINUX_CTR(f) LINUX_CTR6(f, "", 0, 0, 0, 0, 0, 0) #define LINUX_CTR0(f, m) LINUX_CTR6(f, m, 0, 0, 0, 0, 0, 0) #define LINUX_CTR1(f, m, p1) LINUX_CTR6(f, m, p1, 0, 0, 0, 0, 0) #define LINUX_CTR2(f, m, p1, p2) LINUX_CTR6(f, m, p1, p2, 0, 0, 0, 0) #define LINUX_CTR3(f, m, p1, p2, p3) LINUX_CTR6(f, m, p1, p2, p3, 0, 0, 0) #define LINUX_CTR4(f, m, p1, p2, p3, p4) LINUX_CTR6(f, m, p1, p2, p3, p4, 0, 0) #define LINUX_CTR5(f, m, p1, p2, p3, p4, p5) LINUX_CTR6(f, m, p1, p2, p3, p4, p5, 0) #else #define LINUX_CTR(f) #define LINUX_CTR0(f, m) #define LINUX_CTR1(f, m, p1) #define LINUX_CTR2(f, m, p1, p2) #define LINUX_CTR3(f, m, p1, p2, p3) #define LINUX_CTR4(f, m, p1, p2, p3, p4) #define LINUX_CTR5(f, m, p1, p2, p3, p4, p5) #define LINUX_CTR6(f, m, p1, p2, p3, p4, p5, p6) #endif #endif /* !_LINUX_UTIL_H_ */ Index: head/sys/i386/linux/linux_machdep.c =================================================================== --- head/sys/i386/linux/linux_machdep.c (revision 364365) +++ head/sys/i386/linux/linux_machdep.c (revision 364366) @@ -1,737 +1,741 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2000 
Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* needed for pcb definition in linux_set_thread_area */ #include "opt_posix.h" extern struct sysentvec elf32_freebsd_sysvec; /* defined in i386/i386/elf_machdep.c */ struct l_descriptor { l_uint entry_number; l_ulong base_addr; l_uint limit; l_uint seg_32bit:1; l_uint contents:2; l_uint read_exec_only:1; l_uint limit_in_pages:1; l_uint seg_not_present:1; l_uint useable:1; }; struct l_old_select_argv { l_int nfds; l_fd_set *readfds; l_fd_set *writefds; l_fd_set *exceptfds; struct l_timeval *timeout; }; int linux_execve(struct thread *td, struct linux_execve_args *args) { struct image_args eargs; char *newpath; int error; - LCONVPATHEXIST(td, args->path, &newpath); - - error = exec_copyin_args(&eargs, newpath, UIO_SYSSPACE, - args->argp, args->envp); - LFREEPATH(newpath); + if (!LUSECONVPATH(td)) { + error = exec_copyin_args(&eargs, args->path, UIO_USERSPACE, + args->argp, args->envp); + } else { + LCONVPATHEXIST(td, args->path, &newpath); + error = exec_copyin_args(&eargs, newpath, UIO_SYSSPACE, + args->argp, args->envp); + LFREEPATH(newpath); + } if (error == 0) error = linux_common_execve(td, &eargs); return (error); } struct l_ipc_kludge { struct l_msgbuf *msgp; l_long msgtyp; }; int linux_ipc(struct thread *td, struct linux_ipc_args *args) { switch (args->what & 0xFFFF) { case LINUX_SEMOP: { struct linux_semop_args a; a.semid = args->arg1; a.tsops = PTRIN(args->ptr); a.nsops = args->arg2; return (linux_semop(td, &a)); } case LINUX_SEMGET: { struct linux_semget_args a; a.key = args->arg1; a.nsems = args->arg2; a.semflg = args->arg3; return (linux_semget(td, &a)); } case LINUX_SEMCTL: { struct 
linux_semctl_args a; int error; a.semid = args->arg1; a.semnum = args->arg2; a.cmd = args->arg3; error = copyin(PTRIN(args->ptr), &a.arg, sizeof(a.arg)); if (error) return (error); return (linux_semctl(td, &a)); } case LINUX_MSGSND: { struct linux_msgsnd_args a; a.msqid = args->arg1; a.msgp = PTRIN(args->ptr); a.msgsz = args->arg2; a.msgflg = args->arg3; return (linux_msgsnd(td, &a)); } case LINUX_MSGRCV: { struct linux_msgrcv_args a; a.msqid = args->arg1; a.msgsz = args->arg2; a.msgflg = args->arg3; if ((args->what >> 16) == 0) { struct l_ipc_kludge tmp; int error; if (args->ptr == 0) return (EINVAL); error = copyin(PTRIN(args->ptr), &tmp, sizeof(tmp)); if (error) return (error); a.msgp = PTRIN(tmp.msgp); a.msgtyp = tmp.msgtyp; } else { a.msgp = PTRIN(args->ptr); a.msgtyp = args->arg5; } return (linux_msgrcv(td, &a)); } case LINUX_MSGGET: { struct linux_msgget_args a; a.key = args->arg1; a.msgflg = args->arg2; return (linux_msgget(td, &a)); } case LINUX_MSGCTL: { struct linux_msgctl_args a; a.msqid = args->arg1; a.cmd = args->arg2; a.buf = PTRIN(args->ptr); return (linux_msgctl(td, &a)); } case LINUX_SHMAT: { struct linux_shmat_args a; l_uintptr_t addr; int error; a.shmid = args->arg1; a.shmaddr = PTRIN(args->ptr); a.shmflg = args->arg2; error = linux_shmat(td, &a); if (error != 0) return (error); addr = td->td_retval[0]; error = copyout(&addr, PTRIN(args->arg3), sizeof(addr)); td->td_retval[0] = 0; return (error); } case LINUX_SHMDT: { struct linux_shmdt_args a; a.shmaddr = PTRIN(args->ptr); return (linux_shmdt(td, &a)); } case LINUX_SHMGET: { struct linux_shmget_args a; a.key = args->arg1; a.size = args->arg2; a.shmflg = args->arg3; return (linux_shmget(td, &a)); } case LINUX_SHMCTL: { struct linux_shmctl_args a; a.shmid = args->arg1; a.cmd = args->arg2; a.buf = PTRIN(args->ptr); return (linux_shmctl(td, &a)); } default: break; } return (EINVAL); } int linux_old_select(struct thread *td, struct linux_old_select_args *args) { struct l_old_select_argv linux_args; 
struct linux_select_args newsel; int error; error = copyin(args->ptr, &linux_args, sizeof(linux_args)); if (error) return (error); newsel.nfds = linux_args.nfds; newsel.readfds = linux_args.readfds; newsel.writefds = linux_args.writefds; newsel.exceptfds = linux_args.exceptfds; newsel.timeout = linux_args.timeout; return (linux_select(td, &newsel)); } int linux_set_cloned_tls(struct thread *td, void *desc) { struct segment_descriptor sd; struct l_user_desc info; int idx, error; int a[2]; error = copyin(desc, &info, sizeof(struct l_user_desc)); if (error) { linux_msg(td, "set_cloned_tls copyin failed!"); } else { idx = info.entry_number; /* * looks like we're getting the idx we returned * in the set_thread_area() syscall */ if (idx != 6 && idx != 3) { linux_msg(td, "set_cloned_tls resetting idx!"); idx = 3; } /* this doesnt happen in practice */ if (idx == 6) { /* we might copy out the entry_number as 3 */ info.entry_number = 3; error = copyout(&info, desc, sizeof(struct l_user_desc)); if (error) linux_msg(td, "set_cloned_tls copyout failed!"); } a[0] = LINUX_LDT_entry_a(&info); a[1] = LINUX_LDT_entry_b(&info); memcpy(&sd, &a, sizeof(a)); /* set %gs */ td->td_pcb->pcb_gsd = sd; td->td_pcb->pcb_gs = GSEL(GUGS_SEL, SEL_UPL); } return (error); } int linux_set_upcall_kse(struct thread *td, register_t stack) { if (stack) td->td_frame->tf_esp = stack; /* * The newly created Linux thread returns * to the user space by the same path that a parent do. 
*/ td->td_frame->tf_eax = 0; return (0); } int linux_mmap2(struct thread *td, struct linux_mmap2_args *args) { return (linux_mmap_common(td, args->addr, args->len, args->prot, args->flags, args->fd, (uint64_t)(uint32_t)args->pgoff * PAGE_SIZE)); } int linux_mmap(struct thread *td, struct linux_mmap_args *args) { int error; struct l_mmap_argv linux_args; error = copyin(args->ptr, &linux_args, sizeof(linux_args)); if (error) return (error); return (linux_mmap_common(td, linux_args.addr, linux_args.len, linux_args.prot, linux_args.flags, linux_args.fd, (uint32_t)linux_args.pgoff)); } int linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) { return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot)); } int linux_madvise(struct thread *td, struct linux_madvise_args *uap) { return (linux_madvise_common(td, PTROUT(uap->addr), uap->len, uap->behav)); } int linux_ioperm(struct thread *td, struct linux_ioperm_args *args) { int error; struct i386_ioperm_args iia; iia.start = args->start; iia.length = args->length; iia.enable = args->enable; error = i386_set_ioperm(td, &iia); return (error); } int linux_iopl(struct thread *td, struct linux_iopl_args *args) { int error; if (args->level < 0 || args->level > 3) return (EINVAL); if ((error = priv_check(td, PRIV_IO)) != 0) return (error); if ((error = securelevel_gt(td->td_ucred, 0)) != 0) return (error); td->td_frame->tf_eflags = (td->td_frame->tf_eflags & ~PSL_IOPL) | (args->level * (PSL_IOPL / 3)); return (0); } int linux_modify_ldt(struct thread *td, struct linux_modify_ldt_args *uap) { int error; struct i386_ldt_args ldt; struct l_descriptor ld; union descriptor desc; int size, written; switch (uap->func) { case 0x00: /* read_ldt */ ldt.start = 0; ldt.descs = uap->ptr; ldt.num = uap->bytecount / sizeof(union descriptor); error = i386_get_ldt(td, &ldt); td->td_retval[0] *= sizeof(union descriptor); break; case 0x02: /* read_default_ldt = 0 */ size = 5*sizeof(struct l_desc_struct); if (size > 
uap->bytecount) size = uap->bytecount; for (written = error = 0; written < size && error == 0; written++) error = subyte((char *)uap->ptr + written, 0); td->td_retval[0] = written; break; case 0x01: /* write_ldt */ case 0x11: /* write_ldt */ if (uap->bytecount != sizeof(ld)) return (EINVAL); error = copyin(uap->ptr, &ld, sizeof(ld)); if (error) return (error); ldt.start = ld.entry_number; ldt.descs = &desc; ldt.num = 1; desc.sd.sd_lolimit = (ld.limit & 0x0000ffff); desc.sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16; desc.sd.sd_lobase = (ld.base_addr & 0x00ffffff); desc.sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24; desc.sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) | (ld.contents << 2); desc.sd.sd_dpl = 3; desc.sd.sd_p = (ld.seg_not_present ^ 1); desc.sd.sd_xx = 0; desc.sd.sd_def32 = ld.seg_32bit; desc.sd.sd_gran = ld.limit_in_pages; error = i386_set_ldt(td, &ldt, &desc); break; default: error = ENOSYS; break; } if (error == EOPNOTSUPP) { linux_msg(td, "modify_ldt needs kernel option USER_LDT"); error = ENOSYS; } return (error); } int linux_sigaction(struct thread *td, struct linux_sigaction_args *args) { l_osigaction_t osa; l_sigaction_t act, oact; int error; if (args->nsa != NULL) { error = copyin(args->nsa, &osa, sizeof(l_osigaction_t)); if (error) return (error); act.lsa_handler = osa.lsa_handler; act.lsa_flags = osa.lsa_flags; act.lsa_restorer = osa.lsa_restorer; LINUX_SIGEMPTYSET(act.lsa_mask); act.lsa_mask.__mask = osa.lsa_mask; } error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL, args->osa ? &oact : NULL); if (args->osa != NULL && !error) { osa.lsa_handler = oact.lsa_handler; osa.lsa_flags = oact.lsa_flags; osa.lsa_restorer = oact.lsa_restorer; osa.lsa_mask = oact.lsa_mask.__mask; error = copyout(&osa, args->osa, sizeof(l_osigaction_t)); } return (error); } /* * Linux has two extra args, restart and oldmask. 
We dont use these, * but it seems that "restart" is actually a context pointer that * enables the signal to happen with a different register set. */ int linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args) { sigset_t sigmask; l_sigset_t mask; LINUX_SIGEMPTYSET(mask); mask.__mask = args->mask; linux_to_bsd_sigset(&mask, &sigmask); return (kern_sigsuspend(td, sigmask)); } int linux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap) { l_sigset_t lmask; sigset_t sigmask; int error; if (uap->sigsetsize != sizeof(l_sigset_t)) return (EINVAL); error = copyin(uap->newset, &lmask, sizeof(l_sigset_t)); if (error) return (error); linux_to_bsd_sigset(&lmask, &sigmask); return (kern_sigsuspend(td, sigmask)); } int linux_pause(struct thread *td, struct linux_pause_args *args) { struct proc *p = td->td_proc; sigset_t sigmask; PROC_LOCK(p); sigmask = td->td_sigmask; PROC_UNLOCK(p); return (kern_sigsuspend(td, sigmask)); } int linux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap) { stack_t ss, oss; l_stack_t lss; int error; if (uap->uss != NULL) { error = copyin(uap->uss, &lss, sizeof(l_stack_t)); if (error) return (error); ss.ss_sp = lss.ss_sp; ss.ss_size = lss.ss_size; ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags); } error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL, (uap->uoss != NULL) ? &oss : NULL); if (!error && uap->uoss != NULL) { lss.ss_sp = oss.ss_sp; lss.ss_size = oss.ss_size; lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags); error = copyout(&lss, uap->uoss, sizeof(l_stack_t)); } return (error); } int linux_set_thread_area(struct thread *td, struct linux_set_thread_area_args *args) { struct l_user_desc info; int error; int idx; int a[2]; struct segment_descriptor sd; error = copyin(args->desc, &info, sizeof(struct l_user_desc)); if (error) return (error); idx = info.entry_number; /* * Semantics of Linux version: every thread in the system has array of * 3 tls descriptors. 
1st is GLIBC TLS, 2nd is WINE, 3rd unknown. This * syscall loads one of the selected tls decriptors with a value and * also loads GDT descriptors 6, 7 and 8 with the content of the * per-thread descriptors. * * Semantics of FreeBSD version: I think we can ignore that Linux has 3 * per-thread descriptors and use just the 1st one. The tls_array[] * is used only in set/get-thread_area() syscalls and for loading the * GDT descriptors. In FreeBSD we use just one GDT descriptor for TLS * so we will load just one. * * XXX: this doesn't work when a user space process tries to use more * than 1 TLS segment. Comment in the Linux sources says wine might do * this. */ /* * we support just GLIBC TLS now * we should let 3 proceed as well because we use this segment so * if code does two subsequent calls it should succeed */ if (idx != 6 && idx != -1 && idx != 3) return (EINVAL); /* * we have to copy out the GDT entry we use * FreeBSD uses GDT entry #3 for storing %gs so load that * * XXX: what if a user space program doesn't check this value and tries * to use 6, 7 or 8? */ idx = info.entry_number = 3; error = copyout(&info, args->desc, sizeof(struct l_user_desc)); if (error) return (error); if (LINUX_LDT_empty(&info)) { a[0] = 0; a[1] = 0; } else { a[0] = LINUX_LDT_entry_a(&info); a[1] = LINUX_LDT_entry_b(&info); } memcpy(&sd, &a, sizeof(a)); /* this is taken from i386 version of cpu_set_user_tls() */ critical_enter(); /* set %gs */ td->td_pcb->pcb_gsd = sd; PCPU_GET(fsgs_gdt)[1] = sd; load_gs(GSEL(GUGS_SEL, SEL_UPL)); critical_exit(); return (0); } int linux_get_thread_area(struct thread *td, struct linux_get_thread_area_args *args) { struct l_user_desc info; int error; int idx; struct l_desc_struct desc; struct segment_descriptor sd; error = copyin(args->desc, &info, sizeof(struct l_user_desc)); if (error) return (error); idx = info.entry_number; /* XXX: I am not sure if we want 3 to be allowed too. 
*/ if (idx != 6 && idx != 3) return (EINVAL); idx = 3; memset(&info, 0, sizeof(info)); sd = PCPU_GET(fsgs_gdt)[1]; memcpy(&desc, &sd, sizeof(desc)); info.entry_number = idx; info.base_addr = LINUX_GET_BASE(&desc); info.limit = LINUX_GET_LIMIT(&desc); info.seg_32bit = LINUX_GET_32BIT(&desc); info.contents = LINUX_GET_CONTENTS(&desc); info.read_exec_only = !LINUX_GET_WRITABLE(&desc); info.limit_in_pages = LINUX_GET_LIMIT_PAGES(&desc); info.seg_not_present = !LINUX_GET_PRESENT(&desc); info.useable = LINUX_GET_USEABLE(&desc); error = copyout(&info, args->desc, sizeof(struct l_user_desc)); if (error) return (EFAULT); return (0); } /* XXX: this wont work with module - convert it */ int linux_mq_open(struct thread *td, struct linux_mq_open_args *args) { #ifdef P1003_1B_MQUEUE return (sys_kmq_open(td, (struct kmq_open_args *)args)); #else return (ENOSYS); #endif } int linux_mq_unlink(struct thread *td, struct linux_mq_unlink_args *args) { #ifdef P1003_1B_MQUEUE return (sys_kmq_unlink(td, (struct kmq_unlink_args *)args)); #else return (ENOSYS); #endif } int linux_mq_timedsend(struct thread *td, struct linux_mq_timedsend_args *args) { #ifdef P1003_1B_MQUEUE return (sys_kmq_timedsend(td, (struct kmq_timedsend_args *)args)); #else return (ENOSYS); #endif } int linux_mq_timedreceive(struct thread *td, struct linux_mq_timedreceive_args *args) { #ifdef P1003_1B_MQUEUE return (sys_kmq_timedreceive(td, (struct kmq_timedreceive_args *)args)); #else return (ENOSYS); #endif } int linux_mq_notify(struct thread *td, struct linux_mq_notify_args *args) { #ifdef P1003_1B_MQUEUE return (sys_kmq_notify(td, (struct kmq_notify_args *)args)); #else return (ENOSYS); #endif } int linux_mq_getsetattr(struct thread *td, struct linux_mq_getsetattr_args *args) { #ifdef P1003_1B_MQUEUE return (sys_kmq_setattr(td, (struct kmq_setattr_args *)args)); #else return (ENOSYS); #endif }