diff --git a/sys/amd64/linux/linux_machdep.c b/sys/amd64/linux/linux_machdep.c index 67429a1c049f..18be62dab9ae 100644 --- a/sys/amd64/linux/linux_machdep.c +++ b/sys/amd64/linux/linux_machdep.c @@ -1,414 +1,414 @@ /*- * Copyright (c) 2004 Tim J. Robbins * Copyright (c) 2002 Doug Rabson * Copyright (c) 2000 Marcel Moolenaar * All rights reserved. * Copyright (c) 2013 Dmitry Chagin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define LINUX_ARCH_AMD64 0xc000003e int linux_execve(struct thread *td, struct linux_execve_args *args) { struct image_args eargs; char *path; int error; LINUX_CTR(execve); if (!LUSECONVPATH(td)) { error = exec_copyin_args(&eargs, args->path, UIO_USERSPACE, args->argp, args->envp); } else { - LCONVPATHEXIST(td, args->path, &path); + LCONVPATHEXIST(args->path, &path); error = exec_copyin_args(&eargs, path, UIO_SYSSPACE, args->argp, args->envp); LFREEPATH(path); } if (error == 0) error = linux_common_execve(td, &eargs); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); return (error); } int linux_set_upcall(struct thread *td, register_t stack) { if (stack) td->td_frame->tf_rsp = stack; /* * The newly created Linux thread returns * to the user space by the same path that a parent does. 
*/ td->td_frame->tf_rax = 0; return (0); } int linux_mmap2(struct thread *td, struct linux_mmap2_args *args) { return (linux_mmap_common(td, args->addr, args->len, args->prot, args->flags, args->fd, args->pgoff)); } int linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) { return (linux_mprotect_common(td, uap->addr, uap->len, uap->prot)); } int linux_madvise(struct thread *td, struct linux_madvise_args *uap) { return (linux_madvise_common(td, uap->addr, uap->len, uap->behav)); } int linux_iopl(struct thread *td, struct linux_iopl_args *args) { int error; LINUX_CTR(iopl); if (args->level > 3) return (EINVAL); if ((error = priv_check(td, PRIV_IO)) != 0) return (error); if ((error = securelevel_gt(td->td_ucred, 0)) != 0) return (error); td->td_frame->tf_rflags = (td->td_frame->tf_rflags & ~PSL_IOPL) | (args->level * (PSL_IOPL / 3)); return (0); } int linux_pause(struct thread *td, struct linux_pause_args *args) { struct proc *p = td->td_proc; sigset_t sigmask; LINUX_CTR(pause); PROC_LOCK(p); sigmask = td->td_sigmask; PROC_UNLOCK(p); return (kern_sigsuspend(td, sigmask)); } int linux_arch_prctl(struct thread *td, struct linux_arch_prctl_args *args) { unsigned long long cet[3]; struct pcb *pcb; int error; pcb = td->td_pcb; LINUX_CTR2(arch_prctl, "0x%x, %p", args->code, args->addr); switch (args->code) { case LINUX_ARCH_SET_GS: if (args->addr < VM_MAXUSER_ADDRESS) { update_pcb_bases(pcb); pcb->pcb_gsbase = args->addr; td->td_frame->tf_gs = _ugssel; error = 0; } else error = EPERM; break; case LINUX_ARCH_SET_FS: if (args->addr < VM_MAXUSER_ADDRESS) { update_pcb_bases(pcb); pcb->pcb_fsbase = args->addr; td->td_frame->tf_fs = _ufssel; error = 0; } else error = EPERM; break; case LINUX_ARCH_GET_FS: error = copyout(&pcb->pcb_fsbase, PTRIN(args->addr), sizeof(args->addr)); break; case LINUX_ARCH_GET_GS: error = copyout(&pcb->pcb_gsbase, PTRIN(args->addr), sizeof(args->addr)); break; case LINUX_ARCH_CET_STATUS: memset(cet, 0, sizeof(cet)); error = copyout(&cet, PTRIN(args->addr), sizeof(cet)); break; default: linux_msg(td, "unsupported arch_prctl code %#x", args->code); error = EINVAL; } return (error); } int linux_set_cloned_tls(struct thread *td, void *desc) { struct pcb *pcb; if ((uint64_t)desc >= VM_MAXUSER_ADDRESS) return (EPERM); pcb = td->td_pcb; update_pcb_bases(pcb); pcb->pcb_fsbase = (register_t)desc; td->td_frame->tf_fs = _ufssel; return (0); } int futex_xchgl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_xchgl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_xchgl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_xchgl_smap : futex_xchgl_nosmap); } int futex_addl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_addl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_addl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_addl_smap : futex_addl_nosmap); } int futex_orl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_orl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_orl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_orl_smap : futex_orl_nosmap); } int futex_andl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_andl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_andl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? 
futex_andl_smap : futex_andl_nosmap); } int futex_xorl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_xorl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_xorl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_xorl_smap : futex_xorl_nosmap); } void bsd_to_linux_regset(const struct reg *b_reg, struct linux_pt_regset *l_regset) { l_regset->r15 = b_reg->r_r15; l_regset->r14 = b_reg->r_r14; l_regset->r13 = b_reg->r_r13; l_regset->r12 = b_reg->r_r12; l_regset->rbp = b_reg->r_rbp; l_regset->rbx = b_reg->r_rbx; l_regset->r11 = b_reg->r_r11; l_regset->r10 = b_reg->r_r10; l_regset->r9 = b_reg->r_r9; l_regset->r8 = b_reg->r_r8; l_regset->rax = b_reg->r_rax; l_regset->rcx = b_reg->r_rcx; l_regset->rdx = b_reg->r_rdx; l_regset->rsi = b_reg->r_rsi; l_regset->rdi = b_reg->r_rdi; l_regset->orig_rax = b_reg->r_rax; l_regset->rip = b_reg->r_rip; l_regset->cs = b_reg->r_cs; l_regset->eflags = b_reg->r_rflags; l_regset->rsp = b_reg->r_rsp; l_regset->ss = b_reg->r_ss; l_regset->fs_base = 0; l_regset->gs_base = 0; l_regset->ds = b_reg->r_ds; l_regset->es = b_reg->r_es; l_regset->fs = b_reg->r_fs; l_regset->gs = b_reg->r_gs; } void linux_to_bsd_regset(struct reg *b_reg, const struct linux_pt_regset *l_regset) { b_reg->r_r15 = l_regset->r15; b_reg->r_r14 = l_regset->r14; b_reg->r_r13 = l_regset->r13; b_reg->r_r12 = l_regset->r12; b_reg->r_rbp = l_regset->rbp; b_reg->r_rbx = l_regset->rbx; b_reg->r_r11 = l_regset->r11; b_reg->r_r10 = l_regset->r10; b_reg->r_r9 = l_regset->r9; b_reg->r_r8 = l_regset->r8; b_reg->r_rax = l_regset->rax; b_reg->r_rcx = l_regset->rcx; b_reg->r_rdx = l_regset->rdx; b_reg->r_rsi = l_regset->rsi; b_reg->r_rdi = l_regset->rdi; b_reg->r_rax = l_regset->orig_rax; b_reg->r_rip = l_regset->rip; b_reg->r_cs = l_regset->cs; b_reg->r_rflags = l_regset->eflags; b_reg->r_rsp = l_regset->rsp; b_reg->r_ss = l_regset->ss; b_reg->r_ds = l_regset->ds; b_reg->r_es = l_regset->es; b_reg->r_fs = l_regset->fs; b_reg->r_gs = l_regset->gs; } void linux_ptrace_get_syscall_info_machdep(const struct reg *reg, struct syscall_info *si) { si->arch = LINUX_ARCH_AMD64; si->instruction_pointer = reg->r_rip; si->stack_pointer = reg->r_rsp; } int linux_ptrace_getregs_machdep(struct thread *td, pid_t pid, struct linux_pt_regset *l_regset) { struct ptrace_lwpinfo lwpinfo; struct pcb *pcb; int error; pcb = td->td_pcb; if (td == curthread) update_pcb_bases(pcb); l_regset->fs_base = pcb->pcb_fsbase; l_regset->gs_base = pcb->pcb_gsbase; error = kern_ptrace(td, PT_LWPINFO, pid, &lwpinfo, sizeof(lwpinfo)); if (error != 0) { linux_msg(td, "PT_LWPINFO failed with error %d", error); return (error); } if ((lwpinfo.pl_flags & PL_FLAG_SCE) != 0) { /* * Undo the mangling done in exception.S:fast_syscall_common(). */ l_regset->r10 = l_regset->rcx; } if ((lwpinfo.pl_flags & (PL_FLAG_SCE | PL_FLAG_SCX)) != 0) { /* * In Linux, the syscall number - passed to the syscall * as rax - is preserved in orig_rax; rax gets overwritten * with syscall return value. */ l_regset->orig_rax = lwpinfo.pl_syscall_code; } return (0); } diff --git a/sys/amd64/linux32/linux32_machdep.c b/sys/amd64/linux32/linux32_machdep.c index 86f7bdc918d8..9dd988452a1c 100644 --- a/sys/amd64/linux32/linux32_machdep.c +++ b/sys/amd64/linux32/linux32_machdep.c @@ -1,747 +1,747 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2004 Tim J. Robbins * Copyright (c) 2002 Doug Rabson * Copyright (c) 2000 Marcel Moolenaar * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru); struct l_old_select_argv { l_int nfds; l_uintptr_t readfds; l_uintptr_t writefds; l_uintptr_t exceptfds; l_uintptr_t timeout; } __packed; static void bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru) { lru->ru_utime.tv_sec = ru->ru_utime.tv_sec; lru->ru_utime.tv_usec = ru->ru_utime.tv_usec; lru->ru_stime.tv_sec = ru->ru_stime.tv_sec; lru->ru_stime.tv_usec = ru->ru_stime.tv_usec; lru->ru_maxrss = ru->ru_maxrss; lru->ru_ixrss = ru->ru_ixrss; lru->ru_idrss = ru->ru_idrss; lru->ru_isrss = ru->ru_isrss; lru->ru_minflt = ru->ru_minflt; lru->ru_majflt = ru->ru_majflt; lru->ru_nswap = ru->ru_nswap; lru->ru_inblock = ru->ru_inblock; lru->ru_oublock = ru->ru_oublock; lru->ru_msgsnd = ru->ru_msgsnd; lru->ru_msgrcv = ru->ru_msgrcv; lru->ru_nsignals = ru->ru_nsignals; lru->ru_nvcsw = ru->ru_nvcsw; lru->ru_nivcsw = ru->ru_nivcsw; } int linux_copyout_rusage(struct rusage *ru, void *uaddr) { struct l_rusage lru; bsd_to_linux_rusage(ru, &lru); return (copyout(&lru, uaddr, sizeof(struct l_rusage))); } int linux_execve(struct thread *td, struct linux_execve_args *args) { struct image_args eargs; char *path; int error; - LCONVPATHEXIST(td, args->path, &path); + LCONVPATHEXIST(args->path, &path); error = freebsd32_exec_copyin_args(&eargs, path, UIO_SYSSPACE, args->argp, args->envp); free(path, M_TEMP); if (error == 0) error = linux_common_execve(td, &eargs); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 
0 : error, td); return (error); } CTASSERT(sizeof(struct l_iovec32) == 8); int linux32_copyinuio(struct l_iovec32 *iovp, l_ulong iovcnt, struct uio **uiop) { struct l_iovec32 iov32; struct iovec *iov; struct uio *uio; uint32_t iovlen; int error, i; *uiop = NULL; if (iovcnt > UIO_MAXIOV) return (EINVAL); iovlen = iovcnt * sizeof(struct iovec); uio = malloc(iovlen + sizeof(*uio), M_IOV, M_WAITOK); iov = (struct iovec *)(uio + 1); for (i = 0; i < iovcnt; i++) { error = copyin(&iovp[i], &iov32, sizeof(struct l_iovec32)); if (error) { free(uio, M_IOV); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } uio->uio_iov = iov; uio->uio_iovcnt = iovcnt; uio->uio_segflg = UIO_USERSPACE; uio->uio_offset = -1; uio->uio_resid = 0; for (i = 0; i < iovcnt; i++) { if (iov->iov_len > INT_MAX - uio->uio_resid) { free(uio, M_IOV); return (EINVAL); } uio->uio_resid += iov->iov_len; iov++; } *uiop = uio; return (0); } int linux32_copyiniov(struct l_iovec32 *iovp32, l_ulong iovcnt, struct iovec **iovp, int error) { struct l_iovec32 iov32; struct iovec *iov; uint32_t iovlen; int i; *iovp = NULL; if (iovcnt > UIO_MAXIOV) return (error); iovlen = iovcnt * sizeof(struct iovec); iov = malloc(iovlen, M_IOV, M_WAITOK); for (i = 0; i < iovcnt; i++) { error = copyin(&iovp32[i], &iov32, sizeof(struct l_iovec32)); if (error) { free(iov, M_IOV); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } *iovp = iov; return(0); } int linux_readv(struct thread *td, struct linux_readv_args *uap) { struct uio *auio; int error; error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_readv(td, uap->fd, auio); free(auio, M_IOV); return (error); } int linux_writev(struct thread *td, struct linux_writev_args *uap) { struct uio *auio; int error; error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_writev(td, uap->fd, auio); free(auio, M_IOV); return (error); } struct l_ipc_kludge { l_uintptr_t msgp; l_long msgtyp; } __packed; int linux_ipc(struct thread *td, struct linux_ipc_args *args) { switch (args->what & 0xFFFF) { case LINUX_SEMOP: { struct linux_semop_args a; a.semid = args->arg1; a.tsops = PTRIN(args->ptr); a.nsops = args->arg2; return (linux_semop(td, &a)); } case LINUX_SEMGET: { struct linux_semget_args a; a.key = args->arg1; a.nsems = args->arg2; a.semflg = args->arg3; return (linux_semget(td, &a)); } case LINUX_SEMCTL: { struct linux_semctl_args a; int error; a.semid = args->arg1; a.semnum = args->arg2; a.cmd = args->arg3; error = copyin(PTRIN(args->ptr), &a.arg, sizeof(a.arg)); if (error) return (error); return (linux_semctl(td, &a)); } case LINUX_MSGSND: { struct linux_msgsnd_args a; a.msqid = args->arg1; a.msgp = PTRIN(args->ptr); a.msgsz = args->arg2; a.msgflg = args->arg3; return (linux_msgsnd(td, &a)); } case LINUX_MSGRCV: { struct linux_msgrcv_args a; a.msqid = args->arg1; a.msgsz = args->arg2; a.msgflg = args->arg3; if ((args->what >> 16) == 0) { struct l_ipc_kludge tmp; int error; if (args->ptr == 0) return (EINVAL); error = copyin(PTRIN(args->ptr), &tmp, sizeof(tmp)); if (error) return (error); a.msgp = PTRIN(tmp.msgp); a.msgtyp = tmp.msgtyp; } else { a.msgp = PTRIN(args->ptr); a.msgtyp = args->arg5; } return (linux_msgrcv(td, &a)); } case LINUX_MSGGET: { struct linux_msgget_args a; a.key = args->arg1; a.msgflg = args->arg2; return (linux_msgget(td, &a)); } case LINUX_MSGCTL: { struct linux_msgctl_args a; a.msqid = args->arg1; a.cmd = args->arg2; a.buf = 
PTRIN(args->ptr); return (linux_msgctl(td, &a)); } case LINUX_SHMAT: { struct linux_shmat_args a; l_uintptr_t addr; int error; a.shmid = args->arg1; a.shmaddr = PTRIN(args->ptr); a.shmflg = args->arg2; error = linux_shmat(td, &a); if (error != 0) return (error); addr = td->td_retval[0]; error = copyout(&addr, PTRIN(args->arg3), sizeof(addr)); td->td_retval[0] = 0; return (error); } case LINUX_SHMDT: { struct linux_shmdt_args a; a.shmaddr = PTRIN(args->ptr); return (linux_shmdt(td, &a)); } case LINUX_SHMGET: { struct linux_shmget_args a; a.key = args->arg1; a.size = args->arg2; a.shmflg = args->arg3; return (linux_shmget(td, &a)); } case LINUX_SHMCTL: { struct linux_shmctl_args a; a.shmid = args->arg1; a.cmd = args->arg2; a.buf = PTRIN(args->ptr); return (linux_shmctl(td, &a)); } default: break; } return (EINVAL); } int linux_old_select(struct thread *td, struct linux_old_select_args *args) { struct l_old_select_argv linux_args; struct linux_select_args newsel; int error; error = copyin(args->ptr, &linux_args, sizeof(linux_args)); if (error) return (error); newsel.nfds = linux_args.nfds; newsel.readfds = PTRIN(linux_args.readfds); newsel.writefds = PTRIN(linux_args.writefds); newsel.exceptfds = PTRIN(linux_args.exceptfds); newsel.timeout = PTRIN(linux_args.timeout); return (linux_select(td, &newsel)); } int linux_set_cloned_tls(struct thread *td, void *desc) { struct l_user_desc info; struct pcb *pcb; int error; error = copyin(desc, &info, sizeof(struct l_user_desc)); if (error) { linux_msg(td, "set_cloned_tls copyin info failed!"); } else { /* We might copy out the entry_number as GUGS32_SEL. */ info.entry_number = GUGS32_SEL; error = copyout(&info, desc, sizeof(struct l_user_desc)); if (error) linux_msg(td, "set_cloned_tls copyout info failed!"); pcb = td->td_pcb; update_pcb_bases(pcb); pcb->pcb_gsbase = (register_t)info.base_addr; td->td_frame->tf_gs = GSEL(GUGS32_SEL, SEL_UPL); } return (error); } int linux_set_upcall(struct thread *td, register_t stack) { if (stack) td->td_frame->tf_rsp = stack; /* * The newly created Linux thread returns * to the user space by the same path that a parent do. 
*/ td->td_frame->tf_rax = 0; return (0); } int linux_mmap2(struct thread *td, struct linux_mmap2_args *args) { return (linux_mmap_common(td, PTROUT(args->addr), args->len, args->prot, args->flags, args->fd, (uint64_t)(uint32_t)args->pgoff * PAGE_SIZE)); } int linux_mmap(struct thread *td, struct linux_mmap_args *args) { int error; struct l_mmap_argv linux_args; error = copyin(args->ptr, &linux_args, sizeof(linux_args)); if (error) return (error); return (linux_mmap_common(td, linux_args.addr, linux_args.len, linux_args.prot, linux_args.flags, linux_args.fd, (uint32_t)linux_args.pgoff)); } int linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) { return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot)); } int linux_madvise(struct thread *td, struct linux_madvise_args *uap) { return (linux_madvise_common(td, PTROUT(uap->addr), uap->len, uap->behav)); } int linux_iopl(struct thread *td, struct linux_iopl_args *args) { int error; if (args->level < 0 || args->level > 3) return (EINVAL); if ((error = priv_check(td, PRIV_IO)) != 0) return (error); if ((error = securelevel_gt(td->td_ucred, 0)) != 0) return (error); td->td_frame->tf_rflags = (td->td_frame->tf_rflags & ~PSL_IOPL) | (args->level * (PSL_IOPL / 3)); return (0); } int linux_sigaction(struct thread *td, struct linux_sigaction_args *args) { l_osigaction_t osa; l_sigaction_t act, oact; int error; if (args->nsa != NULL) { error = copyin(args->nsa, &osa, sizeof(l_osigaction_t)); if (error) return (error); act.lsa_handler = osa.lsa_handler; act.lsa_flags = osa.lsa_flags; act.lsa_restorer = osa.lsa_restorer; LINUX_SIGEMPTYSET(act.lsa_mask); act.lsa_mask.__mask = osa.lsa_mask; } error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL, args->osa ? &oact : NULL); if (args->osa != NULL && !error) { osa.lsa_handler = oact.lsa_handler; osa.lsa_flags = oact.lsa_flags; osa.lsa_restorer = oact.lsa_restorer; osa.lsa_mask = oact.lsa_mask.__mask; error = copyout(&osa, args->osa, sizeof(l_osigaction_t)); } return (error); } /* * Linux has two extra args, restart and oldmask. We don't use these, * but it seems that "restart" is actually a context pointer that * enables the signal to happen with a different register set. 
*/ int linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args) { sigset_t sigmask; l_sigset_t mask; LINUX_SIGEMPTYSET(mask); mask.__mask = args->mask; linux_to_bsd_sigset(&mask, &sigmask); return (kern_sigsuspend(td, sigmask)); } int linux_pause(struct thread *td, struct linux_pause_args *args) { struct proc *p = td->td_proc; sigset_t sigmask; PROC_LOCK(p); sigmask = td->td_sigmask; PROC_UNLOCK(p); return (kern_sigsuspend(td, sigmask)); } int linux_gettimeofday(struct thread *td, struct linux_gettimeofday_args *uap) { struct timeval atv; l_timeval atv32; struct timezone rtz; int error = 0; if (uap->tp) { microtime(&atv); atv32.tv_sec = atv.tv_sec; atv32.tv_usec = atv.tv_usec; error = copyout(&atv32, uap->tp, sizeof(atv32)); } if (error == 0 && uap->tzp != NULL) { rtz.tz_minuteswest = 0; rtz.tz_dsttime = 0; error = copyout(&rtz, uap->tzp, sizeof(rtz)); } return (error); } int linux_settimeofday(struct thread *td, struct linux_settimeofday_args *uap) { l_timeval atv32; struct timeval atv, *tvp; struct timezone atz, *tzp; int error; if (uap->tp) { error = copyin(uap->tp, &atv32, sizeof(atv32)); if (error) return (error); atv.tv_sec = atv32.tv_sec; atv.tv_usec = atv32.tv_usec; tvp = &atv; } else tvp = NULL; if (uap->tzp) { error = copyin(uap->tzp, &atz, sizeof(atz)); if (error) return (error); tzp = &atz; } else tzp = NULL; return (kern_settimeofday(td, tvp, tzp)); } int linux_getrusage(struct thread *td, struct linux_getrusage_args *uap) { struct rusage s; int error; error = kern_getrusage(td, uap->who, &s); if (error != 0) return (error); if (uap->rusage != NULL) error = linux_copyout_rusage(&s, uap->rusage); return (error); } int linux_set_thread_area(struct thread *td, struct linux_set_thread_area_args *args) { struct l_user_desc info; struct pcb *pcb; int error; error = copyin(args->desc, &info, sizeof(struct l_user_desc)); if (error) return (error); /* * Semantics of Linux version: every thread in the system has array * of three TLS descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. * This syscall loads one of the selected TLS decriptors with a value * and also loads GDT descriptors 6, 7 and 8 with the content of * the per-thread descriptors. * * Semantics of FreeBSD version: I think we can ignore that Linux has * three per-thread descriptors and use just the first one. * The tls_array[] is used only in [gs]et_thread_area() syscalls and * for loading the GDT descriptors. We use just one GDT descriptor * for TLS, so we will load just one. * * XXX: This doesn't work when a user space process tries to use more * than one TLS segment. Comment in the Linux source says wine might * do this. */ /* * GLIBC reads current %gs and call set_thread_area() with it. * We should let GUDATA_SEL and GUGS32_SEL proceed as well because * we use these segments. */ switch (info.entry_number) { case GUGS32_SEL: case GUDATA_SEL: case 6: case -1: info.entry_number = GUGS32_SEL; break; default: return (EINVAL); } /* * We have to copy out the GDT entry we use. * * XXX: What if a user space program does not check the return value * and tries to use 6, 7 or 8? 
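[Illustrative aside, not part of this change] A minimal, hypothetical sketch of the user-space side of this contract may help here: in the usual glibc convention for Linux/i386 set_thread_area(2), entry_number == -1 asks the kernel to pick a slot, the chosen slot is written back into the descriptor, and the thread then loads %gs with the corresponding selector — which is why the (rewritten) entry_number has to be copied back out below. The name tcb and the exact field values are placeholders, not taken from this change.

    // Hypothetical Linux/i386 user-space usage of set_thread_area(2).
    // Assumes <asm/ldt.h>, <sys/syscall.h>, <unistd.h>, <stdint.h>.
    struct user_desc desc = {
        .entry_number   = -1,        // ask the kernel to allocate a slot
        .base_addr      = (unsigned int)(uintptr_t)tcb,   // tcb: placeholder TLS block
        .limit          = 0xfffff,
        .seg_32bit      = 1,
        .limit_in_pages = 1,
        .useable        = 1,
    };
    if (syscall(SYS_set_thread_area, &desc) == 0) {
        // The kernel wrote its chosen slot back into desc.entry_number.
        unsigned short sel = (desc.entry_number << 3) | 3;
        // The thread would now load %gs with 'sel' (e.g. via inline asm)
        // before performing %gs-relative TLS accesses.
        (void)sel;
    }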
*/ error = copyout(&info, args->desc, sizeof(struct l_user_desc)); if (error) return (error); pcb = td->td_pcb; update_pcb_bases(pcb); pcb->pcb_gsbase = (register_t)info.base_addr; update_gdt_gsbase(td, info.base_addr); return (0); } void bsd_to_linux_regset32(const struct reg32 *b_reg, struct linux_pt_regset32 *l_regset) { l_regset->ebx = b_reg->r_ebx; l_regset->ecx = b_reg->r_ecx; l_regset->edx = b_reg->r_edx; l_regset->esi = b_reg->r_esi; l_regset->edi = b_reg->r_edi; l_regset->ebp = b_reg->r_ebp; l_regset->eax = b_reg->r_eax; l_regset->ds = b_reg->r_ds; l_regset->es = b_reg->r_es; l_regset->fs = b_reg->r_fs; l_regset->gs = b_reg->r_gs; l_regset->orig_eax = b_reg->r_eax; l_regset->eip = b_reg->r_eip; l_regset->cs = b_reg->r_cs; l_regset->eflags = b_reg->r_eflags; l_regset->esp = b_reg->r_esp; l_regset->ss = b_reg->r_ss; } int futex_xchgl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_xchgl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_xchgl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_xchgl_smap : futex_xchgl_nosmap); } int futex_addl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_addl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_addl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_addl_smap : futex_addl_nosmap); } int futex_orl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_orl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_orl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_orl_smap : futex_orl_nosmap); } int futex_andl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_andl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_andl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_andl_smap : futex_andl_nosmap); } int futex_xorl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_xorl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_xorl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_xorl_smap : futex_xorl_nosmap); } diff --git a/sys/arm64/linux/linux_machdep.c b/sys/arm64/linux/linux_machdep.c index 317807ba2e45..420f50cb3055 100644 --- a/sys/arm64/linux/linux_machdep.c +++ b/sys/arm64/linux/linux_machdep.c @@ -1,184 +1,184 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2018 Turing Robotic Industries Inc. * Copyright (c) 2000 Marcel Moolenaar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define LINUX_ARCH_AARCH64 0xc00000b7 /* DTrace init */ LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); /* DTrace probes */ LIN_SDT_PROBE_DEFINE0(machdep, linux_mmap2, todo); /* * LINUXTODO: deduplicate; linux_execve is common across archs, except that on * amd64 compat linuxulator it calls freebsd32_exec_copyin_args. */ int linux_execve(struct thread *td, struct linux_execve_args *uap) { struct image_args eargs; char *path; int error; if (!LUSECONVPATH(td)) { error = exec_copyin_args(&eargs, uap->path, UIO_USERSPACE, uap->argp, uap->envp); } else { - LCONVPATHEXIST(td, uap->path, &path); + LCONVPATHEXIST(uap->path, &path); error = exec_copyin_args(&eargs, path, UIO_SYSSPACE, uap->argp, uap->envp); LFREEPATH(path); } if (error == 0) error = linux_common_execve(td, &eargs); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); return (error); } int linux_set_upcall(struct thread *td, register_t stack) { if (stack) td->td_frame->tf_sp = stack; /* * The newly created Linux thread returns * to the user space by the same path that a parent does. 
*/ td->td_frame->tf_x[0] = 0; return (0); } /* LINUXTODO: deduplicate arm64 linux_mmap2 */ int linux_mmap2(struct thread *td, struct linux_mmap2_args *uap) { LIN_SDT_PROBE0(machdep, linux_mmap2, todo); return (linux_mmap_common(td, PTROUT(uap->addr), uap->len, uap->prot, uap->flags, uap->fd, uap->pgoff)); } int linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) { return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot)); } int linux_madvise(struct thread *td, struct linux_madvise_args *uap) { return (linux_madvise_common(td, PTROUT(uap->addr), uap->len, uap->behav)); } int linux_set_cloned_tls(struct thread *td, void *desc) { if ((uint64_t)desc >= VM_MAXUSER_ADDRESS) return (EPERM); return (cpu_set_user_tls(td, desc)); } void bsd_to_linux_regset(const struct reg *b_reg, struct linux_pt_regset *l_regset) { KASSERT(sizeof(l_regset->x) == sizeof(b_reg->x) + sizeof(l_ulong), ("%s: size mismatch\n", __func__)); memcpy(l_regset->x, b_reg->x, sizeof(b_reg->x)); l_regset->x[30] = b_reg->lr; l_regset->sp = b_reg->sp; l_regset->pc = b_reg->elr; l_regset->cpsr = b_reg->spsr; } void linux_to_bsd_regset(struct reg *b_reg, const struct linux_pt_regset *l_regset) { KASSERT(sizeof(l_regset->x) == sizeof(b_reg->x) + sizeof(l_ulong), ("%s: size mismatch\n", __func__)); memcpy(b_reg->x, l_regset->x, sizeof(b_reg->x)); b_reg->sp = l_regset->sp; b_reg->elr = l_regset->pc; b_reg->spsr = l_regset->cpsr; } void linux_ptrace_get_syscall_info_machdep(const struct reg *reg, struct syscall_info *si) { si->arch = LINUX_ARCH_AARCH64; si->instruction_pointer = reg->lr; si->stack_pointer = reg->sp; } int linux_ptrace_getregs_machdep(struct thread *td __unused, pid_t pid __unused, struct linux_pt_regset *l_regset __unused) { return (0); } diff --git a/sys/compat/linux/linux_emul.c b/sys/compat/linux/linux_emul.c index 86688d50c8a8..e7576ed2b16f 100644 --- a/sys/compat/linux/linux_emul.c +++ b/sys/compat/linux/linux_emul.c @@ -1,383 +1,382 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1994-1996 Søren Schmidt * Copyright (c) 2006 Roman Divacky * All rights reserved. * Copyright (c) 2013 Dmitry Chagin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if BYTE_ORDER == LITTLE_ENDIAN #define SHELLMAGIC 0x2123 /* #! */ #else #define SHELLMAGIC 0x2321 #endif /* * This returns reference to the thread emuldata entry (if found) * * Hold PROC_LOCK when referencing emuldata from other threads. */ struct linux_emuldata * em_find(struct thread *td) { struct linux_emuldata *em; em = td->td_emuldata; return (em); } /* * This returns reference to the proc pemuldata entry (if found) * * Hold PROC_LOCK when referencing proc pemuldata from other threads. * Hold LINUX_PEM_LOCK wher referencing pemuldata members. */ struct linux_pemuldata * pem_find(struct proc *p) { struct linux_pemuldata *pem; pem = p->p_emuldata; return (pem); } /* * Linux apps generally expect the soft open file limit to be set * to 1024, often iterating over all the file descriptors up to that * limit instead of using closefrom(2). Give them what they want, * unless there already is a resource limit in place. */ static void linux_set_default_openfiles(struct thread *td, struct proc *p) { struct rlimit rlim; int error; if (linux_default_openfiles < 0) return; PROC_LOCK(p); lim_rlimit_proc(p, RLIMIT_NOFILE, &rlim); PROC_UNLOCK(p); if (rlim.rlim_cur != rlim.rlim_max || rlim.rlim_cur <= linux_default_openfiles) return; rlim.rlim_cur = linux_default_openfiles; error = kern_proc_setrlimit(td, p, RLIMIT_NOFILE, &rlim); KASSERT(error == 0, ("kern_proc_setrlimit failed")); } /* * The default stack size limit in Linux is 8MB. */ static void linux_set_default_stacksize(struct thread *td, struct proc *p) { struct rlimit rlim; int error; if (linux_default_stacksize < 0) return; PROC_LOCK(p); lim_rlimit_proc(p, RLIMIT_STACK, &rlim); PROC_UNLOCK(p); if (rlim.rlim_cur != rlim.rlim_max || rlim.rlim_cur <= linux_default_stacksize) return; rlim.rlim_cur = linux_default_stacksize; error = kern_proc_setrlimit(td, p, RLIMIT_STACK, &rlim); KASSERT(error == 0, ("kern_proc_setrlimit failed")); } void linux_proc_init(struct thread *td, struct thread *newtd, bool init_thread) { struct linux_emuldata *em; struct linux_pemuldata *pem; struct proc *p; if (newtd != NULL) { p = newtd->td_proc; /* non-exec call */ em = malloc(sizeof(*em), M_TEMP, M_WAITOK | M_ZERO); if (init_thread) { LINUX_CTR1(proc_init, "thread newtd(%d)", newtd->td_tid); em->em_tid = newtd->td_tid; } else { LINUX_CTR1(proc_init, "fork newtd(%d)", p->p_pid); em->em_tid = p->p_pid; pem = malloc(sizeof(*pem), M_LINUX, M_WAITOK | M_ZERO); sx_init(&pem->pem_sx, "lpemlk"); p->p_emuldata = pem; } newtd->td_emuldata = em; linux_set_default_openfiles(td, p); linux_set_default_stacksize(td, p); } else { p = td->td_proc; /* exec */ LINUX_CTR1(proc_init, "exec newtd(%d)", p->p_pid); /* lookup the old one */ em = em_find(td); KASSERT(em != NULL, ("proc_init: emuldata not found in exec case.\n")); em->em_tid = p->p_pid; em->flags = 0; em->robust_futexes = NULL; em->child_clear_tid = NULL; em->child_set_tid = NULL; pem = pem_find(p); KASSERT(pem != NULL, ("proc_exit: proc emuldata not found.\n")); pem->persona = 0; } } void linux_on_exit(struct proc *p) { struct linux_pemuldata *pem; struct thread *td = curthread; MPASS(SV_CURPROC_ABI() == SV_ABI_LINUX); LINUX_CTR3(proc_exit, "thread(%d) proc(%d) p %p", td->td_tid, p->p_pid, p); pem = pem_find(p); if (pem == NULL) return; (p->p_sysent->sv_thread_detach)(td); p->p_emuldata = NULL; sx_destroy(&pem->pem_sx); 
free(pem, M_LINUX); } /* * If a Linux binary is exec'ing something, try this image activator * first. We override standard shell script execution in order to * be able to modify the interpreter path. We only do this if a Linux * binary is doing the exec, so we do not create an EXEC module for it. */ int linux_exec_imgact_try(struct image_params *imgp) { const char *head = (const char *)imgp->image_header; char *rpath; int error = -1; /* * The interpreter for shell scripts run from a Linux binary needs * to be located in /compat/linux if possible in order to recursively * maintain Linux path emulation. */ if (((const short *)head)[0] == SHELLMAGIC) { /* * Run our normal shell image activator. If it succeeds attempt * to use the alternate path for the interpreter. If an * alternate path is found, use our stringspace to store it. */ if ((error = exec_shell_imgact(imgp)) == 0) { - linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), - imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, - AT_FDCWD); + linux_emul_convpath(imgp->interpreter_name, UIO_SYSSPACE, + &rpath, 0, AT_FDCWD); if (rpath != NULL) imgp->args->fname_buf = imgp->interpreter_name = rpath; } } return (error); } int linux_common_execve(struct thread *td, struct image_args *eargs) { struct linux_pemuldata *pem; struct vmspace *oldvmspace; struct linux_emuldata *em; struct proc *p; int error; p = td->td_proc; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = kern_execve(td, eargs, NULL, oldvmspace); post_execve(td, error, oldvmspace); if (error != EJUSTRETURN) return (error); /* * In a case of transition from Linux binary execing to * FreeBSD binary we destroy Linux emuldata thread & proc entries. */ if (SV_CURPROC_ABI() != SV_ABI_LINUX) { PROC_LOCK(p); em = em_find(td); KASSERT(em != NULL, ("proc_exec: thread emuldata not found.\n")); td->td_emuldata = NULL; pem = pem_find(p); KASSERT(pem != NULL, ("proc_exec: proc pemuldata not found.\n")); p->p_emuldata = NULL; PROC_UNLOCK(p); free(em, M_TEMP); free(pem, M_LINUX); } return (EJUSTRETURN); } void linux_on_exec(struct proc *p, struct image_params *imgp) { struct thread *td; struct thread *othertd; #if defined(__amd64__) struct linux_pemuldata *pem; #endif td = curthread; MPASS((imgp->sysent->sv_flags & SV_ABI_MASK) == SV_ABI_LINUX); /* * When execing to Linux binary, we create Linux emuldata * thread entry. */ if (SV_PROC_ABI(p) == SV_ABI_LINUX) { /* * Process already was under Linuxolator * before exec. Update emuldata to reflect * single-threaded cleaned state after exec. */ linux_proc_init(td, NULL, false); } else { /* * We are switching the process to Linux emulator. */ linux_proc_init(td, td, false); /* * Create a transient td_emuldata for all suspended * threads, so that p->p_sysent->sv_thread_detach() == * linux_thread_detach() can find expected but unused * emuldata. */ FOREACH_THREAD_IN_PROC(td->td_proc, othertd) { if (othertd == td) continue; linux_proc_init(td, othertd, true); } } #if defined(__amd64__) /* * An IA32 executable which has executable stack will have the * READ_IMPLIES_EXEC personality flag set automatically. 
*/ if (SV_PROC_FLAG(td->td_proc, SV_ILP32) && imgp->stack_prot & VM_PROT_EXECUTE) { pem = pem_find(p); pem->persona |= LINUX_READ_IMPLIES_EXEC; } #endif } void linux_thread_dtor(struct thread *td) { struct linux_emuldata *em; em = em_find(td); if (em == NULL) return; td->td_emuldata = NULL; LINUX_CTR1(thread_dtor, "thread(%d)", em->em_tid); free(em, M_TEMP); } void linux_schedtail(struct thread *td) { struct linux_emuldata *em; struct proc *p; int error = 0; int *child_set_tid; p = td->td_proc; em = em_find(td); KASSERT(em != NULL, ("linux_schedtail: thread emuldata not found.\n")); child_set_tid = em->child_set_tid; if (child_set_tid != NULL) { error = copyout(&em->em_tid, child_set_tid, sizeof(em->em_tid)); LINUX_CTR4(schedtail, "thread(%d) %p stored %d error %d", td->td_tid, child_set_tid, em->em_tid, error); } else LINUX_CTR1(schedtail, "thread(%d)", em->em_tid); } diff --git a/sys/compat/linux/linux_file.c b/sys/compat/linux/linux_file.c index bda843286b26..285faaf8927b 100644 --- a/sys/compat/linux/linux_file.c +++ b/sys/compat/linux/linux_file.c @@ -1,2081 +1,2081 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1994-1995 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_LINUX32 #include #include #include #else #include #include #endif #include #include #include static int linux_common_open(struct thread *, int, const char *, int, int, enum uio_seg); static int linux_do_accessat(struct thread *t, int, const char *, int, int); static int linux_getdents_error(struct thread *, int, int); static struct bsd_to_linux_bitmap seal_bitmap[] = { BITMAP_1t1_LINUX(F_SEAL_SEAL), BITMAP_1t1_LINUX(F_SEAL_SHRINK), BITMAP_1t1_LINUX(F_SEAL_GROW), BITMAP_1t1_LINUX(F_SEAL_WRITE), }; #define MFD_HUGETLB_ENTRY(_size) \ { \ .bsd_value = MFD_HUGE_##_size, \ .linux_value = LINUX_HUGETLB_FLAG_ENCODE_##_size \ } static struct bsd_to_linux_bitmap mfd_bitmap[] = { BITMAP_1t1_LINUX(MFD_CLOEXEC), BITMAP_1t1_LINUX(MFD_ALLOW_SEALING), BITMAP_1t1_LINUX(MFD_HUGETLB), MFD_HUGETLB_ENTRY(64KB), MFD_HUGETLB_ENTRY(512KB), MFD_HUGETLB_ENTRY(1MB), MFD_HUGETLB_ENTRY(2MB), MFD_HUGETLB_ENTRY(8MB), MFD_HUGETLB_ENTRY(16MB), MFD_HUGETLB_ENTRY(32MB), MFD_HUGETLB_ENTRY(256MB), MFD_HUGETLB_ENTRY(512MB), MFD_HUGETLB_ENTRY(1GB), MFD_HUGETLB_ENTRY(2GB), MFD_HUGETLB_ENTRY(16GB), }; #undef MFD_HUGETLB_ENTRY #ifdef LINUX_LEGACY_SYSCALLS int linux_creat(struct thread *td, struct linux_creat_args *args) { char *path; int error; if (!LUSECONVPATH(td)) { return (kern_openat(td, AT_FDCWD, args->path, UIO_USERSPACE, O_WRONLY | O_CREAT | O_TRUNC, args->mode)); } - LCONVPATHEXIST(td, args->path, &path); + LCONVPATHEXIST(args->path, &path); error = kern_openat(td, AT_FDCWD, path, UIO_SYSSPACE, O_WRONLY | O_CREAT | O_TRUNC, args->mode); LFREEPATH(path); return (error); } #endif static int linux_common_openflags(int l_flags) { int bsd_flags; bsd_flags = 0; switch (l_flags & LINUX_O_ACCMODE) { case LINUX_O_WRONLY: bsd_flags |= O_WRONLY; break; case LINUX_O_RDWR: bsd_flags |= O_RDWR; break; default: bsd_flags |= O_RDONLY; } if (l_flags & LINUX_O_NDELAY) bsd_flags |= O_NONBLOCK; if (l_flags & LINUX_O_APPEND) bsd_flags |= O_APPEND; if (l_flags & LINUX_O_SYNC) bsd_flags |= O_FSYNC; if (l_flags & LINUX_O_CLOEXEC) bsd_flags |= O_CLOEXEC; if (l_flags & LINUX_O_NONBLOCK) bsd_flags |= O_NONBLOCK; if (l_flags & LINUX_O_ASYNC) bsd_flags |= O_ASYNC; if (l_flags & LINUX_O_CREAT) bsd_flags |= O_CREAT; if (l_flags & LINUX_O_TRUNC) bsd_flags |= O_TRUNC; if (l_flags & LINUX_O_EXCL) bsd_flags |= O_EXCL; if (l_flags & LINUX_O_NOCTTY) bsd_flags |= O_NOCTTY; if (l_flags & LINUX_O_DIRECT) bsd_flags |= O_DIRECT; if (l_flags & LINUX_O_NOFOLLOW) bsd_flags |= O_NOFOLLOW; if (l_flags & LINUX_O_DIRECTORY) bsd_flags |= O_DIRECTORY; if (l_flags & LINUX_O_PATH) bsd_flags |= O_PATH; /* XXX LINUX_O_NOATIME: unable to be easily implemented. */ return (bsd_flags); } static int linux_common_open(struct thread *td, int dirfd, const char *path, int l_flags, int mode, enum uio_seg seg) { struct proc *p = td->td_proc; struct file *fp; int fd; int bsd_flags, error; bsd_flags = linux_common_openflags(l_flags); error = kern_openat(td, dirfd, path, seg, bsd_flags, mode); if (error != 0) { if (error == EMLINK) error = ELOOP; goto done; } if (p->p_flag & P_CONTROLT) goto done; if (bsd_flags & O_NOCTTY) goto done; /* * XXX In between kern_openat() and fget(), another process * having the same filedesc could use that fd without * checking below. 
*/ fd = td->td_retval[0]; if (fget(td, fd, &cap_ioctl_rights, &fp) == 0) { if (fp->f_type != DTYPE_VNODE) { fdrop(fp, td); goto done; } sx_slock(&proctree_lock); PROC_LOCK(p); if (SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) { PROC_UNLOCK(p); sx_sunlock(&proctree_lock); /* XXXPJD: Verify if TIOCSCTTY is allowed. */ (void) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0, td->td_ucred, td); } else { PROC_UNLOCK(p); sx_sunlock(&proctree_lock); } fdrop(fp, td); } done: return (error); } int linux_openat(struct thread *td, struct linux_openat_args *args) { char *path; int dfd, error; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; if (!LUSECONVPATH(td)) { return (linux_common_open(td, dfd, args->filename, args->flags, args->mode, UIO_USERSPACE)); } if (args->flags & LINUX_O_CREAT) - LCONVPATH_AT(td, args->filename, &path, 1, dfd); + LCONVPATH_AT(args->filename, &path, 1, dfd); else - LCONVPATH_AT(td, args->filename, &path, 0, dfd); + LCONVPATH_AT(args->filename, &path, 0, dfd); error = linux_common_open(td, dfd, path, args->flags, args->mode, UIO_SYSSPACE); LFREEPATH(path); return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_open(struct thread *td, struct linux_open_args *args) { char *path; int error; if (!LUSECONVPATH(td)) { return (linux_common_open(td, AT_FDCWD, args->path, args->flags, args->mode, UIO_USERSPACE)); } if (args->flags & LINUX_O_CREAT) - LCONVPATHCREAT(td, args->path, &path); + LCONVPATHCREAT(args->path, &path); else - LCONVPATHEXIST(td, args->path, &path); + LCONVPATHEXIST(args->path, &path); error = linux_common_open(td, AT_FDCWD, path, args->flags, args->mode, UIO_SYSSPACE); LFREEPATH(path); return (error); } #endif int linux_name_to_handle_at(struct thread *td, struct linux_name_to_handle_at_args *args) { static const l_int valid_flags = (LINUX_AT_SYMLINK_FOLLOW | LINUX_AT_EMPTY_PATH); static const l_uint fh_size = sizeof(fhandle_t); fhandle_t fh; l_uint fh_bytes; l_int mount_id; int error, fd, bsd_flags; if (args->flags & ~valid_flags) return (EINVAL); fd = args->dirfd; if (fd == LINUX_AT_FDCWD) fd = AT_FDCWD; bsd_flags = 0; if (!(args->flags & LINUX_AT_SYMLINK_FOLLOW)) bsd_flags |= AT_SYMLINK_NOFOLLOW; if ((args->flags & LINUX_AT_EMPTY_PATH) != 0) bsd_flags |= AT_EMPTY_PATH; if (!LUSECONVPATH(td)) { error = kern_getfhat(td, bsd_flags, fd, args->name, UIO_USERSPACE, &fh, UIO_SYSSPACE); } else { char *path; - LCONVPATH_AT(td, args->name, &path, 0, fd); + LCONVPATH_AT(args->name, &path, 0, fd); error = kern_getfhat(td, bsd_flags, fd, path, UIO_SYSSPACE, &fh, UIO_SYSSPACE); LFREEPATH(path); } if (error != 0) return (error); /* Emit mount_id -- required before EOVERFLOW case. */ mount_id = (fh.fh_fsid.val[0] ^ fh.fh_fsid.val[1]); error = copyout(&mount_id, args->mnt_id, sizeof(mount_id)); if (error != 0) return (error); /* Check if there is room for handle. */ error = copyin(&args->handle->handle_bytes, &fh_bytes, sizeof(fh_bytes)); if (error != 0) return (error); if (fh_bytes < fh_size) { error = copyout(&fh_size, &args->handle->handle_bytes, sizeof(fh_size)); if (error == 0) error = EOVERFLOW; return (error); } /* Emit handle. */ mount_id = 0; /* * We don't use handle_type for anything yet, but initialize a known * value. 
*/ error = copyout(&mount_id, &args->handle->handle_type, sizeof(mount_id)); if (error != 0) return (error); error = copyout(&fh, &args->handle->f_handle, sizeof(fh)); return (error); } int linux_open_by_handle_at(struct thread *td, struct linux_open_by_handle_at_args *args) { l_uint fh_bytes; int bsd_flags, error; error = copyin(&args->handle->handle_bytes, &fh_bytes, sizeof(fh_bytes)); if (error != 0) return (error); if (fh_bytes < sizeof(fhandle_t)) return (EINVAL); bsd_flags = linux_common_openflags(args->flags); return (kern_fhopen(td, (void *)&args->handle->f_handle, bsd_flags)); } int linux_lseek(struct thread *td, struct linux_lseek_args *args) { return (kern_lseek(td, args->fdes, args->off, args->whence)); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_llseek(struct thread *td, struct linux_llseek_args *args) { int error; off_t off; off = (args->olow) | (((off_t) args->ohigh) << 32); error = kern_lseek(td, args->fd, off, args->whence); if (error != 0) return (error); error = copyout(td->td_retval, args->res, sizeof(off_t)); if (error != 0) return (error); td->td_retval[0] = 0; return (0); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ /* * Note that linux_getdents(2) and linux_getdents64(2) have the same * arguments. They only differ in the definition of struct dirent they * operate on. * Note that linux_readdir(2) is a special case of linux_getdents(2) * where count is always equals 1, meaning that the buffer is one * dirent-structure in size and that the code can't handle more anyway. * Note that linux_readdir(2) can't be implemented by means of linux_getdents(2) * as in case when the *dent buffer size is equal to 1 linux_getdents(2) will * trash user stack. */ static int linux_getdents_error(struct thread *td, int fd, int err) { struct vnode *vp; struct file *fp; int error; /* Linux return ENOTDIR in case when fd is not a directory. */ error = getvnode(td, fd, &cap_read_rights, &fp); if (error != 0) return (error); vp = fp->f_vnode; if (vp->v_type != VDIR) { fdrop(fp, td); return (ENOTDIR); } fdrop(fp, td); return (err); } struct l_dirent { l_ulong d_ino; l_off_t d_off; l_ushort d_reclen; char d_name[LINUX_NAME_MAX + 1]; }; struct l_dirent64 { uint64_t d_ino; int64_t d_off; l_ushort d_reclen; u_char d_type; char d_name[LINUX_NAME_MAX + 1]; }; /* * Linux uses the last byte in the dirent buffer to store d_type, * at least glibc-2.7 requires it. That is why l_dirent is padded with 2 bytes. 
*/ #define LINUX_RECLEN(namlen) \ roundup(offsetof(struct l_dirent, d_name) + (namlen) + 2, sizeof(l_ulong)) #define LINUX_RECLEN64(namlen) \ roundup(offsetof(struct l_dirent64, d_name) + (namlen) + 1, \ sizeof(uint64_t)) #ifdef LINUX_LEGACY_SYSCALLS int linux_getdents(struct thread *td, struct linux_getdents_args *args) { struct dirent *bdp; caddr_t inp, buf; /* BSD-format */ int len, reclen; /* BSD-format */ caddr_t outp; /* Linux-format */ int resid, linuxreclen; /* Linux-format */ caddr_t lbuf; /* Linux-format */ off_t base; struct l_dirent *linux_dirent; int buflen, error; size_t retval; buflen = min(args->count, MAXBSIZE); buf = malloc(buflen, M_TEMP, M_WAITOK); error = kern_getdirentries(td, args->fd, buf, buflen, &base, NULL, UIO_SYSSPACE); if (error != 0) { error = linux_getdents_error(td, args->fd, error); goto out1; } lbuf = malloc(LINUX_RECLEN(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO); len = td->td_retval[0]; inp = buf; outp = (caddr_t)args->dent; resid = args->count; retval = 0; while (len > 0) { bdp = (struct dirent *) inp; reclen = bdp->d_reclen; linuxreclen = LINUX_RECLEN(bdp->d_namlen); /* * No more space in the user supplied dirent buffer. * Return EINVAL. */ if (resid < linuxreclen) { error = EINVAL; goto out; } linux_dirent = (struct l_dirent*)lbuf; linux_dirent->d_ino = bdp->d_fileno; linux_dirent->d_off = base + reclen; linux_dirent->d_reclen = linuxreclen; /* * Copy d_type to last byte of l_dirent buffer */ lbuf[linuxreclen - 1] = bdp->d_type; strlcpy(linux_dirent->d_name, bdp->d_name, linuxreclen - offsetof(struct l_dirent, d_name)-1); error = copyout(linux_dirent, outp, linuxreclen); if (error != 0) goto out; inp += reclen; base += reclen; len -= reclen; retval += linuxreclen; outp += linuxreclen; resid -= linuxreclen; } td->td_retval[0] = retval; out: free(lbuf, M_TEMP); out1: free(buf, M_TEMP); return (error); } #endif int linux_getdents64(struct thread *td, struct linux_getdents64_args *args) { struct dirent *bdp; caddr_t inp, buf; /* BSD-format */ int len, reclen; /* BSD-format */ caddr_t outp; /* Linux-format */ int resid, linuxreclen; /* Linux-format */ caddr_t lbuf; /* Linux-format */ off_t base; struct l_dirent64 *linux_dirent64; int buflen, error; size_t retval; buflen = min(args->count, MAXBSIZE); buf = malloc(buflen, M_TEMP, M_WAITOK); error = kern_getdirentries(td, args->fd, buf, buflen, &base, NULL, UIO_SYSSPACE); if (error != 0) { error = linux_getdents_error(td, args->fd, error); goto out1; } lbuf = malloc(LINUX_RECLEN64(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO); len = td->td_retval[0]; inp = buf; outp = (caddr_t)args->dirent; resid = args->count; retval = 0; while (len > 0) { bdp = (struct dirent *) inp; reclen = bdp->d_reclen; linuxreclen = LINUX_RECLEN64(bdp->d_namlen); /* * No more space in the user supplied dirent buffer. * Return EINVAL. 
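[Illustrative aside, not part of this change] A minimal, hypothetical consumer-side sketch of the record layout LINUX_RECLEN() produces: the legacy getdents(2) record has no named d_type field, so callers (glibc, per the comment above) read it from the last byte of each record — hence the macro reserves one byte for the NUL terminator and one more for d_type, and linux_getdents() stores bdp->d_type at lbuf[linuxreclen - 1]. buf and nread below are placeholders; the struct name follows the l_dirent definition above.

    // Hypothetical Linux user-space walk over a buffer filled by getdents(2):
    char *p = buf;                        // buf/nread: placeholder buffer and byte count
    while (p < buf + nread) {
        struct l_dirent *d = (struct l_dirent *)p;
        // d_type lives in the last byte of the record, after the NUL-terminated name.
        unsigned char d_type = ((unsigned char *)p)[d->d_reclen - 1];
        // ... use d->d_ino, d->d_name and d_type here ...
        p += d->d_reclen;                 // advance to the next record
    }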
*/ if (resid < linuxreclen) { error = EINVAL; goto out; } linux_dirent64 = (struct l_dirent64*)lbuf; linux_dirent64->d_ino = bdp->d_fileno; linux_dirent64->d_off = base + reclen; linux_dirent64->d_reclen = linuxreclen; linux_dirent64->d_type = bdp->d_type; strlcpy(linux_dirent64->d_name, bdp->d_name, linuxreclen - offsetof(struct l_dirent64, d_name)); error = copyout(linux_dirent64, outp, linuxreclen); if (error != 0) goto out; inp += reclen; base += reclen; len -= reclen; retval += linuxreclen; outp += linuxreclen; resid -= linuxreclen; } td->td_retval[0] = retval; out: free(lbuf, M_TEMP); out1: free(buf, M_TEMP); return (error); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_readdir(struct thread *td, struct linux_readdir_args *args) { struct dirent *bdp; caddr_t buf; /* BSD-format */ int linuxreclen; /* Linux-format */ caddr_t lbuf; /* Linux-format */ off_t base; struct l_dirent *linux_dirent; int buflen, error; buflen = LINUX_RECLEN(LINUX_NAME_MAX); buf = malloc(buflen, M_TEMP, M_WAITOK); error = kern_getdirentries(td, args->fd, buf, buflen, &base, NULL, UIO_SYSSPACE); if (error != 0) { error = linux_getdents_error(td, args->fd, error); goto out; } if (td->td_retval[0] == 0) goto out; lbuf = malloc(LINUX_RECLEN(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO); bdp = (struct dirent *) buf; linuxreclen = LINUX_RECLEN(bdp->d_namlen); linux_dirent = (struct l_dirent*)lbuf; linux_dirent->d_ino = bdp->d_fileno; linux_dirent->d_off = linuxreclen; linux_dirent->d_reclen = bdp->d_namlen; strlcpy(linux_dirent->d_name, bdp->d_name, linuxreclen - offsetof(struct l_dirent, d_name)); error = copyout(linux_dirent, args->dent, linuxreclen); if (error == 0) td->td_retval[0] = linuxreclen; free(lbuf, M_TEMP); out: free(buf, M_TEMP); return (error); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ /* * These exist mainly for hooks for doing /compat/linux translation. */ #ifdef LINUX_LEGACY_SYSCALLS int linux_access(struct thread *td, struct linux_access_args *args) { char *path; int error; /* Linux convention. */ if (args->amode & ~(F_OK | X_OK | W_OK | R_OK)) return (EINVAL); if (!LUSECONVPATH(td)) { error = kern_accessat(td, AT_FDCWD, args->path, UIO_USERSPACE, 0, args->amode); } else { - LCONVPATHEXIST(td, args->path, &path); + LCONVPATHEXIST(args->path, &path); error = kern_accessat(td, AT_FDCWD, path, UIO_SYSSPACE, 0, args->amode); LFREEPATH(path); } return (error); } #endif static int linux_do_accessat(struct thread *td, int ldfd, const char *filename, int amode, int flags) { char *path; int error, dfd; /* Linux convention. */ if (amode & ~(F_OK | X_OK | W_OK | R_OK)) return (EINVAL); dfd = (ldfd == LINUX_AT_FDCWD) ? AT_FDCWD : ldfd; if (!LUSECONVPATH(td)) { error = kern_accessat(td, dfd, filename, UIO_USERSPACE, flags, amode); } else { - LCONVPATHEXIST_AT(td, filename, &path, dfd); + LCONVPATHEXIST_AT(filename, &path, dfd); error = kern_accessat(td, dfd, path, UIO_SYSSPACE, flags, amode); LFREEPATH(path); } return (error); } int linux_faccessat(struct thread *td, struct linux_faccessat_args *args) { return (linux_do_accessat(td, args->dfd, args->filename, args->amode, 0)); } int linux_faccessat2(struct thread *td, struct linux_faccessat2_args *args) { int flags, unsupported; /* XXX. 
AT_SYMLINK_NOFOLLOW is not supported by kern_accessat */ unsupported = args->flags & ~(LINUX_AT_EACCESS | LINUX_AT_EMPTY_PATH); if (unsupported != 0) { linux_msg(td, "faccessat2 unsupported flag 0x%x", unsupported); return (EINVAL); } flags = (args->flags & LINUX_AT_EACCESS) == 0 ? 0 : AT_EACCESS; flags |= (args->flags & LINUX_AT_EMPTY_PATH) == 0 ? 0 : AT_EMPTY_PATH; return (linux_do_accessat(td, args->dfd, args->filename, args->amode, flags)); } #ifdef LINUX_LEGACY_SYSCALLS int linux_unlink(struct thread *td, struct linux_unlink_args *args) { char *path; int error; struct stat st; if (!LUSECONVPATH(td)) { error = kern_funlinkat(td, AT_FDCWD, args->path, FD_NONE, UIO_USERSPACE, 0, 0); if (error == EPERM) { /* Introduce POSIX noncompliant behaviour of Linux */ if (kern_statat(td, 0, AT_FDCWD, args->path, UIO_SYSSPACE, &st, NULL) == 0) { if (S_ISDIR(st.st_mode)) error = EISDIR; } } } else { - LCONVPATHEXIST(td, args->path, &path); + LCONVPATHEXIST(args->path, &path); error = kern_funlinkat(td, AT_FDCWD, path, FD_NONE, UIO_SYSSPACE, 0, 0); if (error == EPERM) { /* Introduce POSIX noncompliant behaviour of Linux */ if (kern_statat(td, 0, AT_FDCWD, path, UIO_SYSSPACE, &st, NULL) == 0) { if (S_ISDIR(st.st_mode)) error = EISDIR; } } LFREEPATH(path); } return (error); } #endif static int linux_unlinkat_impl(struct thread *td, enum uio_seg pathseg, const char *path, int dfd, struct linux_unlinkat_args *args) { struct stat st; int error; if (args->flag & LINUX_AT_REMOVEDIR) error = kern_frmdirat(td, dfd, path, FD_NONE, pathseg, 0); else error = kern_funlinkat(td, dfd, path, FD_NONE, pathseg, 0, 0); if (error == EPERM && !(args->flag & LINUX_AT_REMOVEDIR)) { /* Introduce POSIX noncompliant behaviour of Linux */ if (kern_statat(td, AT_SYMLINK_NOFOLLOW, dfd, path, UIO_SYSSPACE, &st, NULL) == 0 && S_ISDIR(st.st_mode)) error = EISDIR; } return (error); } int linux_unlinkat(struct thread *td, struct linux_unlinkat_args *args) { char *path; int error, dfd; if (args->flag & ~LINUX_AT_REMOVEDIR) return (EINVAL); dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; if (!LUSECONVPATH(td)) { return (linux_unlinkat_impl(td, UIO_USERSPACE, args->pathname, dfd, args)); } - LCONVPATHEXIST_AT(td, args->pathname, &path, dfd); + LCONVPATHEXIST_AT(args->pathname, &path, dfd); error = linux_unlinkat_impl(td, UIO_SYSSPACE, path, dfd, args); LFREEPATH(path); return (error); } int linux_chdir(struct thread *td, struct linux_chdir_args *args) { char *path; int error; if (!LUSECONVPATH(td)) { return (kern_chdir(td, args->path, UIO_USERSPACE)); } - LCONVPATHEXIST(td, args->path, &path); + LCONVPATHEXIST(args->path, &path); error = kern_chdir(td, path, UIO_SYSSPACE); LFREEPATH(path); return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_chmod(struct thread *td, struct linux_chmod_args *args) { char *path; int error; if (!LUSECONVPATH(td)) { return (kern_fchmodat(td, AT_FDCWD, args->path, UIO_USERSPACE, args->mode, 0)); } - LCONVPATHEXIST(td, args->path, &path); + LCONVPATHEXIST(args->path, &path); error = kern_fchmodat(td, AT_FDCWD, path, UIO_SYSSPACE, args->mode, 0); LFREEPATH(path); return (error); } #endif int linux_fchmodat(struct thread *td, struct linux_fchmodat_args *args) { char *path; int error, dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? 
AT_FDCWD : args->dfd; if (!LUSECONVPATH(td)) { return (kern_fchmodat(td, dfd, args->filename, UIO_USERSPACE, args->mode, 0)); } - LCONVPATHEXIST_AT(td, args->filename, &path, dfd); + LCONVPATHEXIST_AT(args->filename, &path, dfd); error = kern_fchmodat(td, dfd, path, UIO_SYSSPACE, args->mode, 0); LFREEPATH(path); return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_mkdir(struct thread *td, struct linux_mkdir_args *args) { char *path; int error; if (!LUSECONVPATH(td)) { return (kern_mkdirat(td, AT_FDCWD, args->path, UIO_USERSPACE, args->mode)); } - LCONVPATHCREAT(td, args->path, &path); + LCONVPATHCREAT(args->path, &path); error = kern_mkdirat(td, AT_FDCWD, path, UIO_SYSSPACE, args->mode); LFREEPATH(path); return (error); } #endif int linux_mkdirat(struct thread *td, struct linux_mkdirat_args *args) { char *path; int error, dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; if (!LUSECONVPATH(td)) { return (kern_mkdirat(td, dfd, args->pathname, UIO_USERSPACE, args->mode)); } - LCONVPATHCREAT_AT(td, args->pathname, &path, dfd); + LCONVPATHCREAT_AT(args->pathname, &path, dfd); error = kern_mkdirat(td, dfd, path, UIO_SYSSPACE, args->mode); LFREEPATH(path); return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_rmdir(struct thread *td, struct linux_rmdir_args *args) { char *path; int error; if (!LUSECONVPATH(td)) { return (kern_frmdirat(td, AT_FDCWD, args->path, FD_NONE, UIO_USERSPACE, 0)); } - LCONVPATHEXIST(td, args->path, &path); + LCONVPATHEXIST(args->path, &path); error = kern_frmdirat(td, AT_FDCWD, path, FD_NONE, UIO_SYSSPACE, 0); LFREEPATH(path); return (error); } int linux_rename(struct thread *td, struct linux_rename_args *args) { char *from, *to; int error; if (!LUSECONVPATH(td)) { return (kern_renameat(td, AT_FDCWD, args->from, AT_FDCWD, args->to, UIO_USERSPACE)); } - LCONVPATHEXIST(td, args->from, &from); + LCONVPATHEXIST(args->from, &from); /* Expand LCONVPATHCREATE so that `from' can be freed on errors */ - error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD); + error = linux_emul_convpath(args->to, UIO_USERSPACE, &to, 1, AT_FDCWD); if (to == NULL) { LFREEPATH(from); return (error); } error = kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, UIO_SYSSPACE); LFREEPATH(from); LFREEPATH(to); return (error); } #endif int linux_renameat(struct thread *td, struct linux_renameat_args *args) { struct linux_renameat2_args renameat2_args = { .olddfd = args->olddfd, .oldname = args->oldname, .newdfd = args->newdfd, .newname = args->newname, .flags = 0 }; return (linux_renameat2(td, &renameat2_args)); } int linux_renameat2(struct thread *td, struct linux_renameat2_args *args) { char *from, *to; int error, olddfd, newdfd; if (args->flags != 0) { if (args->flags & ~(LINUX_RENAME_EXCHANGE | LINUX_RENAME_NOREPLACE | LINUX_RENAME_WHITEOUT)) return (EINVAL); if (args->flags & LINUX_RENAME_EXCHANGE && args->flags & (LINUX_RENAME_NOREPLACE | LINUX_RENAME_WHITEOUT)) return (EINVAL); #if 0 /* * This spams the console on Ubuntu Focal. * * What's needed here is a general mechanism to let users know * about missing features without hogging the system. */ linux_msg(td, "renameat2 unsupported flags 0x%x", args->flags); #endif return (EINVAL); } olddfd = (args->olddfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->olddfd; newdfd = (args->newdfd == LINUX_AT_FDCWD) ? 
AT_FDCWD : args->newdfd; if (!LUSECONVPATH(td)) { return (kern_renameat(td, olddfd, args->oldname, newdfd, args->newname, UIO_USERSPACE)); } - LCONVPATHEXIST_AT(td, args->oldname, &from, olddfd); + LCONVPATHEXIST_AT(args->oldname, &from, olddfd); /* Expand LCONVPATHCREATE so that `from' can be freed on errors */ - error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, newdfd); + error = linux_emul_convpath(args->newname, UIO_USERSPACE, &to, 1, newdfd); if (to == NULL) { LFREEPATH(from); return (error); } error = kern_renameat(td, olddfd, from, newdfd, to, UIO_SYSSPACE); LFREEPATH(from); LFREEPATH(to); return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_symlink(struct thread *td, struct linux_symlink_args *args) { char *path, *to; int error; if (!LUSECONVPATH(td)) { return (kern_symlinkat(td, args->path, AT_FDCWD, args->to, UIO_USERSPACE)); } - LCONVPATHEXIST(td, args->path, &path); + LCONVPATHEXIST(args->path, &path); /* Expand LCONVPATHCREATE so that `path' can be freed on errors */ - error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD); + error = linux_emul_convpath(args->to, UIO_USERSPACE, &to, 1, AT_FDCWD); if (to == NULL) { LFREEPATH(path); return (error); } error = kern_symlinkat(td, path, AT_FDCWD, to, UIO_SYSSPACE); LFREEPATH(path); LFREEPATH(to); return (error); } #endif int linux_symlinkat(struct thread *td, struct linux_symlinkat_args *args) { char *path, *to; int error, dfd; dfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd; if (!LUSECONVPATH(td)) { return (kern_symlinkat(td, args->oldname, dfd, args->newname, UIO_USERSPACE)); } - LCONVPATHEXIST(td, args->oldname, &path); + LCONVPATHEXIST(args->oldname, &path); /* Expand LCONVPATHCREATE so that `path' can be freed on errors */ - error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, dfd); + error = linux_emul_convpath(args->newname, UIO_USERSPACE, &to, 1, dfd); if (to == NULL) { LFREEPATH(path); return (error); } error = kern_symlinkat(td, path, dfd, to, UIO_SYSSPACE); LFREEPATH(path); LFREEPATH(to); return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_readlink(struct thread *td, struct linux_readlink_args *args) { char *name; int error; if (!LUSECONVPATH(td)) { return (kern_readlinkat(td, AT_FDCWD, args->name, UIO_USERSPACE, args->buf, UIO_USERSPACE, args->count)); } - LCONVPATHEXIST(td, args->name, &name); + LCONVPATHEXIST(args->name, &name); error = kern_readlinkat(td, AT_FDCWD, name, UIO_SYSSPACE, args->buf, UIO_USERSPACE, args->count); LFREEPATH(name); return (error); } #endif int linux_readlinkat(struct thread *td, struct linux_readlinkat_args *args) { char *name; int error, dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? 
AT_FDCWD : args->dfd; if (!LUSECONVPATH(td)) { return (kern_readlinkat(td, dfd, args->path, UIO_USERSPACE, args->buf, UIO_USERSPACE, args->bufsiz)); } - LCONVPATHEXIST_AT(td, args->path, &name, dfd); + LCONVPATHEXIST_AT(args->path, &name, dfd); error = kern_readlinkat(td, dfd, name, UIO_SYSSPACE, args->buf, UIO_USERSPACE, args->bufsiz); LFREEPATH(name); return (error); } int linux_truncate(struct thread *td, struct linux_truncate_args *args) { char *path; int error; if (!LUSECONVPATH(td)) { return (kern_truncate(td, args->path, UIO_USERSPACE, args->length)); } - LCONVPATHEXIST(td, args->path, &path); + LCONVPATHEXIST(args->path, &path); error = kern_truncate(td, path, UIO_SYSSPACE, args->length); LFREEPATH(path); return (error); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_truncate64(struct thread *td, struct linux_truncate64_args *args) { char *path; off_t length; int error; #if defined(__amd64__) && defined(COMPAT_LINUX32) length = PAIR32TO64(off_t, args->length); #else length = args->length; #endif if (!LUSECONVPATH(td)) { return (kern_truncate(td, args->path, UIO_USERSPACE, length)); } - LCONVPATHEXIST(td, args->path, &path); + LCONVPATHEXIST(args->path, &path); error = kern_truncate(td, path, UIO_SYSSPACE, length); LFREEPATH(path); return (error); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_ftruncate(struct thread *td, struct linux_ftruncate_args *args) { return (kern_ftruncate(td, args->fd, args->length)); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args) { off_t length; #if defined(__amd64__) && defined(COMPAT_LINUX32) length = PAIR32TO64(off_t, args->length); #else length = args->length; #endif return (kern_ftruncate(td, args->fd, length)); } #endif #ifdef LINUX_LEGACY_SYSCALLS int linux_link(struct thread *td, struct linux_link_args *args) { char *path, *to; int error; if (!LUSECONVPATH(td)) { return (kern_linkat(td, AT_FDCWD, AT_FDCWD, args->path, args->to, UIO_USERSPACE, AT_SYMLINK_FOLLOW)); } - LCONVPATHEXIST(td, args->path, &path); + LCONVPATHEXIST(args->path, &path); /* Expand LCONVPATHCREATE so that `path' can be freed on errors */ - error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD); + error = linux_emul_convpath(args->to, UIO_USERSPACE, &to, 1, AT_FDCWD); if (to == NULL) { LFREEPATH(path); return (error); } error = kern_linkat(td, AT_FDCWD, AT_FDCWD, path, to, UIO_SYSSPACE, AT_SYMLINK_FOLLOW); LFREEPATH(path); LFREEPATH(to); return (error); } #endif int linux_linkat(struct thread *td, struct linux_linkat_args *args) { char *path, *to; int error, olddfd, newdfd, flag; if (args->flag & ~(LINUX_AT_SYMLINK_FOLLOW | LINUX_AT_EMPTY_PATH)) return (EINVAL); flag = (args->flag & LINUX_AT_SYMLINK_FOLLOW) != 0 ? AT_SYMLINK_FOLLOW : 0; flag |= (args->flag & LINUX_AT_EMPTY_PATH) != 0 ? AT_EMPTY_PATH : 0; olddfd = (args->olddfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->olddfd; newdfd = (args->newdfd == LINUX_AT_FDCWD) ? 
AT_FDCWD : args->newdfd; if (!LUSECONVPATH(td)) { return (kern_linkat(td, olddfd, newdfd, args->oldname, args->newname, UIO_USERSPACE, flag)); } - LCONVPATHEXIST_AT(td, args->oldname, &path, olddfd); + LCONVPATHEXIST_AT(args->oldname, &path, olddfd); /* Expand LCONVPATHCREATE so that `path' can be freed on errors */ - error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, newdfd); + error = linux_emul_convpath(args->newname, UIO_USERSPACE, &to, 1, newdfd); if (to == NULL) { LFREEPATH(path); return (error); } error = kern_linkat(td, olddfd, newdfd, path, to, UIO_SYSSPACE, flag); LFREEPATH(path); LFREEPATH(to); return (error); } int linux_fdatasync(struct thread *td, struct linux_fdatasync_args *uap) { return (kern_fsync(td, uap->fd, false)); } int linux_sync_file_range(struct thread *td, struct linux_sync_file_range_args *uap) { off_t nbytes, offset; #if defined(__amd64__) && defined(COMPAT_LINUX32) nbytes = PAIR32TO64(off_t, uap->nbytes); offset = PAIR32TO64(off_t, uap->offset); #else nbytes = uap->nbytes; offset = uap->offset; #endif if (offset < 0 || nbytes < 0 || (uap->flags & ~(LINUX_SYNC_FILE_RANGE_WAIT_BEFORE | LINUX_SYNC_FILE_RANGE_WRITE | LINUX_SYNC_FILE_RANGE_WAIT_AFTER)) != 0) { return (EINVAL); } return (kern_fsync(td, uap->fd, false)); } int linux_pread(struct thread *td, struct linux_pread_args *uap) { struct vnode *vp; off_t offset; int error; #if defined(__amd64__) && defined(COMPAT_LINUX32) offset = PAIR32TO64(off_t, uap->offset); #else offset = uap->offset; #endif error = kern_pread(td, uap->fd, uap->buf, uap->nbyte, offset); if (error == 0) { /* This seems to violate POSIX but Linux does it. */ error = fgetvp(td, uap->fd, &cap_pread_rights, &vp); if (error != 0) return (error); if (vp->v_type == VDIR) error = EISDIR; vrele(vp); } return (error); } int linux_pwrite(struct thread *td, struct linux_pwrite_args *uap) { off_t offset; #if defined(__amd64__) && defined(COMPAT_LINUX32) offset = PAIR32TO64(off_t, uap->offset); #else offset = uap->offset; #endif return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, offset)); } int linux_preadv(struct thread *td, struct linux_preadv_args *uap) { struct uio *auio; int error; off_t offset; /* * According http://man7.org/linux/man-pages/man2/preadv.2.html#NOTES * pos_l and pos_h, respectively, contain the * low order and high order 32 bits of offset. */ offset = (((off_t)uap->pos_h << (sizeof(offset) * 4)) << (sizeof(offset) * 4)) | uap->pos_l; if (offset < 0) return (EINVAL); #ifdef COMPAT_LINUX32 error = linux32_copyinuio(PTRIN(uap->vec), uap->vlen, &auio); #else error = copyinuio(uap->vec, uap->vlen, &auio); #endif if (error != 0) return (error); error = kern_preadv(td, uap->fd, auio, offset); free(auio, M_IOV); return (error); } int linux_pwritev(struct thread *td, struct linux_pwritev_args *uap) { struct uio *auio; int error; off_t offset; /* * According http://man7.org/linux/man-pages/man2/pwritev.2.html#NOTES * pos_l and pos_h, respectively, contain the * low order and high order 32 bits of offset. 
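 * The value is assembled with two half-width shifts, matching the Linux
 * kernel's pos_from_hilo() helper, so that no single shift count reaches
 * the full width of off_t (which would be undefined behaviour).  With a
 * 64-bit off_t the pos_h contribution is shifted out entirely and only
 * pos_l is used, just as the 64-bit Linux kernel does.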
*/ offset = (((off_t)uap->pos_h << (sizeof(offset) * 4)) << (sizeof(offset) * 4)) | uap->pos_l; if (offset < 0) return (EINVAL); #ifdef COMPAT_LINUX32 error = linux32_copyinuio(PTRIN(uap->vec), uap->vlen, &auio); #else error = copyinuio(uap->vec, uap->vlen, &auio); #endif if (error != 0) return (error); error = kern_pwritev(td, uap->fd, auio, offset); free(auio, M_IOV); return (error); } int linux_mount(struct thread *td, struct linux_mount_args *args) { struct mntarg *ma = NULL; char *fstypename, *mntonname, *mntfromname, *data; int error, fsflags; fstypename = malloc(MNAMELEN, M_TEMP, M_WAITOK); mntonname = malloc(MNAMELEN, M_TEMP, M_WAITOK); mntfromname = malloc(MNAMELEN, M_TEMP, M_WAITOK); data = NULL; error = copyinstr(args->filesystemtype, fstypename, MNAMELEN - 1, NULL); if (error != 0) goto out; if (args->specialfile != NULL) { error = copyinstr(args->specialfile, mntfromname, MNAMELEN - 1, NULL); if (error != 0) goto out; } else { mntfromname[0] = '\0'; } error = copyinstr(args->dir, mntonname, MNAMELEN - 1, NULL); if (error != 0) goto out; if (strcmp(fstypename, "ext2") == 0) { strcpy(fstypename, "ext2fs"); } else if (strcmp(fstypename, "proc") == 0) { strcpy(fstypename, "linprocfs"); } else if (strcmp(fstypename, "vfat") == 0) { strcpy(fstypename, "msdosfs"); } else if (strcmp(fstypename, "fuse") == 0 || strncmp(fstypename, "fuse.", 5) == 0) { char *fuse_options, *fuse_option, *fuse_name; strcpy(mntfromname, "/dev/fuse"); strcpy(fstypename, "fusefs"); data = malloc(MNAMELEN, M_TEMP, M_WAITOK); error = copyinstr(args->data, data, MNAMELEN - 1, NULL); if (error != 0) goto out; fuse_options = data; while ((fuse_option = strsep(&fuse_options, ",")) != NULL) { fuse_name = strsep(&fuse_option, "="); if (fuse_name == NULL || fuse_option == NULL) goto out; ma = mount_arg(ma, fuse_name, fuse_option, -1); } /* * The FUSE server uses Linux errno values instead of FreeBSD * ones; add a flag to tell fuse(4) to do errno translation. */ ma = mount_arg(ma, "linux_errnos", "1", -1); } fsflags = 0; /* * Linux SYNC flag is not included; the closest equivalent * FreeBSD has is !ASYNC, which is our default. 
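 * For illustration: a Linux binary issuing
 *   mount("none", "/proc", "proc", MS_RDONLY | MS_NOSUID, NULL)
 * reaches kernel_mount() below as a "linprocfs" mount on "/proc" with
 * MNT_RDONLY and MNT_NOSUID set.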
*/ if (args->rwflag & LINUX_MS_RDONLY) fsflags |= MNT_RDONLY; if (args->rwflag & LINUX_MS_NOSUID) fsflags |= MNT_NOSUID; if (args->rwflag & LINUX_MS_NOEXEC) fsflags |= MNT_NOEXEC; if (args->rwflag & LINUX_MS_REMOUNT) fsflags |= MNT_UPDATE; ma = mount_arg(ma, "fstype", fstypename, -1); ma = mount_arg(ma, "fspath", mntonname, -1); ma = mount_arg(ma, "from", mntfromname, -1); error = kernel_mount(ma, fsflags); out: free(fstypename, M_TEMP); free(mntonname, M_TEMP); free(mntfromname, M_TEMP); free(data, M_TEMP); return (error); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_oldumount(struct thread *td, struct linux_oldumount_args *args) { return (kern_unmount(td, args->path, 0)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ #ifdef LINUX_LEGACY_SYSCALLS int linux_umount(struct thread *td, struct linux_umount_args *args) { int flags; flags = 0; if ((args->flags & LINUX_MNT_FORCE) != 0) { args->flags &= ~LINUX_MNT_FORCE; flags |= MNT_FORCE; } if (args->flags != 0) { linux_msg(td, "unsupported umount2 flags %#x", args->flags); return (EINVAL); } return (kern_unmount(td, args->path, flags)); } #endif /* * fcntl family of syscalls */ struct l_flock { l_short l_type; l_short l_whence; l_off_t l_start; l_off_t l_len; l_pid_t l_pid; } #if defined(__amd64__) && defined(COMPAT_LINUX32) __packed #endif ; static void linux_to_bsd_flock(struct l_flock *linux_flock, struct flock *bsd_flock) { switch (linux_flock->l_type) { case LINUX_F_RDLCK: bsd_flock->l_type = F_RDLCK; break; case LINUX_F_WRLCK: bsd_flock->l_type = F_WRLCK; break; case LINUX_F_UNLCK: bsd_flock->l_type = F_UNLCK; break; default: bsd_flock->l_type = -1; break; } bsd_flock->l_whence = linux_flock->l_whence; bsd_flock->l_start = (off_t)linux_flock->l_start; bsd_flock->l_len = (off_t)linux_flock->l_len; bsd_flock->l_pid = (pid_t)linux_flock->l_pid; bsd_flock->l_sysid = 0; } static void bsd_to_linux_flock(struct flock *bsd_flock, struct l_flock *linux_flock) { switch (bsd_flock->l_type) { case F_RDLCK: linux_flock->l_type = LINUX_F_RDLCK; break; case F_WRLCK: linux_flock->l_type = LINUX_F_WRLCK; break; case F_UNLCK: linux_flock->l_type = LINUX_F_UNLCK; break; } linux_flock->l_whence = bsd_flock->l_whence; linux_flock->l_start = (l_off_t)bsd_flock->l_start; linux_flock->l_len = (l_off_t)bsd_flock->l_len; linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid; } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) struct l_flock64 { l_short l_type; l_short l_whence; l_loff_t l_start; l_loff_t l_len; l_pid_t l_pid; } #if defined(__amd64__) && defined(COMPAT_LINUX32) __packed #endif ; static void linux_to_bsd_flock64(struct l_flock64 *linux_flock, struct flock *bsd_flock) { switch (linux_flock->l_type) { case LINUX_F_RDLCK: bsd_flock->l_type = F_RDLCK; break; case LINUX_F_WRLCK: bsd_flock->l_type = F_WRLCK; break; case LINUX_F_UNLCK: bsd_flock->l_type = F_UNLCK; break; default: bsd_flock->l_type = -1; break; } bsd_flock->l_whence = linux_flock->l_whence; bsd_flock->l_start = (off_t)linux_flock->l_start; bsd_flock->l_len = (off_t)linux_flock->l_len; bsd_flock->l_pid = (pid_t)linux_flock->l_pid; bsd_flock->l_sysid = 0; } static void bsd_to_linux_flock64(struct flock *bsd_flock, struct l_flock64 *linux_flock) { switch (bsd_flock->l_type) { case F_RDLCK: linux_flock->l_type = LINUX_F_RDLCK; break; case F_WRLCK: linux_flock->l_type = LINUX_F_WRLCK; break; case F_UNLCK: linux_flock->l_type = LINUX_F_UNLCK; break; } linux_flock->l_whence = bsd_flock->l_whence; linux_flock->l_start = 
(l_loff_t)bsd_flock->l_start; linux_flock->l_len = (l_loff_t)bsd_flock->l_len; linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid; } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ static int fcntl_common(struct thread *td, struct linux_fcntl_args *args) { struct l_flock linux_flock; struct flock bsd_flock; struct pipe *fpipe; struct file *fp; long arg; int error, result; switch (args->cmd) { case LINUX_F_DUPFD: return (kern_fcntl(td, args->fd, F_DUPFD, args->arg)); case LINUX_F_GETFD: return (kern_fcntl(td, args->fd, F_GETFD, 0)); case LINUX_F_SETFD: return (kern_fcntl(td, args->fd, F_SETFD, args->arg)); case LINUX_F_GETFL: error = kern_fcntl(td, args->fd, F_GETFL, 0); result = td->td_retval[0]; td->td_retval[0] = 0; if (result & O_RDONLY) td->td_retval[0] |= LINUX_O_RDONLY; if (result & O_WRONLY) td->td_retval[0] |= LINUX_O_WRONLY; if (result & O_RDWR) td->td_retval[0] |= LINUX_O_RDWR; if (result & O_NDELAY) td->td_retval[0] |= LINUX_O_NONBLOCK; if (result & O_APPEND) td->td_retval[0] |= LINUX_O_APPEND; if (result & O_FSYNC) td->td_retval[0] |= LINUX_O_SYNC; if (result & O_ASYNC) td->td_retval[0] |= LINUX_O_ASYNC; #ifdef LINUX_O_NOFOLLOW if (result & O_NOFOLLOW) td->td_retval[0] |= LINUX_O_NOFOLLOW; #endif #ifdef LINUX_O_DIRECT if (result & O_DIRECT) td->td_retval[0] |= LINUX_O_DIRECT; #endif return (error); case LINUX_F_SETFL: arg = 0; if (args->arg & LINUX_O_NDELAY) arg |= O_NONBLOCK; if (args->arg & LINUX_O_APPEND) arg |= O_APPEND; if (args->arg & LINUX_O_SYNC) arg |= O_FSYNC; if (args->arg & LINUX_O_ASYNC) arg |= O_ASYNC; #ifdef LINUX_O_NOFOLLOW if (args->arg & LINUX_O_NOFOLLOW) arg |= O_NOFOLLOW; #endif #ifdef LINUX_O_DIRECT if (args->arg & LINUX_O_DIRECT) arg |= O_DIRECT; #endif return (kern_fcntl(td, args->fd, F_SETFL, arg)); case LINUX_F_GETLK: error = copyin((void *)args->arg, &linux_flock, sizeof(linux_flock)); if (error) return (error); linux_to_bsd_flock(&linux_flock, &bsd_flock); error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock); if (error) return (error); bsd_to_linux_flock(&bsd_flock, &linux_flock); return (copyout(&linux_flock, (void *)args->arg, sizeof(linux_flock))); case LINUX_F_SETLK: error = copyin((void *)args->arg, &linux_flock, sizeof(linux_flock)); if (error) return (error); linux_to_bsd_flock(&linux_flock, &bsd_flock); return (kern_fcntl(td, args->fd, F_SETLK, (intptr_t)&bsd_flock)); case LINUX_F_SETLKW: error = copyin((void *)args->arg, &linux_flock, sizeof(linux_flock)); if (error) return (error); linux_to_bsd_flock(&linux_flock, &bsd_flock); return (kern_fcntl(td, args->fd, F_SETLKW, (intptr_t)&bsd_flock)); case LINUX_F_GETOWN: return (kern_fcntl(td, args->fd, F_GETOWN, 0)); case LINUX_F_SETOWN: /* * XXX some Linux applications depend on F_SETOWN having no * significant effect for pipes (SIGIO is not delivered for * pipes under Linux-2.2.35 at least). */ error = fget(td, args->fd, &cap_fcntl_rights, &fp); if (error) return (error); if (fp->f_type == DTYPE_PIPE) { fdrop(fp, td); return (EINVAL); } fdrop(fp, td); return (kern_fcntl(td, args->fd, F_SETOWN, args->arg)); case LINUX_F_DUPFD_CLOEXEC: return (kern_fcntl(td, args->fd, F_DUPFD_CLOEXEC, args->arg)); /* * Our F_SEAL_* values match Linux one for maximum compatibility. So we * only needed to account for different values for fcntl(2) commands. 
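 * E.g. a Linux fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW) arrives
 * with a different command value, but the seal bits themselves map
 * one-to-one through seal_bitmap.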
*/ case LINUX_F_GET_SEALS: error = kern_fcntl(td, args->fd, F_GET_SEALS, 0); if (error != 0) return (error); td->td_retval[0] = bsd_to_linux_bits(td->td_retval[0], seal_bitmap, 0); return (0); case LINUX_F_ADD_SEALS: return (kern_fcntl(td, args->fd, F_ADD_SEALS, linux_to_bsd_bits(args->arg, seal_bitmap, 0))); case LINUX_F_GETPIPE_SZ: error = fget(td, args->fd, &cap_fcntl_rights, &fp); if (error != 0) return (error); if (fp->f_type != DTYPE_PIPE) { fdrop(fp, td); return (EINVAL); } fpipe = fp->f_data; td->td_retval[0] = fpipe->pipe_buffer.size; fdrop(fp, td); return (0); default: linux_msg(td, "unsupported fcntl cmd %d", args->cmd); return (EINVAL); } } int linux_fcntl(struct thread *td, struct linux_fcntl_args *args) { return (fcntl_common(td, args)); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_fcntl64(struct thread *td, struct linux_fcntl64_args *args) { struct l_flock64 linux_flock; struct flock bsd_flock; struct linux_fcntl_args fcntl_args; int error; switch (args->cmd) { case LINUX_F_GETLK64: error = copyin((void *)args->arg, &linux_flock, sizeof(linux_flock)); if (error) return (error); linux_to_bsd_flock64(&linux_flock, &bsd_flock); error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock); if (error) return (error); bsd_to_linux_flock64(&bsd_flock, &linux_flock); return (copyout(&linux_flock, (void *)args->arg, sizeof(linux_flock))); case LINUX_F_SETLK64: error = copyin((void *)args->arg, &linux_flock, sizeof(linux_flock)); if (error) return (error); linux_to_bsd_flock64(&linux_flock, &bsd_flock); return (kern_fcntl(td, args->fd, F_SETLK, (intptr_t)&bsd_flock)); case LINUX_F_SETLKW64: error = copyin((void *)args->arg, &linux_flock, sizeof(linux_flock)); if (error) return (error); linux_to_bsd_flock64(&linux_flock, &bsd_flock); return (kern_fcntl(td, args->fd, F_SETLKW, (intptr_t)&bsd_flock)); } fcntl_args.fd = args->fd; fcntl_args.cmd = args->cmd; fcntl_args.arg = args->arg; return (fcntl_common(td, &fcntl_args)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ #ifdef LINUX_LEGACY_SYSCALLS int linux_chown(struct thread *td, struct linux_chown_args *args) { char *path; int error; if (!LUSECONVPATH(td)) { return (kern_fchownat(td, AT_FDCWD, args->path, UIO_USERSPACE, args->uid, args->gid, 0)); } - LCONVPATHEXIST(td, args->path, &path); + LCONVPATHEXIST(args->path, &path); error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, args->uid, args->gid, 0); LFREEPATH(path); return (error); } #endif int linux_fchownat(struct thread *td, struct linux_fchownat_args *args) { char *path; int error, dfd, flag, unsupported; unsupported = args->flag & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH); if (unsupported != 0) { linux_msg(td, "fchownat unsupported flag 0x%x", unsupported); return (EINVAL); } flag = (args->flag & LINUX_AT_SYMLINK_NOFOLLOW) == 0 ? 0 : AT_SYMLINK_NOFOLLOW; flag |= (args->flag & LINUX_AT_EMPTY_PATH) == 0 ? 0 : AT_EMPTY_PATH; dfd = (args->dfd == LINUX_AT_FDCWD) ? 
AT_FDCWD : args->dfd; if (!LUSECONVPATH(td)) { return (kern_fchownat(td, dfd, args->filename, UIO_USERSPACE, args->uid, args->gid, flag)); } - LCONVPATHEXIST_AT(td, args->filename, &path, dfd); + LCONVPATHEXIST_AT(args->filename, &path, dfd); error = kern_fchownat(td, dfd, path, UIO_SYSSPACE, args->uid, args->gid, flag); LFREEPATH(path); return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_lchown(struct thread *td, struct linux_lchown_args *args) { char *path; int error; if (!LUSECONVPATH(td)) { return (kern_fchownat(td, AT_FDCWD, args->path, UIO_USERSPACE, args->uid, args->gid, AT_SYMLINK_NOFOLLOW)); } - LCONVPATHEXIST(td, args->path, &path); + LCONVPATHEXIST(args->path, &path); error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, args->uid, args->gid, AT_SYMLINK_NOFOLLOW); LFREEPATH(path); return (error); } #endif static int convert_fadvice(int advice) { switch (advice) { case LINUX_POSIX_FADV_NORMAL: return (POSIX_FADV_NORMAL); case LINUX_POSIX_FADV_RANDOM: return (POSIX_FADV_RANDOM); case LINUX_POSIX_FADV_SEQUENTIAL: return (POSIX_FADV_SEQUENTIAL); case LINUX_POSIX_FADV_WILLNEED: return (POSIX_FADV_WILLNEED); case LINUX_POSIX_FADV_DONTNEED: return (POSIX_FADV_DONTNEED); case LINUX_POSIX_FADV_NOREUSE: return (POSIX_FADV_NOREUSE); default: return (-1); } } int linux_fadvise64(struct thread *td, struct linux_fadvise64_args *args) { off_t offset; int advice; #if defined(__amd64__) && defined(COMPAT_LINUX32) offset = PAIR32TO64(off_t, args->offset); #else offset = args->offset; #endif advice = convert_fadvice(args->advice); if (advice == -1) return (EINVAL); return (kern_posix_fadvise(td, args->fd, offset, args->len, advice)); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_fadvise64_64(struct thread *td, struct linux_fadvise64_64_args *args) { off_t len, offset; int advice; #if defined(__amd64__) && defined(COMPAT_LINUX32) len = PAIR32TO64(off_t, args->len); offset = PAIR32TO64(off_t, args->offset); #else len = args->len; offset = args->offset; #endif advice = convert_fadvice(args->advice); if (advice == -1) return (EINVAL); return (kern_posix_fadvise(td, args->fd, offset, len, advice)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ #ifdef LINUX_LEGACY_SYSCALLS int linux_pipe(struct thread *td, struct linux_pipe_args *args) { int fildes[2]; int error; error = kern_pipe(td, fildes, 0, NULL, NULL); if (error != 0) return (error); error = copyout(fildes, args->pipefds, sizeof(fildes)); if (error != 0) { (void)kern_close(td, fildes[0]); (void)kern_close(td, fildes[1]); } return (error); } #endif int linux_pipe2(struct thread *td, struct linux_pipe2_args *args) { int fildes[2]; int error, flags; if ((args->flags & ~(LINUX_O_NONBLOCK | LINUX_O_CLOEXEC)) != 0) return (EINVAL); flags = 0; if ((args->flags & LINUX_O_NONBLOCK) != 0) flags |= O_NONBLOCK; if ((args->flags & LINUX_O_CLOEXEC) != 0) flags |= O_CLOEXEC; error = kern_pipe(td, fildes, flags, NULL, NULL); if (error != 0) return (error); error = copyout(fildes, args->pipefds, sizeof(fildes)); if (error != 0) { (void)kern_close(td, fildes[0]); (void)kern_close(td, fildes[1]); } return (error); } int linux_dup3(struct thread *td, struct linux_dup3_args *args) { int cmd; intptr_t newfd; if (args->oldfd == args->newfd) return (EINVAL); if ((args->flags & ~LINUX_O_CLOEXEC) != 0) return (EINVAL); if (args->flags & LINUX_O_CLOEXEC) cmd = F_DUP2FD_CLOEXEC; else cmd = F_DUP2FD; newfd = args->newfd; return (kern_fcntl(td, args->oldfd, cmd, newfd)); } int linux_fallocate(struct thread *td, struct 
linux_fallocate_args *args) { off_t len, offset; /* * We emulate only posix_fallocate system call for which * mode should be 0. */ if (args->mode != 0) return (EOPNOTSUPP); #if defined(__amd64__) && defined(COMPAT_LINUX32) len = PAIR32TO64(off_t, args->len); offset = PAIR32TO64(off_t, args->offset); #else len = args->len; offset = args->offset; #endif return (kern_posix_fallocate(td, args->fd, offset, len)); } int linux_copy_file_range(struct thread *td, struct linux_copy_file_range_args *args) { l_loff_t inoff, outoff, *inoffp, *outoffp; int error, flags; /* * copy_file_range(2) on Linux doesn't define any flags (yet), so is * the native implementation. Enforce it. */ if (args->flags != 0) { linux_msg(td, "copy_file_range unsupported flags 0x%x", args->flags); return (EINVAL); } flags = 0; inoffp = outoffp = NULL; if (args->off_in != NULL) { error = copyin(args->off_in, &inoff, sizeof(l_loff_t)); if (error != 0) return (error); inoffp = &inoff; } if (args->off_out != NULL) { error = copyin(args->off_out, &outoff, sizeof(l_loff_t)); if (error != 0) return (error); outoffp = &outoff; } error = kern_copy_file_range(td, args->fd_in, inoffp, args->fd_out, outoffp, args->len, flags); if (error == 0 && args->off_in != NULL) error = copyout(inoffp, args->off_in, sizeof(l_loff_t)); if (error == 0 && args->off_out != NULL) error = copyout(outoffp, args->off_out, sizeof(l_loff_t)); return (error); } #define LINUX_MEMFD_PREFIX "memfd:" int linux_memfd_create(struct thread *td, struct linux_memfd_create_args *args) { char memfd_name[LINUX_NAME_MAX + 1]; int error, flags, shmflags, oflags; /* * This is our clever trick to avoid the heap allocation to copy in the * uname. We don't really need to go this far out of our way, but it * does keep the rest of this function fairly clean as they don't have * to worry about cleanup on the way out. */ error = copyinstr(args->uname_ptr, memfd_name + sizeof(LINUX_MEMFD_PREFIX) - 1, LINUX_NAME_MAX - sizeof(LINUX_MEMFD_PREFIX) - 1, NULL); if (error != 0) { if (error == ENAMETOOLONG) error = EINVAL; return (error); } memcpy(memfd_name, LINUX_MEMFD_PREFIX, sizeof(LINUX_MEMFD_PREFIX) - 1); flags = linux_to_bsd_bits(args->flags, mfd_bitmap, 0); if ((flags & ~(MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB | MFD_HUGE_MASK)) != 0) return (EINVAL); /* Size specified but no HUGETLB. */ if ((flags & MFD_HUGE_MASK) != 0 && (flags & MFD_HUGETLB) == 0) return (EINVAL); /* We don't actually support HUGETLB. */ if ((flags & MFD_HUGETLB) != 0) return (ENOSYS); oflags = O_RDWR; shmflags = SHM_GROW_ON_WRITE; if ((flags & MFD_CLOEXEC) != 0) oflags |= O_CLOEXEC; if ((flags & MFD_ALLOW_SEALING) != 0) shmflags |= SHM_ALLOW_SEALING; return (kern_shm_open2(td, SHM_ANON, oflags, 0, shmflags, NULL, memfd_name)); } int linux_splice(struct thread *td, struct linux_splice_args *args) { linux_msg(td, "syscall splice not really implemented"); /* * splice(2) is documented to return EINVAL in various circumstances; * returning it instead of ENOSYS should hint the caller to use fallback * instead. */ return (EINVAL); } diff --git a/sys/compat/linux/linux_misc.c b/sys/compat/linux/linux_misc.c index 79b681ea8b62..2d4936228315 100644 --- a/sys/compat/linux/linux_misc.c +++ b/sys/compat/linux/linux_misc.c @@ -1,2891 +1,2891 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2002 Doug Rabson * Copyright (c) 1994-1995 Søren Schmidt * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #if defined(__i386__) #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_LINUX32 #include #include #else #include #include #endif #include #include #include #include #include #include #include #include #include #include int stclohz; /* Statistics clock frequency */ static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, RLIMIT_MEMLOCK, RLIMIT_AS }; struct l_sysinfo { l_long uptime; /* Seconds since boot */ l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ #define LINUX_SYSINFO_LOADS_SCALE 65536 l_ulong totalram; /* Total usable main memory size */ l_ulong freeram; /* Available memory size */ l_ulong sharedram; /* Amount of shared memory */ l_ulong bufferram; /* Memory used by buffers */ l_ulong totalswap; /* Total swap space size */ l_ulong freeswap; /* swap space still available */ l_ushort procs; /* Number of current processes */ l_ushort pads; l_ulong totalhigh; l_ulong freehigh; l_uint mem_unit; char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ }; struct l_pselect6arg { l_uintptr_t ss; l_size_t ss_len; }; static int linux_utimensat_lts_to_ts(struct l_timespec *, struct timespec *); #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) static int linux_utimensat_lts64_to_ts(struct l_timespec64 *, struct timespec *); #endif static int linux_common_utimensat(struct thread *, int, const char *, struct timespec *, int); static int linux_common_pselect6(struct thread *, l_int, l_fd_set *, l_fd_set *, l_fd_set *, struct timespec *, l_uintptr_t *); static int linux_common_ppoll(struct thread *, struct pollfd *, uint32_t, struct timespec *, l_sigset_t *, l_size_t); 
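/*
 * Illustrative sketch (user-space view): the load averages that
 * linux_sysinfo() stores below are fixed-point values scaled by
 * LINUX_SYSINFO_LOADS_SCALE (1 << 16), the same encoding Linux sysinfo(2)
 * uses, so a Linux program is expected to scale them back down:
 *
 *	struct sysinfo si;
 *
 *	if (sysinfo(&si) == 0)
 *		printf("1 min load: %.2f\n", si.loads[0] / 65536.0);
 */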
static int linux_pollin(struct thread *, struct pollfd *, struct pollfd *, u_int); static int linux_pollout(struct thread *, struct pollfd *, struct pollfd *, u_int); int linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) { struct l_sysinfo sysinfo; int i, j; struct timespec ts; bzero(&sysinfo, sizeof(sysinfo)); getnanouptime(&ts); if (ts.tv_nsec != 0) ts.tv_sec++; sysinfo.uptime = ts.tv_sec; /* Use the information from the mib to get our load averages */ for (i = 0; i < 3; i++) sysinfo.loads[i] = averunnable.ldavg[i] * LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; sysinfo.totalram = physmem * PAGE_SIZE; sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE; /* * sharedram counts pages allocated to named, swap-backed objects such * as shared memory segments and tmpfs files. There is no cheap way to * compute this, so just leave the field unpopulated. Linux itself only * started setting this field in the 3.x timeframe. */ sysinfo.sharedram = 0; sysinfo.bufferram = 0; swap_pager_status(&i, &j); sysinfo.totalswap = i * PAGE_SIZE; sysinfo.freeswap = (i - j) * PAGE_SIZE; sysinfo.procs = nprocs; /* * Platforms supported by the emulation layer do not have a notion of * high memory. */ sysinfo.totalhigh = 0; sysinfo.freehigh = 0; sysinfo.mem_unit = 1; return (copyout(&sysinfo, args->info, sizeof(sysinfo))); } #ifdef LINUX_LEGACY_SYSCALLS int linux_alarm(struct thread *td, struct linux_alarm_args *args) { struct itimerval it, old_it; u_int secs; int error; secs = args->secs; /* * Linux alarm() is always successful. Limit secs to INT32_MAX / 2 * to match kern_setitimer()'s limit to avoid error from it. * * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit * platforms. */ if (secs > INT32_MAX / 2) secs = INT32_MAX / 2; it.it_value.tv_sec = secs; it.it_value.tv_usec = 0; timevalclear(&it.it_interval); error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); KASSERT(error == 0, ("kern_setitimer returns %d", error)); if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) || old_it.it_value.tv_usec >= 500000) old_it.it_value.tv_sec++; td->td_retval[0] = old_it.it_value.tv_sec; return (0); } #endif int linux_brk(struct thread *td, struct linux_brk_args *args) { struct vmspace *vm = td->td_proc->p_vmspace; uintptr_t new, old; old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize); new = (uintptr_t)args->dsend; if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new)) td->td_retval[0] = (register_t)new; else td->td_retval[0] = (register_t)old; return (0); } #if defined(__i386__) /* XXX: what about amd64/linux32? */ int linux_uselib(struct thread *td, struct linux_uselib_args *args) { struct nameidata ni; struct vnode *vp; struct exec *a_out; vm_map_t map; vm_map_entry_t entry; struct vattr attr; vm_offset_t vmaddr; unsigned long file_offset; unsigned long bss_size; char *library; ssize_t aresid; int error; bool locked, opened, textset; a_out = NULL; vp = NULL; locked = false; textset = false; opened = false; if (!LUSECONVPATH(td)) { NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, args->library, td); error = namei(&ni); } else { - LCONVPATHEXIST(td, args->library, &library); + LCONVPATHEXIST(args->library, &library); NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_SYSSPACE, library, td); error = namei(&ni); LFREEPATH(library); } if (error) goto cleanup; vp = ni.ni_vp; NDFREE(&ni, NDF_ONLY_PNBUF); /* * From here on down, we have a locked vnode that must be unlocked. 
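 * The locked, opened and textset flags record which of those steps have
 * been performed so that the cleanup: label below can undo them.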
* XXX: The code below largely duplicates exec_check_permissions(). */ locked = true; /* Executable? */ error = VOP_GETATTR(vp, &attr, td->td_ucred); if (error) goto cleanup; if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) { /* EACCESS is what exec(2) returns. */ error = ENOEXEC; goto cleanup; } /* Sensible size? */ if (attr.va_size == 0) { error = ENOEXEC; goto cleanup; } /* Can we access it? */ error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); if (error) goto cleanup; /* * XXX: This should use vn_open() so that it is properly authorized, * and to reduce code redundancy all over the place here. * XXX: Not really, it duplicates far more of exec_check_permissions() * than vn_open(). */ #ifdef MAC error = mac_vnode_check_open(td->td_ucred, vp, VREAD); if (error) goto cleanup; #endif error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); if (error) goto cleanup; opened = true; /* Pull in executable header into exec_map */ error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE, VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0); if (error) goto cleanup; /* Is it a Linux binary ? */ if (((a_out->a_magic >> 16) & 0xff) != 0x64) { error = ENOEXEC; goto cleanup; } /* * While we are here, we should REALLY do some more checks */ /* Set file/virtual offset based on a.out variant. */ switch ((int)(a_out->a_magic & 0xffff)) { case 0413: /* ZMAGIC */ file_offset = 1024; break; case 0314: /* QMAGIC */ file_offset = 0; break; default: error = ENOEXEC; goto cleanup; } bss_size = round_page(a_out->a_bss); /* Check various fields in header for validity/bounds. */ if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { error = ENOEXEC; goto cleanup; } /* text + data can't exceed file size */ if (a_out->a_data + a_out->a_text > attr.va_size) { error = EFAULT; goto cleanup; } /* * text/data/bss must not exceed limits * XXX - this is not complete. it should check current usage PLUS * the resources needed by this library. */ PROC_LOCK(td->td_proc); if (a_out->a_text > maxtsiz || a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) || racct_set(td->td_proc, RACCT_DATA, a_out->a_data + bss_size) != 0) { PROC_UNLOCK(td->td_proc); error = ENOMEM; goto cleanup; } PROC_UNLOCK(td->td_proc); /* * Prevent more writers. */ error = VOP_SET_TEXT(vp); if (error != 0) goto cleanup; textset = true; /* * Lock no longer needed */ locked = false; VOP_UNLOCK(vp); /* * Check if file_offset page aligned. Currently we cannot handle * misalinged file offsets, and so we read in the entire image * (what a waste). */ if (file_offset & PAGE_MASK) { /* Map text+data read/write/execute */ /* a_entry is the load address and is page aligned */ vmaddr = trunc_page(a_out->a_entry); /* get anon user mapping, read+write+execute */ error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) goto cleanup; error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset, a_out->a_text + a_out->a_data, UIO_USERSPACE, 0, td->td_ucred, NOCRED, &aresid, td); if (error != 0) goto cleanup; if (aresid != 0) { error = ENOEXEC; goto cleanup; } } else { /* * for QMAGIC, a_entry is 20 bytes beyond the load address * to skip the executable header */ vmaddr = trunc_page(a_out->a_entry); /* * Map it all into the process's space as a single * copy-on-write "data" segment. 
*/ map = &td->td_proc->p_vmspace->vm_map; error = vm_mmap(map, &vmaddr, a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset); if (error) goto cleanup; vm_map_lock(map); if (!vm_map_lookup_entry(map, vmaddr, &entry)) { vm_map_unlock(map); error = EDOOFUS; goto cleanup; } entry->eflags |= MAP_ENTRY_VN_EXEC; vm_map_unlock(map); textset = false; } if (bss_size != 0) { /* Calculate BSS start address */ vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data; /* allocate some 'anon' space */ error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) goto cleanup; } cleanup: if (opened) { if (locked) VOP_UNLOCK(vp); locked = false; VOP_CLOSE(vp, FREAD, td->td_ucred, td); } if (textset) { if (!locked) { locked = true; VOP_LOCK(vp, LK_SHARED | LK_RETRY); } VOP_UNSET_TEXT_CHECKED(vp); } if (locked) VOP_UNLOCK(vp); /* Release the temporary mapping. */ if (a_out) kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE); return (error); } #endif /* __i386__ */ #ifdef LINUX_LEGACY_SYSCALLS int linux_select(struct thread *td, struct linux_select_args *args) { l_timeval ltv; struct timeval tv0, tv1, utv, *tvp; int error; /* * Store current time for computation of the amount of * time left. */ if (args->timeout) { if ((error = copyin(args->timeout, <v, sizeof(ltv)))) goto select_out; utv.tv_sec = ltv.tv_sec; utv.tv_usec = ltv.tv_usec; if (itimerfix(&utv)) { /* * The timeval was invalid. Convert it to something * valid that will act as it does under Linux. */ utv.tv_sec += utv.tv_usec / 1000000; utv.tv_usec %= 1000000; if (utv.tv_usec < 0) { utv.tv_sec -= 1; utv.tv_usec += 1000000; } if (utv.tv_sec < 0) timevalclear(&utv); } microtime(&tv0); tvp = &utv; } else tvp = NULL; error = kern_select(td, args->nfds, args->readfds, args->writefds, args->exceptfds, tvp, LINUX_NFDBITS); if (error) goto select_out; if (args->timeout) { if (td->td_retval[0]) { /* * Compute how much time was left of the timeout, * by subtracting the current time and the time * before we started the call, and subtracting * that result from the user-supplied value. */ microtime(&tv1); timevalsub(&tv1, &tv0); timevalsub(&utv, &tv1); if (utv.tv_sec < 0) timevalclear(&utv); } else timevalclear(&utv); ltv.tv_sec = utv.tv_sec; ltv.tv_usec = utv.tv_usec; if ((error = copyout(<v, args->timeout, sizeof(ltv)))) goto select_out; } select_out: return (error); } #endif int linux_mremap(struct thread *td, struct linux_mremap_args *args) { uintptr_t addr; size_t len; int error = 0; if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { td->td_retval[0] = 0; return (EINVAL); } /* * Check for the page alignment. * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. */ if (args->addr & PAGE_MASK) { td->td_retval[0] = 0; return (EINVAL); } args->new_len = round_page(args->new_len); args->old_len = round_page(args->old_len); if (args->new_len > args->old_len) { td->td_retval[0] = 0; return (ENOMEM); } if (args->new_len < args->old_len) { addr = args->addr + args->new_len; len = args->old_len - args->new_len; error = kern_munmap(td, addr, len); } td->td_retval[0] = error ? 
0 : (uintptr_t)args->addr; return (error); } #define LINUX_MS_ASYNC 0x0001 #define LINUX_MS_INVALIDATE 0x0002 #define LINUX_MS_SYNC 0x0004 int linux_msync(struct thread *td, struct linux_msync_args *args) { return (kern_msync(td, args->addr, args->len, args->fl & ~LINUX_MS_SYNC)); } #ifdef LINUX_LEGACY_SYSCALLS int linux_time(struct thread *td, struct linux_time_args *args) { struct timeval tv; l_time_t tm; int error; microtime(&tv); tm = tv.tv_sec; if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) return (error); td->td_retval[0] = tm; return (0); } #endif struct l_times_argv { l_clock_t tms_utime; l_clock_t tms_stime; l_clock_t tms_cutime; l_clock_t tms_cstime; }; /* * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value. * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK * auxiliary vector entry. */ #define CLK_TCK 100 #define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) #define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) #define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER_2004000 ? \ CONVNTCK(r) : CONVOTCK(r)) int linux_times(struct thread *td, struct linux_times_args *args) { struct timeval tv, utime, stime, cutime, cstime; struct l_times_argv tms; struct proc *p; int error; if (args->buf != NULL) { p = td->td_proc; PROC_LOCK(p); PROC_STATLOCK(p); calcru(p, &utime, &stime); PROC_STATUNLOCK(p); calccru(p, &cutime, &cstime); PROC_UNLOCK(p); tms.tms_utime = CONVTCK(utime); tms.tms_stime = CONVTCK(stime); tms.tms_cutime = CONVTCK(cutime); tms.tms_cstime = CONVTCK(cstime); if ((error = copyout(&tms, args->buf, sizeof(tms)))) return (error); } microuptime(&tv); td->td_retval[0] = (int)CONVTCK(tv); return (0); } int linux_newuname(struct thread *td, struct linux_newuname_args *args) { struct l_new_utsname utsname; char osname[LINUX_MAX_UTSNAME]; char osrelease[LINUX_MAX_UTSNAME]; char *p; linux_get_osname(td, osname); linux_get_osrelease(td, osrelease); bzero(&utsname, sizeof(utsname)); strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); for (p = utsname.version; *p != '\0'; ++p) if (*p == '\n') { *p = '\0'; break; } #if defined(__amd64__) /* * On amd64, Linux uname(2) needs to return "x86_64" * for both 64-bit and 32-bit applications. On 32-bit, * the string returned by getauxval(AT_PLATFORM) needs * to remain "i686", though. 
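 * Concretely, a 32-bit binary running under COMPAT_LINUX32 sees uname -m
 * report "x86_64" while getauxval(AT_PLATFORM) still reports "i686".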
*/ strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME); #elif defined(__aarch64__) strlcpy(utsname.machine, "aarch64", LINUX_MAX_UTSNAME); #elif defined(__i386__) strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME); #endif return (copyout(&utsname, args->buf, sizeof(utsname))); } struct l_utimbuf { l_time_t l_actime; l_time_t l_modtime; }; #ifdef LINUX_LEGACY_SYSCALLS int linux_utime(struct thread *td, struct linux_utime_args *args) { struct timeval tv[2], *tvp; struct l_utimbuf lut; char *fname; int error; if (args->times) { if ((error = copyin(args->times, &lut, sizeof lut)) != 0) return (error); tv[0].tv_sec = lut.l_actime; tv[0].tv_usec = 0; tv[1].tv_sec = lut.l_modtime; tv[1].tv_usec = 0; tvp = tv; } else tvp = NULL; if (!LUSECONVPATH(td)) { error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, tvp, UIO_SYSSPACE); } else { - LCONVPATHEXIST(td, args->fname, &fname); + LCONVPATHEXIST(args->fname, &fname); error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); LFREEPATH(fname); } return (error); } #endif #ifdef LINUX_LEGACY_SYSCALLS int linux_utimes(struct thread *td, struct linux_utimes_args *args) { l_timeval ltv[2]; struct timeval tv[2], *tvp = NULL; char *fname; int error; if (args->tptr != NULL) { if ((error = copyin(args->tptr, ltv, sizeof ltv)) != 0) return (error); tv[0].tv_sec = ltv[0].tv_sec; tv[0].tv_usec = ltv[0].tv_usec; tv[1].tv_sec = ltv[1].tv_sec; tv[1].tv_usec = ltv[1].tv_usec; tvp = tv; } if (!LUSECONVPATH(td)) { error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, tvp, UIO_SYSSPACE); } else { - LCONVPATHEXIST(td, args->fname, &fname); + LCONVPATHEXIST(args->fname, &fname); error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); LFREEPATH(fname); } return (error); } #endif static int linux_utimensat_lts_to_ts(struct l_timespec *l_times, struct timespec *times) { if (l_times->tv_nsec != LINUX_UTIME_OMIT && l_times->tv_nsec != LINUX_UTIME_NOW && (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999)) return (EINVAL); times->tv_sec = l_times->tv_sec; switch (l_times->tv_nsec) { case LINUX_UTIME_OMIT: times->tv_nsec = UTIME_OMIT; break; case LINUX_UTIME_NOW: times->tv_nsec = UTIME_NOW; break; default: times->tv_nsec = l_times->tv_nsec; } return (0); } static int linux_common_utimensat(struct thread *td, int ldfd, const char *pathname, struct timespec *timesp, int lflags) { char *path = NULL; int error, dfd, flags = 0; dfd = (ldfd == LINUX_AT_FDCWD) ? AT_FDCWD : ldfd; if (lflags & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH)) return (EINVAL); if (timesp != NULL) { /* This breaks POSIX, but is what the Linux kernel does * _on purpose_ (documented in the man page for utimensat(2)), * so we must follow that behaviour. 
*/ if (timesp[0].tv_nsec == UTIME_OMIT && timesp[1].tv_nsec == UTIME_OMIT) return (0); } if (lflags & LINUX_AT_SYMLINK_NOFOLLOW) flags |= AT_SYMLINK_NOFOLLOW; if (lflags & LINUX_AT_EMPTY_PATH) flags |= AT_EMPTY_PATH; if (!LUSECONVPATH(td)) { if (pathname != NULL) { return (kern_utimensat(td, dfd, pathname, UIO_USERSPACE, timesp, UIO_SYSSPACE, flags)); } } if (pathname != NULL) - LCONVPATHEXIST_AT(td, pathname, &path, dfd); + LCONVPATHEXIST_AT(pathname, &path, dfd); else if (lflags != 0) return (EINVAL); if (path == NULL) error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE); else { error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp, UIO_SYSSPACE, flags); LFREEPATH(path); } return (error); } int linux_utimensat(struct thread *td, struct linux_utimensat_args *args) { struct l_timespec l_times[2]; struct timespec times[2], *timesp; int error; if (args->times != NULL) { error = copyin(args->times, l_times, sizeof(l_times)); if (error != 0) return (error); error = linux_utimensat_lts_to_ts(&l_times[0], ×[0]); if (error != 0) return (error); error = linux_utimensat_lts_to_ts(&l_times[1], ×[1]); if (error != 0) return (error); timesp = times; } else timesp = NULL; return (linux_common_utimensat(td, args->dfd, args->pathname, timesp, args->flags)); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) static int linux_utimensat_lts64_to_ts(struct l_timespec64 *l_times, struct timespec *times) { if (l_times->tv_nsec != LINUX_UTIME_OMIT && l_times->tv_nsec != LINUX_UTIME_NOW && (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999)) return (EINVAL); times->tv_sec = l_times->tv_sec; switch (l_times->tv_nsec) { case LINUX_UTIME_OMIT: times->tv_nsec = UTIME_OMIT; break; case LINUX_UTIME_NOW: times->tv_nsec = UTIME_NOW; break; default: times->tv_nsec = l_times->tv_nsec; } return (0); } int linux_utimensat_time64(struct thread *td, struct linux_utimensat_time64_args *args) { struct l_timespec64 l_times[2]; struct timespec times[2], *timesp; int error; if (args->times64 != NULL) { error = copyin(args->times64, l_times, sizeof(l_times)); if (error != 0) return (error); error = linux_utimensat_lts64_to_ts(&l_times[0], ×[0]); if (error != 0) return (error); error = linux_utimensat_lts64_to_ts(&l_times[1], ×[1]); if (error != 0) return (error); timesp = times; } else timesp = NULL; return (linux_common_utimensat(td, args->dfd, args->pathname, timesp, args->flags)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ #ifdef LINUX_LEGACY_SYSCALLS int linux_futimesat(struct thread *td, struct linux_futimesat_args *args) { l_timeval ltv[2]; struct timeval tv[2], *tvp = NULL; char *fname; int error, dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? 
AT_FDCWD : args->dfd; if (args->utimes != NULL) { if ((error = copyin(args->utimes, ltv, sizeof ltv)) != 0) return (error); tv[0].tv_sec = ltv[0].tv_sec; tv[0].tv_usec = ltv[0].tv_usec; tv[1].tv_sec = ltv[1].tv_sec; tv[1].tv_usec = ltv[1].tv_usec; tvp = tv; } if (!LUSECONVPATH(td)) { error = kern_utimesat(td, dfd, args->filename, UIO_USERSPACE, tvp, UIO_SYSSPACE); } else { - LCONVPATHEXIST_AT(td, args->filename, &fname, dfd); + LCONVPATHEXIST_AT(args->filename, &fname, dfd); error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); LFREEPATH(fname); } return (error); } #endif static int linux_common_wait(struct thread *td, int pid, int *statusp, int options, struct __wrusage *wrup) { siginfo_t siginfo; idtype_t idtype; id_t id; int error, status, tmpstat; if (pid == WAIT_ANY) { idtype = P_ALL; id = 0; } else if (pid < 0) { idtype = P_PGID; id = (id_t)-pid; } else { idtype = P_PID; id = (id_t)pid; } /* * For backward compatibility we implicitly add flags WEXITED * and WTRAPPED here. */ options |= WEXITED | WTRAPPED; error = kern_wait6(td, idtype, id, &status, options, wrup, &siginfo); if (error) return (error); if (statusp) { tmpstat = status & 0xffff; if (WIFSIGNALED(tmpstat)) { tmpstat = (tmpstat & 0xffffff80) | bsd_to_linux_signal(WTERMSIG(tmpstat)); } else if (WIFSTOPPED(tmpstat)) { tmpstat = (tmpstat & 0xffff00ff) | (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8); #if defined(__aarch64__) || (defined(__amd64__) && !defined(COMPAT_LINUX32)) if (WSTOPSIG(status) == SIGTRAP) { tmpstat = linux_ptrace_status(td, siginfo.si_pid, tmpstat); } #endif } else if (WIFCONTINUED(tmpstat)) { tmpstat = 0xffff; } error = copyout(&tmpstat, statusp, sizeof(int)); } return (error); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_waitpid(struct thread *td, struct linux_waitpid_args *args) { struct linux_wait4_args wait4_args; wait4_args.pid = args->pid; wait4_args.status = args->status; wait4_args.options = args->options; wait4_args.rusage = NULL; return (linux_wait4(td, &wait4_args)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_wait4(struct thread *td, struct linux_wait4_args *args) { int error, options; struct __wrusage wru, *wrup; if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG | LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) return (EINVAL); options = WEXITED; linux_to_bsd_waitopts(args->options, &options); if (args->rusage != NULL) wrup = &wru; else wrup = NULL; error = linux_common_wait(td, args->pid, args->status, options, wrup); if (error != 0) return (error); if (args->rusage != NULL) error = linux_copyout_rusage(&wru.wru_self, args->rusage); return (error); } int linux_waitid(struct thread *td, struct linux_waitid_args *args) { int status, options, sig; struct __wrusage wru; siginfo_t siginfo; l_siginfo_t lsi; idtype_t idtype; struct proc *p; int error; options = 0; linux_to_bsd_waitopts(args->options, &options); if (options & ~(WNOHANG | WNOWAIT | WEXITED | WUNTRACED | WCONTINUED)) return (EINVAL); if (!(options & (WEXITED | WUNTRACED | WCONTINUED))) return (EINVAL); switch (args->idtype) { case LINUX_P_ALL: idtype = P_ALL; break; case LINUX_P_PID: if (args->id <= 0) return (EINVAL); idtype = P_PID; break; case LINUX_P_PGID: if (args->id <= 0) return (EINVAL); idtype = P_PGID; break; default: return (EINVAL); } error = kern_wait6(td, idtype, args->id, &status, options, &wru, &siginfo); if (error != 0) return (error); if (args->rusage != NULL) { error = linux_copyout_rusage(&wru.wru_children, args->rusage); if 
(error != 0) return (error); } if (args->info != NULL) { p = td->td_proc; bzero(&lsi, sizeof(lsi)); if (td->td_retval[0] != 0) { sig = bsd_to_linux_signal(siginfo.si_signo); siginfo_to_lsiginfo(&siginfo, &lsi, sig); } error = copyout(&lsi, args->info, sizeof(lsi)); } td->td_retval[0] = 0; return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_mknod(struct thread *td, struct linux_mknod_args *args) { char *path; int error; enum uio_seg seg; bool convpath; convpath = LUSECONVPATH(td); if (!convpath) { path = args->path; seg = UIO_USERSPACE; } else { - LCONVPATHCREAT(td, args->path, &path); + LCONVPATHCREAT(args->path, &path); seg = UIO_SYSSPACE; } switch (args->mode & S_IFMT) { case S_IFIFO: case S_IFSOCK: error = kern_mkfifoat(td, AT_FDCWD, path, seg, args->mode); break; case S_IFCHR: case S_IFBLK: error = kern_mknodat(td, AT_FDCWD, path, seg, args->mode, args->dev); break; case S_IFDIR: error = EPERM; break; case 0: args->mode |= S_IFREG; /* FALLTHROUGH */ case S_IFREG: error = kern_openat(td, AT_FDCWD, path, seg, O_WRONLY | O_CREAT | O_TRUNC, args->mode); if (error == 0) kern_close(td, td->td_retval[0]); break; default: error = EINVAL; break; } if (convpath) LFREEPATH(path); return (error); } #endif int linux_mknodat(struct thread *td, struct linux_mknodat_args *args) { char *path; int error, dfd; enum uio_seg seg; bool convpath; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; convpath = LUSECONVPATH(td); if (!convpath) { path = __DECONST(char *, args->filename); seg = UIO_USERSPACE; } else { - LCONVPATHCREAT_AT(td, args->filename, &path, dfd); + LCONVPATHCREAT_AT(args->filename, &path, dfd); seg = UIO_SYSSPACE; } switch (args->mode & S_IFMT) { case S_IFIFO: case S_IFSOCK: error = kern_mkfifoat(td, dfd, path, seg, args->mode); break; case S_IFCHR: case S_IFBLK: error = kern_mknodat(td, dfd, path, seg, args->mode, args->dev); break; case S_IFDIR: error = EPERM; break; case 0: args->mode |= S_IFREG; /* FALLTHROUGH */ case S_IFREG: error = kern_openat(td, dfd, path, seg, O_WRONLY | O_CREAT | O_TRUNC, args->mode); if (error == 0) kern_close(td, td->td_retval[0]); break; default: error = EINVAL; break; } if (convpath) LFREEPATH(path); return (error); } /* * UGH! This is just about the dumbest idea I've ever heard!! 
*/ int linux_personality(struct thread *td, struct linux_personality_args *args) { struct linux_pemuldata *pem; struct proc *p = td->td_proc; uint32_t old; PROC_LOCK(p); pem = pem_find(p); old = pem->persona; if (args->per != 0xffffffff) pem->persona = args->per; PROC_UNLOCK(p); td->td_retval[0] = old; return (0); } struct l_itimerval { l_timeval it_interval; l_timeval it_value; }; #define B2L_ITIMERVAL(bip, lip) \ (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; int linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) { int error; struct l_itimerval ls; struct itimerval aitv, oitv; if (uap->itv == NULL) { uap->itv = uap->oitv; return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); } error = copyin(uap->itv, &ls, sizeof(ls)); if (error != 0) return (error); B2L_ITIMERVAL(&aitv, &ls); error = kern_setitimer(td, uap->which, &aitv, &oitv); if (error != 0 || uap->oitv == NULL) return (error); B2L_ITIMERVAL(&ls, &oitv); return (copyout(&ls, uap->oitv, sizeof(ls))); } int linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) { int error; struct l_itimerval ls; struct itimerval aitv; error = kern_getitimer(td, uap->which, &aitv); if (error != 0) return (error); B2L_ITIMERVAL(&ls, &aitv); return (copyout(&ls, uap->itv, sizeof(ls))); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_nice(struct thread *td, struct linux_nice_args *args) { return (kern_setpriority(td, PRIO_PROCESS, 0, args->inc)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_setgroups(struct thread *td, struct linux_setgroups_args *args) { struct ucred *newcred, *oldcred; l_gid_t *linux_gidset; gid_t *bsd_gidset; int ngrp, error; struct proc *p; ngrp = args->gidsetsize; if (ngrp < 0 || ngrp >= ngroups_max + 1) return (EINVAL); linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); if (error) goto out; newcred = crget(); crextend(newcred, ngrp + 1); p = td->td_proc; PROC_LOCK(p); oldcred = p->p_ucred; crcopy(newcred, oldcred); /* * cr_groups[0] holds egid. Setting the whole set from * the supplied set will cause egid to be changed too. * Keep cr_groups[0] unchanged to prevent that. */ if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) { PROC_UNLOCK(p); crfree(newcred); goto out; } if (ngrp > 0) { newcred->cr_ngroups = ngrp + 1; bsd_gidset = newcred->cr_groups; ngrp--; while (ngrp >= 0) { bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; ngrp--; } } else newcred->cr_ngroups = 1; setsugid(p); proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); error = 0; out: free(linux_gidset, M_LINUX); return (error); } int linux_getgroups(struct thread *td, struct linux_getgroups_args *args) { struct ucred *cred; l_gid_t *linux_gidset; gid_t *bsd_gidset; int bsd_gidsetsz, ngrp, error; cred = td->td_ucred; bsd_gidset = cred->cr_groups; bsd_gidsetsz = cred->cr_ngroups - 1; /* * cr_groups[0] holds egid. Returning the whole set * here will cause a duplicate. Exclude cr_groups[0] * to prevent that. 
*/ if ((ngrp = args->gidsetsize) == 0) { td->td_retval[0] = bsd_gidsetsz; return (0); } if (ngrp < bsd_gidsetsz) return (EINVAL); ngrp = 0; linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), M_LINUX, M_WAITOK); while (ngrp < bsd_gidsetsz) { linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; ngrp++; } error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); free(linux_gidset, M_LINUX); if (error) return (error); td->td_retval[0] = ngrp; return (0); } static bool linux_get_dummy_limit(l_uint resource, struct rlimit *rlim) { if (linux_dummy_rlimits == 0) return (false); switch (resource) { case LINUX_RLIMIT_LOCKS: case LINUX_RLIMIT_SIGPENDING: case LINUX_RLIMIT_MSGQUEUE: case LINUX_RLIMIT_RTTIME: rlim->rlim_cur = LINUX_RLIM_INFINITY; rlim->rlim_max = LINUX_RLIM_INFINITY; return (true); case LINUX_RLIMIT_NICE: case LINUX_RLIMIT_RTPRIO: rlim->rlim_cur = 0; rlim->rlim_max = 0; return (true); default: return (false); } } int linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) { struct rlimit bsd_rlim; struct l_rlimit rlim; u_int which; int error; if (args->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); which = linux_to_bsd_resource[args->resource]; if (which == -1) return (EINVAL); error = copyin(args->rlim, &rlim, sizeof(rlim)); if (error) return (error); bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; return (kern_setrlimit(td, which, &bsd_rlim)); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) { struct l_rlimit rlim; struct rlimit bsd_rlim; u_int which; if (linux_get_dummy_limit(args->resource, &bsd_rlim)) { rlim.rlim_cur = bsd_rlim.rlim_cur; rlim.rlim_max = bsd_rlim.rlim_max; return (copyout(&rlim, args->rlim, sizeof(rlim))); } if (args->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); which = linux_to_bsd_resource[args->resource]; if (which == -1) return (EINVAL); lim_rlimit(td, which, &bsd_rlim); #ifdef COMPAT_LINUX32 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; if (rlim.rlim_cur == UINT_MAX) rlim.rlim_cur = INT_MAX; rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; if (rlim.rlim_max == UINT_MAX) rlim.rlim_max = INT_MAX; #else rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; if (rlim.rlim_cur == ULONG_MAX) rlim.rlim_cur = LONG_MAX; rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; if (rlim.rlim_max == ULONG_MAX) rlim.rlim_max = LONG_MAX; #endif return (copyout(&rlim, args->rlim, sizeof(rlim))); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) { struct l_rlimit rlim; struct rlimit bsd_rlim; u_int which; if (linux_get_dummy_limit(args->resource, &bsd_rlim)) { rlim.rlim_cur = bsd_rlim.rlim_cur; rlim.rlim_max = bsd_rlim.rlim_max; return (copyout(&rlim, args->rlim, sizeof(rlim))); } if (args->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); which = linux_to_bsd_resource[args->resource]; if (which == -1) return (EINVAL); lim_rlimit(td, which, &bsd_rlim); rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; return (copyout(&rlim, args->rlim, sizeof(rlim))); } int linux_sched_setscheduler(struct thread *td, struct linux_sched_setscheduler_args *args) { struct sched_param sched_param; struct thread *tdt; int error, policy; switch (args->policy) { case LINUX_SCHED_OTHER: policy = SCHED_OTHER; break; case LINUX_SCHED_FIFO: policy = SCHED_FIFO; break; case LINUX_SCHED_RR: policy = SCHED_RR; break; 
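/*
 * Editor's note -- illustrative sketch, not part of this change: the
 * sched_setscheduler()/sched_setparam() code below maps Linux real-time
 * priorities [1, LINUX_MAX_RT_PRIO - 1] onto FreeBSD's
 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] range and back again.  Assuming the
 * usual constants (LINUX_MAX_RT_PRIO 100, RTP_PRIO_MIN 0, RTP_PRIO_MAX 31),
 * the mapping boils down to integer arithmetic like this (helper names
 * invented for the sketch):
 *
 *	static int
 *	linux_rt_to_bsd_prio(int lprio)		// rounds down
 *	{
 *		return ((lprio - 1) * (31 - 0 + 1) / (100 - 1));
 *	}
 *
 *	static int
 *	bsd_to_linux_rt_prio(int bprio)		// rounds up
 *	{
 *		return ((bprio * (100 - 1) + (31 - 0 - 1)) / (31 - 0) + 1);
 *	}
 *
 * For example, linux_rt_to_bsd_prio(1) == 0 and
 * linux_rt_to_bsd_prio(99) == 31 under the assumed constants.
 */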
default: return (EINVAL); } error = copyin(args->param, &sched_param, sizeof(sched_param)); if (error) return (error); if (linux_map_sched_prio) { switch (policy) { case SCHED_OTHER: if (sched_param.sched_priority != 0) return (EINVAL); sched_param.sched_priority = PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; break; case SCHED_FIFO: case SCHED_RR: if (sched_param.sched_priority < 1 || sched_param.sched_priority >= LINUX_MAX_RT_PRIO) return (EINVAL); /* * Map [1, LINUX_MAX_RT_PRIO - 1] to * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). */ sched_param.sched_priority = (sched_param.sched_priority - 1) * (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / (LINUX_MAX_RT_PRIO - 1); break; } } tdt = linux_tdfind(td, args->pid, -1); if (tdt == NULL) return (ESRCH); error = kern_sched_setscheduler(td, tdt, policy, &sched_param); PROC_UNLOCK(tdt->td_proc); return (error); } int linux_sched_getscheduler(struct thread *td, struct linux_sched_getscheduler_args *args) { struct thread *tdt; int error, policy; tdt = linux_tdfind(td, args->pid, -1); if (tdt == NULL) return (ESRCH); error = kern_sched_getscheduler(td, tdt, &policy); PROC_UNLOCK(tdt->td_proc); switch (policy) { case SCHED_OTHER: td->td_retval[0] = LINUX_SCHED_OTHER; break; case SCHED_FIFO: td->td_retval[0] = LINUX_SCHED_FIFO; break; case SCHED_RR: td->td_retval[0] = LINUX_SCHED_RR; break; } return (error); } int linux_sched_get_priority_max(struct thread *td, struct linux_sched_get_priority_max_args *args) { struct sched_get_priority_max_args bsd; if (linux_map_sched_prio) { switch (args->policy) { case LINUX_SCHED_OTHER: td->td_retval[0] = 0; return (0); case LINUX_SCHED_FIFO: case LINUX_SCHED_RR: td->td_retval[0] = LINUX_MAX_RT_PRIO - 1; return (0); default: return (EINVAL); } } switch (args->policy) { case LINUX_SCHED_OTHER: bsd.policy = SCHED_OTHER; break; case LINUX_SCHED_FIFO: bsd.policy = SCHED_FIFO; break; case LINUX_SCHED_RR: bsd.policy = SCHED_RR; break; default: return (EINVAL); } return (sys_sched_get_priority_max(td, &bsd)); } int linux_sched_get_priority_min(struct thread *td, struct linux_sched_get_priority_min_args *args) { struct sched_get_priority_min_args bsd; if (linux_map_sched_prio) { switch (args->policy) { case LINUX_SCHED_OTHER: td->td_retval[0] = 0; return (0); case LINUX_SCHED_FIFO: case LINUX_SCHED_RR: td->td_retval[0] = 1; return (0); default: return (EINVAL); } } switch (args->policy) { case LINUX_SCHED_OTHER: bsd.policy = SCHED_OTHER; break; case LINUX_SCHED_FIFO: bsd.policy = SCHED_FIFO; break; case LINUX_SCHED_RR: bsd.policy = SCHED_RR; break; default: return (EINVAL); } return (sys_sched_get_priority_min(td, &bsd)); } #define REBOOT_CAD_ON 0x89abcdef #define REBOOT_CAD_OFF 0 #define REBOOT_HALT 0xcdef0123 #define REBOOT_RESTART 0x01234567 #define REBOOT_RESTART2 0xA1B2C3D4 #define REBOOT_POWEROFF 0x4321FEDC #define REBOOT_MAGIC1 0xfee1dead #define REBOOT_MAGIC2 0x28121969 #define REBOOT_MAGIC2A 0x05121996 #define REBOOT_MAGIC2B 0x16041998 int linux_reboot(struct thread *td, struct linux_reboot_args *args) { struct reboot_args bsd_args; if (args->magic1 != REBOOT_MAGIC1) return (EINVAL); switch (args->magic2) { case REBOOT_MAGIC2: case REBOOT_MAGIC2A: case REBOOT_MAGIC2B: break; default: return (EINVAL); } switch (args->cmd) { case REBOOT_CAD_ON: case REBOOT_CAD_OFF: return (priv_check(td, PRIV_REBOOT)); case REBOOT_HALT: bsd_args.opt = RB_HALT; break; case REBOOT_RESTART: case REBOOT_RESTART2: bsd_args.opt = 0; break; case REBOOT_POWEROFF: bsd_args.opt = RB_POWEROFF; break; default: return (EINVAL); } return (sys_reboot(td, 
&bsd_args)); } int linux_getpid(struct thread *td, struct linux_getpid_args *args) { td->td_retval[0] = td->td_proc->p_pid; return (0); } int linux_gettid(struct thread *td, struct linux_gettid_args *args) { struct linux_emuldata *em; em = em_find(td); KASSERT(em != NULL, ("gettid: emuldata not found.\n")); td->td_retval[0] = em->em_tid; return (0); } int linux_getppid(struct thread *td, struct linux_getppid_args *args) { td->td_retval[0] = kern_getppid(td); return (0); } int linux_getgid(struct thread *td, struct linux_getgid_args *args) { td->td_retval[0] = td->td_ucred->cr_rgid; return (0); } int linux_getuid(struct thread *td, struct linux_getuid_args *args) { td->td_retval[0] = td->td_ucred->cr_ruid; return (0); } int linux_getsid(struct thread *td, struct linux_getsid_args *args) { return (kern_getsid(td, args->pid)); } int linux_nosys(struct thread *td, struct nosys_args *ignore) { return (ENOSYS); } int linux_getpriority(struct thread *td, struct linux_getpriority_args *args) { int error; error = kern_getpriority(td, args->which, args->who); td->td_retval[0] = 20 - td->td_retval[0]; return (error); } int linux_sethostname(struct thread *td, struct linux_sethostname_args *args) { int name[2]; name[0] = CTL_KERN; name[1] = KERN_HOSTNAME; return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, args->len, 0, 0)); } int linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) { int name[2]; name[0] = CTL_KERN; name[1] = KERN_NISDOMAINNAME; return (userland_sysctl(td, name, 2, 0, 0, 0, args->name, args->len, 0, 0)); } int linux_exit_group(struct thread *td, struct linux_exit_group_args *args) { LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, args->error_code); /* * XXX: we should send a signal to the parent if * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) * as it doesnt occur often. */ exit1(td, args->error_code, 0); /* NOTREACHED */ } #define _LINUX_CAPABILITY_VERSION_1 0x19980330 #define _LINUX_CAPABILITY_VERSION_2 0x20071026 #define _LINUX_CAPABILITY_VERSION_3 0x20080522 struct l_user_cap_header { l_int version; l_int pid; }; struct l_user_cap_data { l_int effective; l_int permitted; l_int inheritable; }; int linux_capget(struct thread *td, struct linux_capget_args *uap) { struct l_user_cap_header luch; struct l_user_cap_data lucd[2]; int error, u32s; if (uap->hdrp == NULL) return (EFAULT); error = copyin(uap->hdrp, &luch, sizeof(luch)); if (error != 0) return (error); switch (luch.version) { case _LINUX_CAPABILITY_VERSION_1: u32s = 1; break; case _LINUX_CAPABILITY_VERSION_2: case _LINUX_CAPABILITY_VERSION_3: u32s = 2; break; default: luch.version = _LINUX_CAPABILITY_VERSION_1; error = copyout(&luch, uap->hdrp, sizeof(luch)); if (error) return (error); return (EINVAL); } if (luch.pid) return (EPERM); if (uap->datap) { /* * The current implementation doesn't support setting * a capability (it's essentially a stub) so indicate * that no capabilities are currently set or available * to request. 
*/ memset(&lucd, 0, u32s * sizeof(lucd[0])); error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0])); } return (error); } int linux_capset(struct thread *td, struct linux_capset_args *uap) { struct l_user_cap_header luch; struct l_user_cap_data lucd[2]; int error, i, u32s; if (uap->hdrp == NULL || uap->datap == NULL) return (EFAULT); error = copyin(uap->hdrp, &luch, sizeof(luch)); if (error != 0) return (error); switch (luch.version) { case _LINUX_CAPABILITY_VERSION_1: u32s = 1; break; case _LINUX_CAPABILITY_VERSION_2: case _LINUX_CAPABILITY_VERSION_3: u32s = 2; break; default: luch.version = _LINUX_CAPABILITY_VERSION_1; error = copyout(&luch, uap->hdrp, sizeof(luch)); if (error) return (error); return (EINVAL); } if (luch.pid) return (EPERM); error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0])); if (error != 0) return (error); /* We currently don't support setting any capabilities. */ for (i = 0; i < u32s; i++) { if (lucd[i].effective || lucd[i].permitted || lucd[i].inheritable) { linux_msg(td, "capset[%d] effective=0x%x, permitted=0x%x, " "inheritable=0x%x is not implemented", i, (int)lucd[i].effective, (int)lucd[i].permitted, (int)lucd[i].inheritable); return (EPERM); } } return (0); } int linux_prctl(struct thread *td, struct linux_prctl_args *args) { int error = 0, max_size, arg; struct proc *p = td->td_proc; char comm[LINUX_MAX_COMM_LEN]; int pdeath_signal, trace_state; switch (args->option) { case LINUX_PR_SET_PDEATHSIG: if (!LINUX_SIG_VALID(args->arg2)) return (EINVAL); pdeath_signal = linux_to_bsd_signal(args->arg2); return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL, &pdeath_signal)); case LINUX_PR_GET_PDEATHSIG: error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS, &pdeath_signal); if (error != 0) return (error); pdeath_signal = bsd_to_linux_signal(pdeath_signal); return (copyout(&pdeath_signal, (void *)(register_t)args->arg2, sizeof(pdeath_signal))); /* * In Linux, this flag controls if set[gu]id processes can coredump. * There are additional semantics imposed on processes that cannot * coredump: * - Such processes can not be ptraced. * - There are some semantics around ownership of process-related files * in the /proc namespace. * * In FreeBSD, we can (and by default, do) disable setuid coredump * system-wide with 'sugid_coredump.' We control tracability on a * per-process basis with the procctl PROC_TRACE (=> P2_NOTRACE flag). * By happy coincidence, P2_NOTRACE also prevents coredumping. So the * procctl is roughly analogous to Linux's DUMPABLE. * * So, proxy these knobs to the corresponding PROC_TRACE setting. */ case LINUX_PR_GET_DUMPABLE: error = kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_STATUS, &trace_state); if (error != 0) return (error); td->td_retval[0] = (trace_state != -1); return (0); case LINUX_PR_SET_DUMPABLE: /* * It is only valid for userspace to set one of these two * flags, and only one at a time. */ switch (args->arg2) { case LINUX_SUID_DUMP_DISABLE: trace_state = PROC_TRACE_CTL_DISABLE_EXEC; break; case LINUX_SUID_DUMP_USER: trace_state = PROC_TRACE_CTL_ENABLE; break; default: return (EINVAL); } return (kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_CTL, &trace_state)); case LINUX_PR_GET_KEEPCAPS: /* * Indicate that we always clear the effective and * permitted capability sets when the user id becomes * non-zero (actually the capability sets are simply * always zero in the current implementation). 
*/ td->td_retval[0] = 0; break; case LINUX_PR_SET_KEEPCAPS: /* * Ignore requests to keep the effective and permitted * capability sets when the user id becomes non-zero. */ break; case LINUX_PR_SET_NAME: /* * To be on the safe side we need to make sure to not * overflow the size a Linux program expects. We already * do this here in the copyin, so that we don't need to * check on copyout. */ max_size = MIN(sizeof(comm), sizeof(p->p_comm)); error = copyinstr((void *)(register_t)args->arg2, comm, max_size, NULL); /* Linux silently truncates the name if it is too long. */ if (error == ENAMETOOLONG) { /* * XXX: copyinstr() isn't documented to populate the * array completely, so do a copyin() to be on the * safe side. This should be changed in case * copyinstr() is changed to guarantee this. */ error = copyin((void *)(register_t)args->arg2, comm, max_size - 1); comm[max_size - 1] = '\0'; } if (error) return (error); PROC_LOCK(p); strlcpy(p->p_comm, comm, sizeof(p->p_comm)); PROC_UNLOCK(p); break; case LINUX_PR_GET_NAME: PROC_LOCK(p); strlcpy(comm, p->p_comm, sizeof(comm)); PROC_UNLOCK(p); error = copyout(comm, (void *)(register_t)args->arg2, strlen(comm) + 1); break; case LINUX_PR_GET_SECCOMP: case LINUX_PR_SET_SECCOMP: /* * Same as returned by Linux without CONFIG_SECCOMP enabled. */ error = EINVAL; break; case LINUX_PR_CAPBSET_READ: #if 0 /* * This makes too much noise with Ubuntu Focal. */ linux_msg(td, "unsupported prctl PR_CAPBSET_READ %d", (int)args->arg2); #endif error = EINVAL; break; case LINUX_PR_SET_NO_NEW_PRIVS: arg = args->arg2 == 1 ? PROC_NO_NEW_PRIVS_ENABLE : PROC_NO_NEW_PRIVS_DISABLE; error = kern_procctl(td, P_PID, p->p_pid, PROC_NO_NEW_PRIVS_CTL, &arg); break; case LINUX_PR_SET_PTRACER: linux_msg(td, "unsupported prctl PR_SET_PTRACER"); error = EINVAL; break; default: linux_msg(td, "unsupported prctl option %d", args->option); error = EINVAL; break; } return (error); } int linux_sched_setparam(struct thread *td, struct linux_sched_setparam_args *uap) { struct sched_param sched_param; struct thread *tdt; int error, policy; error = copyin(uap->param, &sched_param, sizeof(sched_param)); if (error) return (error); tdt = linux_tdfind(td, uap->pid, -1); if (tdt == NULL) return (ESRCH); if (linux_map_sched_prio) { error = kern_sched_getscheduler(td, tdt, &policy); if (error) goto out; switch (policy) { case SCHED_OTHER: if (sched_param.sched_priority != 0) { error = EINVAL; goto out; } sched_param.sched_priority = PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; break; case SCHED_FIFO: case SCHED_RR: if (sched_param.sched_priority < 1 || sched_param.sched_priority >= LINUX_MAX_RT_PRIO) { error = EINVAL; goto out; } /* * Map [1, LINUX_MAX_RT_PRIO - 1] to * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). 
*/ sched_param.sched_priority = (sched_param.sched_priority - 1) * (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / (LINUX_MAX_RT_PRIO - 1); break; } } error = kern_sched_setparam(td, tdt, &sched_param); out: PROC_UNLOCK(tdt->td_proc); return (error); } int linux_sched_getparam(struct thread *td, struct linux_sched_getparam_args *uap) { struct sched_param sched_param; struct thread *tdt; int error, policy; tdt = linux_tdfind(td, uap->pid, -1); if (tdt == NULL) return (ESRCH); error = kern_sched_getparam(td, tdt, &sched_param); if (error) { PROC_UNLOCK(tdt->td_proc); return (error); } if (linux_map_sched_prio) { error = kern_sched_getscheduler(td, tdt, &policy); PROC_UNLOCK(tdt->td_proc); if (error) return (error); switch (policy) { case SCHED_OTHER: sched_param.sched_priority = 0; break; case SCHED_FIFO: case SCHED_RR: /* * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to * [1, LINUX_MAX_RT_PRIO - 1] (rounding up). */ sched_param.sched_priority = (sched_param.sched_priority * (LINUX_MAX_RT_PRIO - 1) + (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) / (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1; break; } } else PROC_UNLOCK(tdt->td_proc); error = copyout(&sched_param, uap->param, sizeof(sched_param)); return (error); } static const struct cpuset_copy_cb copy_set = { .cpuset_copyin = copyin, .cpuset_copyout = copyout }; /* * Get affinity of a process. */ int linux_sched_getaffinity(struct thread *td, struct linux_sched_getaffinity_args *args) { int error; struct thread *tdt; if (args->len < sizeof(cpuset_t)) return (EINVAL); tdt = linux_tdfind(td, args->pid, -1); if (tdt == NULL) return (ESRCH); PROC_UNLOCK(tdt->td_proc); error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, tdt->td_tid, sizeof(cpuset_t), (cpuset_t *)args->user_mask_ptr, &copy_set); if (error == 0) td->td_retval[0] = sizeof(cpuset_t); return (error); } /* * Set affinity of a process. */ int linux_sched_setaffinity(struct thread *td, struct linux_sched_setaffinity_args *args) { struct thread *tdt; if (args->len < sizeof(cpuset_t)) return (EINVAL); tdt = linux_tdfind(td, args->pid, -1); if (tdt == NULL) return (ESRCH); PROC_UNLOCK(tdt->td_proc); return (kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, tdt->td_tid, sizeof(cpuset_t), (cpuset_t *) args->user_mask_ptr, &copy_set)); } struct linux_rlimit64 { uint64_t rlim_cur; uint64_t rlim_max; }; int linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) { struct rlimit rlim, nrlim; struct linux_rlimit64 lrlim; struct proc *p; u_int which; int flags; int error; if (args->new == NULL && args->old != NULL) { if (linux_get_dummy_limit(args->resource, &rlim)) { lrlim.rlim_cur = rlim.rlim_cur; lrlim.rlim_max = rlim.rlim_max; return (copyout(&lrlim, args->old, sizeof(lrlim))); } } if (args->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); which = linux_to_bsd_resource[args->resource]; if (which == -1) return (EINVAL); if (args->new != NULL) { /* * Note. Unlike FreeBSD where rlim is signed 64-bit Linux * rlim is unsigned 64-bit. FreeBSD treats negative limits * as INFINITY so we do not need a conversion even.
*/ error = copyin(args->new, &nrlim, sizeof(nrlim)); if (error != 0) return (error); } flags = PGET_HOLD | PGET_NOTWEXIT; if (args->new != NULL) flags |= PGET_CANDEBUG; else flags |= PGET_CANSEE; if (args->pid == 0) { p = td->td_proc; PHOLD(p); } else { error = pget(args->pid, flags, &p); if (error != 0) return (error); } if (args->old != NULL) { PROC_LOCK(p); lim_rlimit_proc(p, which, &rlim); PROC_UNLOCK(p); if (rlim.rlim_cur == RLIM_INFINITY) lrlim.rlim_cur = LINUX_RLIM_INFINITY; else lrlim.rlim_cur = rlim.rlim_cur; if (rlim.rlim_max == RLIM_INFINITY) lrlim.rlim_max = LINUX_RLIM_INFINITY; else lrlim.rlim_max = rlim.rlim_max; error = copyout(&lrlim, args->old, sizeof(lrlim)); if (error != 0) goto out; } if (args->new != NULL) error = kern_proc_setrlimit(td, p, which, &nrlim); out: PRELE(p); return (error); } int linux_pselect6(struct thread *td, struct linux_pselect6_args *args) { struct l_timespec lts; struct timespec ts, *tsp; int error; if (args->tsp != NULL) { error = copyin(args->tsp, &lts, sizeof(lts)); if (error != 0) return (error); error = linux_to_native_timespec(&ts, &lts); if (error != 0) return (error); tsp = &ts; } else tsp = NULL; error = linux_common_pselect6(td, args->nfds, args->readfds, args->writefds, args->exceptfds, tsp, args->sig); if (error != 0) return (error); if (args->tsp != NULL) { error = native_to_linux_timespec(&lts, tsp); if (error == 0) error = copyout(&lts, args->tsp, sizeof(lts)); } return (error); } static int linux_common_pselect6(struct thread *td, l_int nfds, l_fd_set *readfds, l_fd_set *writefds, l_fd_set *exceptfds, struct timespec *tsp, l_uintptr_t *sig) { struct timeval utv, tv0, tv1, *tvp; struct l_pselect6arg lpse6; l_sigset_t l_ss; sigset_t *ssp; sigset_t ss; int error; ssp = NULL; if (sig != NULL) { error = copyin(sig, &lpse6, sizeof(lpse6)); if (error != 0) return (error); if (lpse6.ss_len != sizeof(l_ss)) return (EINVAL); if (lpse6.ss != 0) { error = copyin(PTRIN(lpse6.ss), &l_ss, sizeof(l_ss)); if (error != 0) return (error); linux_to_bsd_sigset(&l_ss, &ss); ssp = &ss; } } else ssp = NULL; /* * Currently glibc changes nanosecond number to microsecond. * This mean losing precision but for now it is hardly seen. */ if (tsp != NULL) { TIMESPEC_TO_TIMEVAL(&utv, tsp); if (itimerfix(&utv)) return (EINVAL); microtime(&tv0); tvp = &utv; } else tvp = NULL; error = kern_pselect(td, nfds, readfds, writefds, exceptfds, tvp, ssp, LINUX_NFDBITS); if (error == 0 && tsp != NULL) { if (td->td_retval[0] != 0) { /* * Compute how much time was left of the timeout, * by subtracting the current time and the time * before we started the call, and subtracting * that result from the user-supplied value.
*/ microtime(&tv1); timevalsub(&tv1, &tv0); timevalsub(&utv, &tv1); if (utv.tv_sec < 0) timevalclear(&utv); } else timevalclear(&utv); TIMEVAL_TO_TIMESPEC(&utv, tsp); } return (error); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_pselect6_time64(struct thread *td, struct linux_pselect6_time64_args *args) { struct l_timespec64 lts; struct timespec ts, *tsp; int error; if (args->tsp != NULL) { error = copyin(args->tsp, &lts, sizeof(lts)); if (error != 0) return (error); error = linux_to_native_timespec64(&ts, &lts); if (error != 0) return (error); tsp = &ts; } else tsp = NULL; error = linux_common_pselect6(td, args->nfds, args->readfds, args->writefds, args->exceptfds, tsp, args->sig); if (error != 0) return (error); if (args->tsp != NULL) { error = native_to_linux_timespec64(&lts, tsp); if (error == 0) error = copyout(&lts, args->tsp, sizeof(lts)); } return (error); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_ppoll(struct thread *td, struct linux_ppoll_args *args) { struct timespec uts, *tsp; struct l_timespec lts; int error; if (args->tsp != NULL) { error = copyin(args->tsp, &lts, sizeof(lts)); if (error) return (error); error = linux_to_native_timespec(&uts, &lts); if (error != 0) return (error); tsp = &uts; } else tsp = NULL; error = linux_common_ppoll(td, args->fds, args->nfds, tsp, args->sset, args->ssize); if (error != 0) return (error); if (tsp != NULL) { error = native_to_linux_timespec(&lts, tsp); if (error == 0) error = copyout(&lts, args->tsp, sizeof(lts)); } return (error); } static int linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds, struct timespec *tsp, l_sigset_t *sset, l_size_t ssize) { struct timespec ts0, ts1; struct pollfd stackfds[32]; struct pollfd *kfds; l_sigset_t l_ss; sigset_t *ssp; sigset_t ss; int error; if (kern_poll_maxfds(nfds)) return (EINVAL); if (sset != NULL) { if (ssize != sizeof(l_ss)) return (EINVAL); error = copyin(sset, &l_ss, sizeof(l_ss)); if (error) return (error); linux_to_bsd_sigset(&l_ss, &ss); ssp = &ss; } else ssp = NULL; if (tsp != NULL) nanotime(&ts0); if (nfds > nitems(stackfds)) kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK); else kfds = stackfds; error = linux_pollin(td, kfds, fds, nfds); if (error != 0) goto out; error = kern_poll_kfds(td, kfds, nfds, tsp, ssp); if (error == 0) error = linux_pollout(td, kfds, fds, nfds); if (error == 0 && tsp != NULL) { if (td->td_retval[0]) { nanotime(&ts1); timespecsub(&ts1, &ts0, &ts1); timespecsub(tsp, &ts1, tsp); if (tsp->tv_sec < 0) timespecclear(tsp); } else timespecclear(tsp); } out: if (nfds > nitems(stackfds)) free(kfds, M_TEMP); return (error); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_ppoll_time64(struct thread *td, struct linux_ppoll_time64_args *args) { struct timespec uts, *tsp; struct l_timespec64 lts; int error; if (args->tsp != NULL) { error = copyin(args->tsp, &lts, sizeof(lts)); if (error != 0) return (error); error = linux_to_native_timespec64(&uts, &lts); if (error != 0) return (error); tsp = &uts; } else tsp = NULL; error = linux_common_ppoll(td, args->fds, args->nfds, tsp, args->sset, args->ssize); if (error != 0) return (error); if (tsp != NULL) { error = native_to_linux_timespec64(&lts, tsp); if (error == 0) error = copyout(&lts, args->tsp, sizeof(lts)); } return (error); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ static int linux_pollin(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd) { int error; u_int i; error = copyin(ufds,
fds, nfd * sizeof(*fds)); if (error != 0) return (error); for (i = 0; i < nfd; i++) { if (fds->events != 0) linux_to_bsd_poll_events(td, fds->fd, fds->events, &fds->events); fds++; } return (0); } static int linux_pollout(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd) { int error = 0; u_int i, n = 0; for (i = 0; i < nfd; i++) { if (fds->revents != 0) { bsd_to_linux_poll_events(fds->revents, &fds->revents); n++; } error = copyout(&fds->revents, &ufds->revents, sizeof(ufds->revents)); if (error) return (error); fds++; ufds++; } td->td_retval[0] = n; return (0); } int linux_sched_rr_get_interval(struct thread *td, struct linux_sched_rr_get_interval_args *uap) { struct timespec ts; struct l_timespec lts; struct thread *tdt; int error; /* * According to man in case the invalid pid specified * EINVAL should be returned. */ if (uap->pid < 0) return (EINVAL); tdt = linux_tdfind(td, uap->pid, -1); if (tdt == NULL) return (ESRCH); error = kern_sched_rr_get_interval_td(td, tdt, &ts); PROC_UNLOCK(tdt->td_proc); if (error != 0) return (error); error = native_to_linux_timespec(&lts, &ts); if (error != 0) return (error); return (copyout(&lts, uap->interval, sizeof(lts))); } /* * In case when the Linux thread is the initial thread in * the thread group thread id is equal to the process id. * Glibc depends on this magic (assert in pthread_getattr_np.c). */ struct thread * linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) { struct linux_emuldata *em; struct thread *tdt; struct proc *p; tdt = NULL; if (tid == 0 || tid == td->td_tid) { tdt = td; PROC_LOCK(tdt->td_proc); } else if (tid > PID_MAX) tdt = tdfind(tid, pid); else { /* * Initial thread where the tid equal to the pid. */ p = pfind(tid); if (p != NULL) { if (SV_PROC_ABI(p) != SV_ABI_LINUX) { /* * p is not a Linuxulator process.
*/ PROC_UNLOCK(p); return (NULL); } FOREACH_THREAD_IN_PROC(p, tdt) { em = em_find(tdt); if (tid == em->em_tid) return (tdt); } PROC_UNLOCK(p); } return (NULL); } return (tdt); } void linux_to_bsd_waitopts(int options, int *bsdopts) { if (options & LINUX_WNOHANG) *bsdopts |= WNOHANG; if (options & LINUX_WUNTRACED) *bsdopts |= WUNTRACED; if (options & LINUX_WEXITED) *bsdopts |= WEXITED; if (options & LINUX_WCONTINUED) *bsdopts |= WCONTINUED; if (options & LINUX_WNOWAIT) *bsdopts |= WNOWAIT; if (options & __WCLONE) *bsdopts |= WLINUXCLONE; } int linux_getrandom(struct thread *td, struct linux_getrandom_args *args) { struct uio uio; struct iovec iov; int error; if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM)) return (EINVAL); if (args->count > INT_MAX) args->count = INT_MAX; iov.iov_base = args->buf; iov.iov_len = args->count; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_resid = iov.iov_len; uio.uio_segflg = UIO_USERSPACE; uio.uio_rw = UIO_READ; uio.uio_td = td; error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK); if (error == 0) td->td_retval[0] = args->count - uio.uio_resid; return (error); } int linux_mincore(struct thread *td, struct linux_mincore_args *args) { /* Needs to be page-aligned */ if (args->start & PAGE_MASK) return (EINVAL); return (kern_mincore(td, args->start, args->len, args->vec)); } #define SYSLOG_TAG "<6>" int linux_syslog(struct thread *td, struct linux_syslog_args *args) { char buf[128], *src, *dst; u_int seq; int buflen, error; if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) { linux_msg(td, "syslog unsupported type 0x%x", args->type); return (EINVAL); } if (args->len < 6) { td->td_retval[0] = 0; return (0); } error = priv_check(td, PRIV_MSGBUF); if (error) return (error); mtx_lock(&msgbuf_lock); msgbuf_peekbytes(msgbufp, NULL, 0, &seq); mtx_unlock(&msgbuf_lock); dst = args->buf; error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG)); /* The -1 is to skip the trailing '\0'. 
*/ dst += sizeof(SYSLOG_TAG) - 1; while (error == 0) { mtx_lock(&msgbuf_lock); buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq); mtx_unlock(&msgbuf_lock); if (buflen == 0) break; for (src = buf; src < buf + buflen && error == 0; src++) { if (*src == '\0') continue; if (dst >= args->buf + args->len) goto out; error = copyout(src, dst, 1); dst++; if (*src == '\n' && *(src + 1) != '<' && dst + sizeof(SYSLOG_TAG) < args->buf + args->len) { error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG)); dst += sizeof(SYSLOG_TAG) - 1; } } } out: td->td_retval[0] = dst - args->buf; return (error); } int linux_getcpu(struct thread *td, struct linux_getcpu_args *args) { int cpu, error, node; cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9) */ error = 0; node = cpuid_to_pcpu[cpu]->pc_domain; if (args->cpu != NULL) error = copyout(&cpu, args->cpu, sizeof(l_int)); if (args->node != NULL) error = copyout(&node, args->node, sizeof(l_int)); return (error); } #if defined(__i386__) || defined(__amd64__) int linux_poll(struct thread *td, struct linux_poll_args *args) { struct timespec ts, *tsp; if (args->timeout != INFTIM) { if (args->timeout < 0) return (EINVAL); ts.tv_sec = args->timeout / 1000; ts.tv_nsec = (args->timeout % 1000) * 1000000; tsp = &ts; } else tsp = NULL; return (linux_common_ppoll(td, args->fds, args->nfds, tsp, NULL, 0)); } #endif /* __i386__ || __amd64__ */ diff --git a/sys/compat/linux/linux_stats.c b/sys/compat/linux/linux_stats.c index 97bace941943..9dc7686cb963 100644 --- a/sys/compat/linux/linux_stats.c +++ b/sys/compat/linux/linux_stats.c @@ -1,802 +1,802 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1994-1995 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_LINUX32 #include #include #else #include #include #endif #include #include static void translate_vnhook_major_minor(struct vnode *vp, struct stat *sb) { int major, minor; if (vn_isdisk(vp)) { sb->st_mode &= ~S_IFMT; sb->st_mode |= S_IFBLK; } /* * Return the same st_dev for every devfs instance. 
The reason * for this is to work around an idiosyncrasy of glibc getttynam() * implementation: it checks whether st_dev returned for fd 0 * is the same as st_dev returned for the target of /proc/self/fd/0 * symlink, and with linux chroots having their own devfs instance, * the check will fail if you chroot into it. */ if (rootdevmp != NULL && vp->v_mount->mnt_vfc == rootdevmp->mnt_vfc) sb->st_dev = rootdevmp->mnt_stat.f_fsid.val[0]; if (linux_vn_get_major_minor(vp, &major, &minor) == 0) sb->st_rdev = (major << 8 | minor); } static int linux_kern_statat(struct thread *td, int flag, int fd, const char *path, enum uio_seg pathseg, struct stat *sbp) { return (kern_statat(td, flag, fd, path, pathseg, sbp, translate_vnhook_major_minor)); } #ifdef LINUX_LEGACY_SYSCALLS static int linux_kern_stat(struct thread *td, const char *path, enum uio_seg pathseg, struct stat *sbp) { return (linux_kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp)); } static int linux_kern_lstat(struct thread *td, const char *path, enum uio_seg pathseg, struct stat *sbp) { return (linux_kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg, sbp)); } #endif static void translate_fd_major_minor(struct thread *td, int fd, struct stat *buf) { struct file *fp; struct vnode *vp; struct mount *mp; int major, minor; /* * No capability rights required here. */ if ((!S_ISCHR(buf->st_mode) && !S_ISBLK(buf->st_mode)) || fget(td, fd, &cap_no_rights, &fp) != 0) return; vp = fp->f_vnode; if (vp != NULL && vn_isdisk(vp)) { buf->st_mode &= ~S_IFMT; buf->st_mode |= S_IFBLK; } if (vp != NULL && rootdevmp != NULL) { mp = vp->v_mount; __compiler_membar(); if (mp != NULL && mp->mnt_vfc == rootdevmp->mnt_vfc) buf->st_dev = rootdevmp->mnt_stat.f_fsid.val[0]; } if (linux_vn_get_major_minor(vp, &major, &minor) == 0) { buf->st_rdev = (major << 8 | minor); } else if (fp->f_type == DTYPE_PTS) { struct tty *tp = fp->f_data; /* Convert the numbers for the slave device. */ if (linux_driver_get_major_minor(devtoname(tp->t_dev), &major, &minor) == 0) { buf->st_rdev = (major << 8 | minor); } } fdrop(fp, td); } /* * l_dev_t has the same encoding as dev_t in the latter's low 16 bits, so * truncation of a dev_t to 16 bits gives the same result as unpacking * using major() and minor() and repacking in the l_dev_t format. This * detail is hidden in dev_to_ldev(). Overflow in conversions of dev_t's * are not checked for, as for other fields. * * dev_to_ldev() is only used for translating st_dev. When we convert * st_rdev for copying it out, it isn't really a dev_t, but has already * been translated to an l_dev_t in a nontrivial way. Translating it * again would be illogical but would have no effect since the low 16 * bits have the same encoding. * * The nontrivial translation for st_rdev renumbers some devices, but not * ones that can be mounted on, so it is consistent with the translation * for st_dev except when the renumbering or truncation causes conflicts. 
*/ #define dev_to_ldev(d) ((uint16_t)(d)) static int newstat_copyout(struct stat *buf, void *ubuf) { struct l_newstat tbuf; bzero(&tbuf, sizeof(tbuf)); tbuf.st_dev = dev_to_ldev(buf->st_dev); tbuf.st_ino = buf->st_ino; tbuf.st_mode = buf->st_mode; tbuf.st_nlink = buf->st_nlink; tbuf.st_uid = buf->st_uid; tbuf.st_gid = buf->st_gid; tbuf.st_rdev = buf->st_rdev; tbuf.st_size = buf->st_size; tbuf.st_atim.tv_sec = buf->st_atim.tv_sec; tbuf.st_atim.tv_nsec = buf->st_atim.tv_nsec; tbuf.st_mtim.tv_sec = buf->st_mtim.tv_sec; tbuf.st_mtim.tv_nsec = buf->st_mtim.tv_nsec; tbuf.st_ctim.tv_sec = buf->st_ctim.tv_sec; tbuf.st_ctim.tv_nsec = buf->st_ctim.tv_nsec; tbuf.st_blksize = buf->st_blksize; tbuf.st_blocks = buf->st_blocks; return (copyout(&tbuf, ubuf, sizeof(tbuf))); } static int statx_copyout(struct stat *buf, void *ubuf) { struct l_statx tbuf; bzero(&tbuf, sizeof(tbuf)); tbuf.stx_mask = STATX_ALL; tbuf.stx_blksize = buf->st_blksize; tbuf.stx_attributes = 0; tbuf.stx_nlink = buf->st_nlink; tbuf.stx_uid = buf->st_uid; tbuf.stx_gid = buf->st_gid; tbuf.stx_mode = buf->st_mode; tbuf.stx_ino = buf->st_ino; tbuf.stx_size = buf->st_size; tbuf.stx_blocks = buf->st_blocks; tbuf.stx_atime.tv_sec = buf->st_atim.tv_sec; tbuf.stx_atime.tv_nsec = buf->st_atim.tv_nsec; tbuf.stx_btime.tv_sec = buf->st_birthtim.tv_sec; tbuf.stx_btime.tv_nsec = buf->st_birthtim.tv_nsec; tbuf.stx_ctime.tv_sec = buf->st_ctim.tv_sec; tbuf.stx_ctime.tv_nsec = buf->st_ctim.tv_nsec; tbuf.stx_mtime.tv_sec = buf->st_mtim.tv_sec; tbuf.stx_mtime.tv_nsec = buf->st_mtim.tv_nsec; tbuf.stx_rdev_major = buf->st_rdev >> 8; tbuf.stx_rdev_minor = buf->st_rdev & 0xff; tbuf.stx_dev_major = buf->st_dev >> 8; tbuf.stx_dev_minor = buf->st_dev & 0xff; return (copyout(&tbuf, ubuf, sizeof(tbuf))); } #ifdef LINUX_LEGACY_SYSCALLS int linux_newstat(struct thread *td, struct linux_newstat_args *args) { struct stat buf; char *path; int error; if (!LUSECONVPATH(td)) { error = linux_kern_stat(td, args->path, UIO_USERSPACE, &buf); } else { - LCONVPATHEXIST(td, args->path, &path); + LCONVPATHEXIST(args->path, &path); error = linux_kern_stat(td, path, UIO_SYSSPACE, &buf); LFREEPATH(path); } if (error) return (error); return (newstat_copyout(&buf, args->buf)); } int linux_newlstat(struct thread *td, struct linux_newlstat_args *args) { struct stat sb; char *path; int error; if (!LUSECONVPATH(td)) { error = linux_kern_lstat(td, args->path, UIO_USERSPACE, &sb); } else { - LCONVPATHEXIST(td, args->path, &path); + LCONVPATHEXIST(args->path, &path); error = linux_kern_lstat(td, path, UIO_SYSSPACE, &sb); LFREEPATH(path); } if (error) return (error); return (newstat_copyout(&sb, args->buf)); } #endif int linux_newfstat(struct thread *td, struct linux_newfstat_args *args) { struct stat buf; int error; error = kern_fstat(td, args->fd, &buf); translate_fd_major_minor(td, args->fd, &buf); if (!error) error = newstat_copyout(&buf, args->buf); return (error); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) static int stat_copyout(struct stat *buf, void *ubuf) { struct l_stat lbuf; bzero(&lbuf, sizeof(lbuf)); lbuf.st_dev = dev_to_ldev(buf->st_dev); lbuf.st_ino = buf->st_ino; lbuf.st_mode = buf->st_mode; lbuf.st_nlink = buf->st_nlink; lbuf.st_uid = buf->st_uid; lbuf.st_gid = buf->st_gid; lbuf.st_rdev = buf->st_rdev; lbuf.st_size = MIN(buf->st_size, INT32_MAX); lbuf.st_atim.tv_sec = buf->st_atim.tv_sec; lbuf.st_atim.tv_nsec = buf->st_atim.tv_nsec; lbuf.st_mtim.tv_sec = buf->st_mtim.tv_sec; lbuf.st_mtim.tv_nsec = buf->st_mtim.tv_nsec; lbuf.st_ctim.tv_sec = 
buf->st_ctim.tv_sec; lbuf.st_ctim.tv_nsec = buf->st_ctim.tv_nsec; lbuf.st_blksize = buf->st_blksize; lbuf.st_blocks = buf->st_blocks; lbuf.st_flags = buf->st_flags; lbuf.st_gen = buf->st_gen; return (copyout(&lbuf, ubuf, sizeof(lbuf))); } int linux_stat(struct thread *td, struct linux_stat_args *args) { struct stat buf; char *path; int error; if (!LUSECONVPATH(td)) { error = linux_kern_stat(td, args->path, UIO_USERSPACE, &buf); } else { - LCONVPATHEXIST(td, args->path, &path); + LCONVPATHEXIST(args->path, &path); error = linux_kern_stat(td, path, UIO_SYSSPACE, &buf); LFREEPATH(path); } if (error) { return (error); } return (stat_copyout(&buf, args->up)); } int linux_lstat(struct thread *td, struct linux_lstat_args *args) { struct stat buf; char *path; int error; if (!LUSECONVPATH(td)) { error = linux_kern_lstat(td, args->path, UIO_USERSPACE, &buf); } else { - LCONVPATHEXIST(td, args->path, &path); + LCONVPATHEXIST(args->path, &path); error = linux_kern_lstat(td, path, UIO_SYSSPACE, &buf); LFREEPATH(path); } if (error) { return (error); } return (stat_copyout(&buf, args->up)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ struct l_statfs { l_long f_type; l_long f_bsize; l_long f_blocks; l_long f_bfree; l_long f_bavail; l_long f_files; l_long f_ffree; l_fsid_t f_fsid; l_long f_namelen; l_long f_frsize; l_long f_flags; l_long f_spare[4]; }; #define LINUX_CODA_SUPER_MAGIC 0x73757245L #define LINUX_EXT2_SUPER_MAGIC 0xEF53L #define LINUX_HPFS_SUPER_MAGIC 0xf995e849L #define LINUX_ISOFS_SUPER_MAGIC 0x9660L #define LINUX_MSDOS_SUPER_MAGIC 0x4d44L #define LINUX_NCP_SUPER_MAGIC 0x564cL #define LINUX_NFS_SUPER_MAGIC 0x6969L #define LINUX_NTFS_SUPER_MAGIC 0x5346544EL #define LINUX_PROC_SUPER_MAGIC 0x9fa0L #define LINUX_UFS_SUPER_MAGIC 0x00011954L /* XXX - UFS_MAGIC in Linux */ #define LINUX_ZFS_SUPER_MAGIC 0x2FC12FC1 #define LINUX_DEVFS_SUPER_MAGIC 0x1373L #define LINUX_SHMFS_MAGIC 0x01021994 static long bsd_to_linux_ftype(const char *fstypename) { int i; static struct {const char *bsd_name; long linux_type;} b2l_tbl[] = { {"ufs", LINUX_UFS_SUPER_MAGIC}, {"zfs", LINUX_ZFS_SUPER_MAGIC}, {"cd9660", LINUX_ISOFS_SUPER_MAGIC}, {"nfs", LINUX_NFS_SUPER_MAGIC}, {"ext2fs", LINUX_EXT2_SUPER_MAGIC}, {"procfs", LINUX_PROC_SUPER_MAGIC}, {"msdosfs", LINUX_MSDOS_SUPER_MAGIC}, {"ntfs", LINUX_NTFS_SUPER_MAGIC}, {"nwfs", LINUX_NCP_SUPER_MAGIC}, {"hpfs", LINUX_HPFS_SUPER_MAGIC}, {"coda", LINUX_CODA_SUPER_MAGIC}, {"devfs", LINUX_DEVFS_SUPER_MAGIC}, {"tmpfs", LINUX_SHMFS_MAGIC}, {NULL, 0L}}; for (i = 0; b2l_tbl[i].bsd_name != NULL; i++) if (strcmp(b2l_tbl[i].bsd_name, fstypename) == 0) return (b2l_tbl[i].linux_type); return (0L); } static int bsd_to_linux_statfs(struct statfs *bsd_statfs, struct l_statfs *linux_statfs) { #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) uint64_t tmp; #define LINUX_HIBITS 0xffffffff00000000ULL tmp = bsd_statfs->f_blocks | bsd_statfs->f_bfree | bsd_statfs->f_files | bsd_statfs->f_bsize; if ((bsd_statfs->f_bavail != -1 && (bsd_statfs->f_bavail & LINUX_HIBITS)) || (bsd_statfs->f_ffree != -1 && (bsd_statfs->f_ffree & LINUX_HIBITS)) || (tmp & LINUX_HIBITS)) return (EOVERFLOW); #undef LINUX_HIBITS #endif linux_statfs->f_type = bsd_to_linux_ftype(bsd_statfs->f_fstypename); linux_statfs->f_bsize = bsd_statfs->f_bsize; linux_statfs->f_blocks = bsd_statfs->f_blocks; linux_statfs->f_bfree = bsd_statfs->f_bfree; linux_statfs->f_bavail = bsd_statfs->f_bavail; linux_statfs->f_ffree = bsd_statfs->f_ffree; linux_statfs->f_files = bsd_statfs->f_files; 
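/*
 * Editor's note -- illustrative sketch, not part of this change: the
 * LINUX_HIBITS test at the top of this function rejects any 64-bit
 * statfs counter whose upper 32 bits are set, since such a value cannot
 * be represented in the 32-bit fields of the i386/COMPAT_LINUX32
 * struct l_statfs, while the special -1 ("unknown") values of f_bavail
 * and f_ffree are allowed through.  The same idea in standalone form
 * (function and parameter names are made up for the example):
 *
 *	#include <stdbool.h>
 *	#include <stdint.h>
 *
 *	static bool
 *	fits_in_l_statfs(uint64_t blocks, int64_t bavail)
 *	{
 *		const uint64_t hibits = 0xffffffff00000000ULL;
 *
 *		if ((blocks & hibits) != 0)
 *			return (false);	// would overflow an l_long field
 *		if (bavail != -1 && ((uint64_t)bavail & hibits) != 0)
 *			return (false);	// ditto, but -1 passes through
 *		return (true);
 *	}
 */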
linux_statfs->f_fsid.val[0] = bsd_statfs->f_fsid.val[0]; linux_statfs->f_fsid.val[1] = bsd_statfs->f_fsid.val[1]; linux_statfs->f_namelen = MAXNAMLEN; linux_statfs->f_frsize = bsd_statfs->f_bsize; linux_statfs->f_flags = 0; memset(linux_statfs->f_spare, 0, sizeof(linux_statfs->f_spare)); return (0); } int linux_statfs(struct thread *td, struct linux_statfs_args *args) { struct l_statfs linux_statfs; struct statfs *bsd_statfs; char *path; int error; if (!LUSECONVPATH(td)) { bsd_statfs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_statfs(td, args->path, UIO_USERSPACE, bsd_statfs); } else { - LCONVPATHEXIST(td, args->path, &path); + LCONVPATHEXIST(args->path, &path); bsd_statfs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_statfs(td, path, UIO_SYSSPACE, bsd_statfs); LFREEPATH(path); } if (error == 0) error = bsd_to_linux_statfs(bsd_statfs, &linux_statfs); free(bsd_statfs, M_STATFS); if (error != 0) return (error); return (copyout(&linux_statfs, args->buf, sizeof(linux_statfs))); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) static void bsd_to_linux_statfs64(struct statfs *bsd_statfs, struct l_statfs64 *linux_statfs) { linux_statfs->f_type = bsd_to_linux_ftype(bsd_statfs->f_fstypename); linux_statfs->f_bsize = bsd_statfs->f_bsize; linux_statfs->f_blocks = bsd_statfs->f_blocks; linux_statfs->f_bfree = bsd_statfs->f_bfree; linux_statfs->f_bavail = bsd_statfs->f_bavail; linux_statfs->f_ffree = bsd_statfs->f_ffree; linux_statfs->f_files = bsd_statfs->f_files; linux_statfs->f_fsid.val[0] = bsd_statfs->f_fsid.val[0]; linux_statfs->f_fsid.val[1] = bsd_statfs->f_fsid.val[1]; linux_statfs->f_namelen = MAXNAMLEN; linux_statfs->f_frsize = bsd_statfs->f_bsize; linux_statfs->f_flags = 0; memset(linux_statfs->f_spare, 0, sizeof(linux_statfs->f_spare)); } int linux_statfs64(struct thread *td, struct linux_statfs64_args *args) { struct l_statfs64 linux_statfs; struct statfs *bsd_statfs; char *path; int error; if (args->bufsize != sizeof(struct l_statfs64)) return (EINVAL); if (!LUSECONVPATH(td)) { bsd_statfs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_statfs(td, args->path, UIO_USERSPACE, bsd_statfs); } else { - LCONVPATHEXIST(td, args->path, &path); + LCONVPATHEXIST(args->path, &path); bsd_statfs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_statfs(td, path, UIO_SYSSPACE, bsd_statfs); LFREEPATH(path); } if (error == 0) bsd_to_linux_statfs64(bsd_statfs, &linux_statfs); free(bsd_statfs, M_STATFS); if (error != 0) return (error); return (copyout(&linux_statfs, args->buf, sizeof(linux_statfs))); } int linux_fstatfs64(struct thread *td, struct linux_fstatfs64_args *args) { struct l_statfs64 linux_statfs; struct statfs *bsd_statfs; int error; if (args->bufsize != sizeof(struct l_statfs64)) return (EINVAL); bsd_statfs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fstatfs(td, args->fd, bsd_statfs); if (error == 0) bsd_to_linux_statfs64(bsd_statfs, &linux_statfs); free(bsd_statfs, M_STATFS); if (error != 0) return (error); return (copyout(&linux_statfs, args->buf, sizeof(linux_statfs))); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_fstatfs(struct thread *td, struct linux_fstatfs_args *args) { struct l_statfs linux_statfs; struct statfs *bsd_statfs; int error; bsd_statfs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fstatfs(td, args->fd, bsd_statfs); if (error == 0) error = bsd_to_linux_statfs(bsd_statfs, &linux_statfs); free(bsd_statfs, M_STATFS); 
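/*
 * Editor's note -- illustrative sketch, not part of this change: as the
 * dev_to_ldev() comment earlier in this file describes, the stat-family
 * handlers here pack device numbers into a 16-bit l_dev_t as
 * (major << 8 | minor) and split them again with >> 8 and & 0xff (see
 * statx_copyout() above).  A minimal illustration of that encoding
 * (helper name is made up for the example):
 *
 *	#include <stdint.h>
 *
 *	static uint16_t
 *	pack_l_dev(unsigned int major, unsigned int minor)
 *	{
 *		return ((uint16_t)((major << 8) | (minor & 0xff)));
 *	}
 *
 *	// pack_l_dev(5, 1) == 0x0501; 0x0501 >> 8 recovers major number 5
 *	// and 0x0501 & 0xff recovers minor number 1.
 */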
if (error != 0) return (error); return (copyout(&linux_statfs, args->buf, sizeof(linux_statfs))); } struct l_ustat { l_daddr_t f_tfree; l_ino_t f_tinode; char f_fname[6]; char f_fpack[6]; }; #ifdef LINUX_LEGACY_SYSCALLS int linux_ustat(struct thread *td, struct linux_ustat_args *args) { return (EOPNOTSUPP); } #endif #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) static int stat64_copyout(struct stat *buf, void *ubuf) { struct l_stat64 lbuf; bzero(&lbuf, sizeof(lbuf)); lbuf.st_dev = dev_to_ldev(buf->st_dev); lbuf.st_ino = buf->st_ino; lbuf.st_mode = buf->st_mode; lbuf.st_nlink = buf->st_nlink; lbuf.st_uid = buf->st_uid; lbuf.st_gid = buf->st_gid; lbuf.st_rdev = buf->st_rdev; lbuf.st_size = buf->st_size; lbuf.st_atim.tv_sec = buf->st_atim.tv_sec; lbuf.st_atim.tv_nsec = buf->st_atim.tv_nsec; lbuf.st_mtim.tv_sec = buf->st_mtim.tv_sec; lbuf.st_mtim.tv_nsec = buf->st_mtim.tv_nsec; lbuf.st_ctim.tv_sec = buf->st_ctim.tv_sec; lbuf.st_ctim.tv_nsec = buf->st_ctim.tv_nsec; lbuf.st_blksize = buf->st_blksize; lbuf.st_blocks = buf->st_blocks; /* * The __st_ino field makes all the difference. In the Linux kernel * it is conditionally compiled based on STAT64_HAS_BROKEN_ST_INO, * but without the assignment to __st_ino the runtime linker refuses * to mmap(2) any shared libraries. I guess it's broken alright :-) */ lbuf.__st_ino = buf->st_ino; return (copyout(&lbuf, ubuf, sizeof(lbuf))); } int linux_stat64(struct thread *td, struct linux_stat64_args *args) { struct stat buf; char *filename; int error; if (!LUSECONVPATH(td)) { error = linux_kern_stat(td, args->filename, UIO_USERSPACE, &buf); } else { - LCONVPATHEXIST(td, args->filename, &filename); + LCONVPATHEXIST(args->filename, &filename); error = linux_kern_stat(td, filename, UIO_SYSSPACE, &buf); LFREEPATH(filename); } if (error) return (error); return (stat64_copyout(&buf, args->statbuf)); } int linux_lstat64(struct thread *td, struct linux_lstat64_args *args) { struct stat sb; char *filename; int error; if (!LUSECONVPATH(td)) { error = linux_kern_lstat(td, args->filename, UIO_USERSPACE, &sb); } else { - LCONVPATHEXIST(td, args->filename, &filename); + LCONVPATHEXIST(args->filename, &filename); error = linux_kern_lstat(td, filename, UIO_SYSSPACE, &sb); LFREEPATH(filename); } if (error) return (error); return (stat64_copyout(&sb, args->statbuf)); } int linux_fstat64(struct thread *td, struct linux_fstat64_args *args) { struct stat buf; int error; error = kern_fstat(td, args->fd, &buf); translate_fd_major_minor(td, args->fd, &buf); if (!error) error = stat64_copyout(&buf, args->statbuf); return (error); } int linux_fstatat64(struct thread *td, struct linux_fstatat64_args *args) { char *path; int error, dfd, flag, unsupported; struct stat buf; unsupported = args->flag & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH); if (unsupported != 0) { linux_msg(td, "fstatat64 unsupported flag 0x%x", unsupported); return (EINVAL); } flag = (args->flag & LINUX_AT_SYMLINK_NOFOLLOW) ? AT_SYMLINK_NOFOLLOW : 0; flag |= (args->flag & LINUX_AT_EMPTY_PATH) ? AT_EMPTY_PATH : 0; dfd = (args->dfd == LINUX_AT_FDCWD) ? 
AT_FDCWD : args->dfd; if (!LUSECONVPATH(td)) { error = linux_kern_statat(td, flag, dfd, args->pathname, UIO_USERSPACE, &buf); } else { - LCONVPATHEXIST_AT(td, args->pathname, &path, dfd); + LCONVPATHEXIST_AT(args->pathname, &path, dfd); error = linux_kern_statat(td, flag, dfd, path, UIO_SYSSPACE, &buf); LFREEPATH(path); } if (error == 0) error = stat64_copyout(&buf, args->statbuf); return (error); } #else /* __amd64__ && !COMPAT_LINUX32 */ int linux_newfstatat(struct thread *td, struct linux_newfstatat_args *args) { char *path; int error, dfd, flag, unsupported; struct stat buf; unsupported = args->flag & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH); if (unsupported != 0) { linux_msg(td, "fstatat unsupported flag 0x%x", unsupported); return (EINVAL); } flag = (args->flag & LINUX_AT_SYMLINK_NOFOLLOW) ? AT_SYMLINK_NOFOLLOW : 0; flag |= (args->flag & LINUX_AT_EMPTY_PATH) ? AT_EMPTY_PATH : 0; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; if (!LUSECONVPATH(td)) { error = linux_kern_statat(td, flag, dfd, args->pathname, UIO_USERSPACE, &buf); } else { - LCONVPATHEXIST_AT(td, args->pathname, &path, dfd); + LCONVPATHEXIST_AT(args->pathname, &path, dfd); error = linux_kern_statat(td, flag, dfd, path, UIO_SYSSPACE, &buf); LFREEPATH(path); } if (error == 0) error = newstat_copyout(&buf, args->statbuf); return (error); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_syncfs(struct thread *td, struct linux_syncfs_args *args) { struct mount *mp; struct vnode *vp; int error, save; error = fgetvp(td, args->fd, &cap_fsync_rights, &vp); if (error != 0) /* * Linux syncfs() returns only EBADF, however fgetvp() * can return EINVAL in case of file descriptor does * not represent a vnode. XXX. */ return (error); mp = vp->v_mount; mtx_lock(&mountlist_mtx); error = vfs_busy(mp, MBF_MNTLSTLOCK); if (error != 0) { /* See comment above. */ mtx_unlock(&mountlist_mtx); goto out; } if ((mp->mnt_flag & MNT_RDONLY) == 0 && vn_start_write(NULL, &mp, V_NOWAIT) == 0) { save = curthread_pflags_set(TDP_SYNCIO); vfs_periodic(mp, MNT_NOWAIT); VFS_SYNC(mp, MNT_NOWAIT); curthread_pflags_restore(save); vn_finished_write(mp); } vfs_unbusy(mp); out: vrele(vp); return (error); } int linux_statx(struct thread *td, struct linux_statx_args *args) { char *path; int error, dirfd, flags, unsupported; struct stat buf; unsupported = args->flags & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH); if (unsupported != 0) { linux_msg(td, "statx unsupported flags 0x%x", unsupported); return (EINVAL); } flags = (args->flags & LINUX_AT_SYMLINK_NOFOLLOW) ? AT_SYMLINK_NOFOLLOW : 0; flags |= (args->flags & LINUX_AT_EMPTY_PATH) ? AT_EMPTY_PATH : 0; dirfd = (args->dirfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dirfd; if (!LUSECONVPATH(td)) { error = linux_kern_statat(td, flags, dirfd, args->pathname, UIO_USERSPACE, &buf); } else { - LCONVPATHEXIST_AT(td, args->pathname, &path, dirfd); + LCONVPATHEXIST_AT(args->pathname, &path, dirfd); error = linux_kern_statat(td, flags, dirfd, path, UIO_SYSSPACE, &buf); LFREEPATH(path); } if (error == 0) error = statx_copyout(&buf, args->statxbuf); return (error); } diff --git a/sys/compat/linux/linux_uid16.c b/sys/compat/linux/linux_uid16.c index 5d0bfdde083c..4dd4129cfa0b 100644 --- a/sys/compat/linux/linux_uid16.c +++ b/sys/compat/linux/linux_uid16.c @@ -1,353 +1,353 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2001 The FreeBSD Project * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_LINUX32 #include #include #else #include #include #endif #include #include /* DTrace init */ LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); /** * DTrace probes in this module. */ LIN_SDT_PROBE_DEFINE1(uid16, linux_chown16, conv_path, "char *"); LIN_SDT_PROBE_DEFINE1(uid16, linux_lchown16, conv_path, "char *"); LIN_SDT_PROBE_DEFINE1(uid16, linux_setgroups16, copyin_error, "int"); LIN_SDT_PROBE_DEFINE1(uid16, linux_setgroups16, priv_check_cred_error, "int"); LIN_SDT_PROBE_DEFINE1(uid16, linux_getgroups16, copyout_error, "int"); DUMMY(setfsuid16); DUMMY(setfsgid16); DUMMY(getresuid16); DUMMY(getresgid16); #define CAST_NOCHG(x) ((x == 0xFFFF) ? -1 : x) int linux_chown16(struct thread *td, struct linux_chown16_args *args) { char *path; int error; if (!LUSECONVPATH(td) && !SDT_PROBES_ENABLED()) { error = kern_fchownat(td, AT_FDCWD, args->path, UIO_USERSPACE, CAST_NOCHG(args->uid), CAST_NOCHG(args->gid), 0); } else { - LCONVPATHEXIST(td, args->path, &path); + LCONVPATHEXIST(args->path, &path); /* * The DTrace probes have to be after the LCONVPATHEXIST, as * LCONVPATHEXIST may return on its own and we do not want to * have a stray entry without the corresponding return. */ LIN_SDT_PROBE1(uid16, linux_chown16, conv_path, path); error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, CAST_NOCHG(args->uid), CAST_NOCHG(args->gid), 0); LFREEPATH(path); } return (error); } int linux_lchown16(struct thread *td, struct linux_lchown16_args *args) { char *path; int error; if (!LUSECONVPATH(td) && !SDT_PROBES_ENABLED()) { error = kern_fchownat(td, AT_FDCWD, args->path, UIO_USERSPACE, CAST_NOCHG(args->uid), CAST_NOCHG(args->gid), AT_SYMLINK_NOFOLLOW); } else { - LCONVPATHEXIST(td, args->path, &path); + LCONVPATHEXIST(args->path, &path); /* * The DTrace probes have to be after the LCONVPATHEXIST, as * LCONVPATHEXIST may return on its own and we do not want to * have a stray entry without the corresponding return. 
*/ LIN_SDT_PROBE1(uid16, linux_lchown16, conv_path, path); error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, CAST_NOCHG(args->uid), CAST_NOCHG(args->gid), AT_SYMLINK_NOFOLLOW); LFREEPATH(path); } return (error); } int linux_setgroups16(struct thread *td, struct linux_setgroups16_args *args) { struct ucred *newcred, *oldcred; l_gid16_t *linux_gidset; gid_t *bsd_gidset; int ngrp, error; struct proc *p; ngrp = args->gidsetsize; if (ngrp < 0 || ngrp >= ngroups_max + 1) return (EINVAL); linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); error = copyin(args->gidset, linux_gidset, ngrp * sizeof(l_gid16_t)); if (error) { LIN_SDT_PROBE1(uid16, linux_setgroups16, copyin_error, error); free(linux_gidset, M_LINUX); return (error); } newcred = crget(); p = td->td_proc; PROC_LOCK(p); oldcred = crcopysafe(p, newcred); /* * cr_groups[0] holds egid. Setting the whole set from * the supplied set will cause egid to be changed too. * Keep cr_groups[0] unchanged to prevent that. */ if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) { PROC_UNLOCK(p); crfree(newcred); LIN_SDT_PROBE1(uid16, linux_setgroups16, priv_check_cred_error, error); goto out; } if (ngrp > 0) { newcred->cr_ngroups = ngrp + 1; bsd_gidset = newcred->cr_groups; ngrp--; while (ngrp >= 0) { bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; ngrp--; } } else newcred->cr_ngroups = 1; setsugid(td->td_proc); proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); error = 0; out: free(linux_gidset, M_LINUX); return (error); } int linux_getgroups16(struct thread *td, struct linux_getgroups16_args *args) { struct ucred *cred; l_gid16_t *linux_gidset; gid_t *bsd_gidset; int bsd_gidsetsz, ngrp, error; cred = td->td_ucred; bsd_gidset = cred->cr_groups; bsd_gidsetsz = cred->cr_ngroups - 1; /* * cr_groups[0] holds egid. Returning the whole set * here will cause a duplicate. Exclude cr_groups[0] * to prevent that. 
*/ if ((ngrp = args->gidsetsize) == 0) { td->td_retval[0] = bsd_gidsetsz; return (0); } if (ngrp < bsd_gidsetsz) return (EINVAL); ngrp = 0; linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), M_LINUX, M_WAITOK); while (ngrp < bsd_gidsetsz) { linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; ngrp++; } error = copyout(linux_gidset, args->gidset, ngrp * sizeof(l_gid16_t)); free(linux_gidset, M_LINUX); if (error) { LIN_SDT_PROBE1(uid16, linux_getgroups16, copyout_error, error); return (error); } td->td_retval[0] = ngrp; return (0); } int linux_getgid16(struct thread *td, struct linux_getgid16_args *args) { td->td_retval[0] = td->td_ucred->cr_rgid; return (0); } int linux_getuid16(struct thread *td, struct linux_getuid16_args *args) { td->td_retval[0] = td->td_ucred->cr_ruid; return (0); } int linux_getegid16(struct thread *td, struct linux_getegid16_args *args) { struct getegid_args bsd; int error; error = sys_getegid(td, &bsd); return (error); } int linux_geteuid16(struct thread *td, struct linux_geteuid16_args *args) { struct geteuid_args bsd; int error; error = sys_geteuid(td, &bsd); return (error); } int linux_setgid16(struct thread *td, struct linux_setgid16_args *args) { struct setgid_args bsd; int error; bsd.gid = args->gid; error = sys_setgid(td, &bsd); return (error); } int linux_setuid16(struct thread *td, struct linux_setuid16_args *args) { struct setuid_args bsd; int error; bsd.uid = args->uid; error = sys_setuid(td, &bsd); return (error); } int linux_setregid16(struct thread *td, struct linux_setregid16_args *args) { struct setregid_args bsd; int error; bsd.rgid = CAST_NOCHG(args->rgid); bsd.egid = CAST_NOCHG(args->egid); error = sys_setregid(td, &bsd); return (error); } int linux_setreuid16(struct thread *td, struct linux_setreuid16_args *args) { struct setreuid_args bsd; int error; bsd.ruid = CAST_NOCHG(args->ruid); bsd.euid = CAST_NOCHG(args->euid); error = sys_setreuid(td, &bsd); return (error); } int linux_setresgid16(struct thread *td, struct linux_setresgid16_args *args) { struct setresgid_args bsd; int error; bsd.rgid = CAST_NOCHG(args->rgid); bsd.egid = CAST_NOCHG(args->egid); bsd.sgid = CAST_NOCHG(args->sgid); error = sys_setresgid(td, &bsd); return (error); } int linux_setresuid16(struct thread *td, struct linux_setresuid16_args *args) { struct setresuid_args bsd; int error; bsd.ruid = CAST_NOCHG(args->ruid); bsd.euid = CAST_NOCHG(args->euid); bsd.suid = CAST_NOCHG(args->suid); error = sys_setresuid(td, &bsd); return (error); } diff --git a/sys/compat/linux/linux_util.c b/sys/compat/linux/linux_util.c index 9e6dea35617f..f1d33faa628d 100644 --- a/sys/compat/linux/linux_util.c +++ b/sys/compat/linux/linux_util.c @@ -1,332 +1,332 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1994 Christos Zoulas * Copyright (c) 1995 Frank van der Linden * Copyright (c) 1995 Scott Bartram * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * from: svr4_util.c,v 1.5 1995/01/22 23:44:50 christos Exp */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); MALLOC_DEFINE(M_EPOLL, "lepoll", "Linux events structures"); FEATURE(linuxulator_v4l, "V4L ioctl wrapper support in the linuxulator"); FEATURE(linuxulator_v4l2, "V4L2 ioctl wrapper support in the linuxulator"); /** * Special DTrace provider for the linuxulator. * * In this file we define the provider for the entire linuxulator. All * modules (= files of the linuxulator) use it. * * We define a different name depending on the emulated bitsize, see * ../..//linux{,32}/linux.h, e.g.: * native bitsize = linuxulator * amd64, 32bit emulation = linuxulator32 */ LIN_SDT_PROVIDER_DEFINE(linuxulator); LIN_SDT_PROVIDER_DEFINE(linuxulator32); char linux_emul_path[MAXPATHLEN] = "/compat/linux"; SYSCTL_STRING(_compat_linux, OID_AUTO, emul_path, CTLFLAG_RWTUN, linux_emul_path, sizeof(linux_emul_path), "Linux runtime environment path"); static bool use_real_ifnames = false; SYSCTL_BOOL(_compat_linux, OID_AUTO, use_real_ifnames, CTLFLAG_RWTUN, &use_real_ifnames, 0, "Use FreeBSD interface names instead of generating ethN aliases"); /* * Search an alternate path before passing pathname arguments on to * system calls. Useful for keeping a separate 'emulation tree'. * * If cflag is set, we check if an attempt can be made to create the * named file, i.e. we check if the directory it should be in exists. */ int -linux_emul_convpath(struct thread *td, const char *path, enum uio_seg pathseg, +linux_emul_convpath(const char *path, enum uio_seg pathseg, char **pbuf, int cflag, int dfd) { int retval; - retval = kern_alternate_path(td, linux_emul_path, path, pathseg, pbuf, + retval = kern_alternate_path(curthread, linux_emul_path, path, pathseg, pbuf, cflag, dfd); return (retval); } void linux_msg(const struct thread *td, const char *fmt, ...) 
{ va_list ap; struct proc *p; if (linux_debug == 0) return; p = td->td_proc; printf("linux: jid %d pid %d (%s): ", p->p_ucred->cr_prison->pr_id, (int)p->p_pid, p->p_comm); va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); printf("\n"); } struct device_element { TAILQ_ENTRY(device_element) list; struct linux_device_handler entry; }; static TAILQ_HEAD(, device_element) devices = TAILQ_HEAD_INITIALIZER(devices); static struct linux_device_handler null_handler = { "mem", "mem", "null", "null", 1, 3, 1}; DATA_SET(linux_device_handler_set, null_handler); char * linux_driver_get_name_dev(device_t dev) { struct device_element *de; const char *device_name = device_get_name(dev); if (device_name == NULL) return (NULL); TAILQ_FOREACH(de, &devices, list) { if (strcmp(device_name, de->entry.bsd_driver_name) == 0) return (de->entry.linux_driver_name); } return (NULL); } int linux_driver_get_major_minor(const char *node, int *major, int *minor) { struct device_element *de; unsigned long devno; size_t sz; if (node == NULL || major == NULL || minor == NULL) return (1); sz = sizeof("pts/") - 1; if (strncmp(node, "pts/", sz) == 0 && node[sz] != '\0') { /* * Linux checks major and minors of the slave device * to make sure it's a pty device, so let's make him * believe it is. */ devno = strtoul(node + sz, NULL, 10); *major = 136 + (devno / 256); *minor = devno % 256; return (0); } sz = sizeof("dri/card") - 1; if (strncmp(node, "dri/card", sz) == 0 && node[sz] != '\0') { devno = strtoul(node + sz, NULL, 10); *major = 226 + (devno / 256); *minor = devno % 256; return (0); } sz = sizeof("dri/controlD") - 1; if (strncmp(node, "dri/controlD", sz) == 0 && node[sz] != '\0') { devno = strtoul(node + sz, NULL, 10); *major = 226 + (devno / 256); *minor = devno % 256; return (0); } sz = sizeof("dri/renderD") - 1; if (strncmp(node, "dri/renderD", sz) == 0 && node[sz] != '\0') { devno = strtoul(node + sz, NULL, 10); *major = 226 + (devno / 256); *minor = devno % 256; return (0); } sz = sizeof("drm/") - 1; if (strncmp(node, "drm/", sz) == 0 && node[sz] != '\0') { devno = strtoul(node + sz, NULL, 10); *major = 226 + (devno / 256); *minor = devno % 256; return (0); } TAILQ_FOREACH(de, &devices, list) { if (strcmp(node, de->entry.bsd_device_name) == 0) { *major = de->entry.linux_major; *minor = de->entry.linux_minor; return (0); } } return (1); } int linux_vn_get_major_minor(const struct vnode *vp, int *major, int *minor) { int error; if (vp->v_type != VCHR) return (ENOTBLK); dev_lock(); if (vp->v_rdev == NULL) { dev_unlock(); return (ENXIO); } error = linux_driver_get_major_minor(devtoname(vp->v_rdev), major, minor); dev_unlock(); return (error); } char * linux_get_char_devices() { struct device_element *de; char *temp, *string, *last; char formated[256]; int current_size = 0, string_size = 1024; string = malloc(string_size, M_LINUX, M_WAITOK); string[0] = '\000'; last = ""; TAILQ_FOREACH(de, &devices, list) { if (!de->entry.linux_char_device) continue; temp = string; if (strcmp(last, de->entry.bsd_driver_name) != 0) { last = de->entry.bsd_driver_name; snprintf(formated, sizeof(formated), "%3d %s\n", de->entry.linux_major, de->entry.linux_device_name); if (strlen(formated) + current_size >= string_size) { string_size *= 2; string = malloc(string_size, M_LINUX, M_WAITOK); bcopy(temp, string, current_size); free(temp, M_LINUX); } strcat(string, formated); current_size = strlen(string); } } return (string); } void linux_free_get_char_devices(char *string) { free(string, M_LINUX); } static int linux_major_starting = 200; int 
linux_device_register_handler(struct linux_device_handler *d) { struct device_element *de; if (d == NULL) return (EINVAL); de = malloc(sizeof(*de), M_LINUX, M_WAITOK); if (d->linux_major < 0) { d->linux_major = linux_major_starting++; } bcopy(d, &de->entry, sizeof(*d)); /* Add the element to the list, sorted on span. */ TAILQ_INSERT_TAIL(&devices, de, list); return (0); } int linux_device_unregister_handler(struct linux_device_handler *d) { struct device_element *de; if (d == NULL) return (EINVAL); TAILQ_FOREACH(de, &devices, list) { if (bcmp(d, &de->entry, sizeof(*d)) == 0) { TAILQ_REMOVE(&devices, de, list); free(de, M_LINUX); return (0); } } return (EINVAL); } bool linux_use_real_ifname(const struct ifnet *ifp) { return (use_real_ifnames || !IFP_IS_ETH(ifp)); } diff --git a/sys/compat/linux/linux_util.h b/sys/compat/linux/linux_util.h index d01058867f06..a29ba0bc4af1 100644 --- a/sys/compat/linux/linux_util.h +++ b/sys/compat/linux/linux_util.h @@ -1,201 +1,201 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1994 Christos Zoulas * Copyright (c) 1995 Frank van der Linden * Copyright (c) 1995 Scott Bartram * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * from: svr4_util.h,v 1.5 1994/11/18 02:54:31 christos Exp * from: linux_util.h,v 1.2 1995/03/05 23:23:50 fvdl Exp * $FreeBSD$ */ #ifndef _LINUX_UTIL_H_ #define _LINUX_UTIL_H_ #include #include #include #include #include #include #include #include MALLOC_DECLARE(M_LINUX); MALLOC_DECLARE(M_EPOLL); extern char linux_emul_path[]; extern int linux_use_emul_path; -int linux_emul_convpath(struct thread *, const char *, enum uio_seg, char **, int, int); +int linux_emul_convpath(const char *, enum uio_seg, char **, int, int); #define LUSECONVPATH(td) atomic_load_int(&linux_use_emul_path) -#define LCONVPATH_AT(td, upath, pathp, i, dfd) \ +#define LCONVPATH_AT(upath, pathp, i, dfd) \ do { \ int _error; \ \ - _error = linux_emul_convpath(td, upath, UIO_USERSPACE, \ + _error = linux_emul_convpath(upath, UIO_USERSPACE, \ pathp, i, dfd); \ if (*(pathp) == NULL) \ return (_error); \ } while (0) -#define LCONVPATH(td, upath, pathp, i) \ - LCONVPATH_AT(td, upath, pathp, i, AT_FDCWD) +#define LCONVPATH(upath, pathp, i) \ + LCONVPATH_AT(upath, pathp, i, AT_FDCWD) -#define LCONVPATHEXIST(td, upath, pathp) LCONVPATH(td, upath, pathp, 0) -#define LCONVPATHEXIST_AT(td, upath, pathp, dfd) LCONVPATH_AT(td, upath, pathp, 0, dfd) -#define LCONVPATHCREAT(td, upath, pathp) LCONVPATH(td, upath, pathp, 1) -#define LCONVPATHCREAT_AT(td, upath, pathp, dfd) LCONVPATH_AT(td, upath, pathp, 1, dfd) +#define LCONVPATHEXIST(upath, pathp) LCONVPATH(upath, pathp, 0) +#define LCONVPATHEXIST_AT(upath, pathp, dfd) LCONVPATH_AT(upath, pathp, 0, dfd) +#define LCONVPATHCREAT(upath, pathp) LCONVPATH(upath, pathp, 1) +#define LCONVPATHCREAT_AT(upath, pathp, dfd) LCONVPATH_AT(upath, pathp, 1, dfd) #define LFREEPATH(path) free(path, M_TEMP) #define DUMMY(s) \ LIN_SDT_PROBE_DEFINE0(dummy, s, not_implemented); \ int \ linux_ ## s(struct thread *td, struct linux_ ## s ## _args *args) \ { \ static pid_t pid; \ \ if (pid != td->td_proc->p_pid) { \ linux_msg(td, "syscall %s not implemented", #s); \ LIN_SDT_PROBE0(dummy, s, not_implemented); \ pid = td->td_proc->p_pid; \ }; \ \ return (ENOSYS); \ } \ struct __hack /* * This is for the syscalls that are not even yet implemented in Linux. * * They're marked as UNIMPL in syscall.master so it will * have nosys record in linux_sysent[]. */ #define UNIMPLEMENTED(s) void linux_msg(const struct thread *td, const char *fmt, ...) 
__printflike(2, 3); struct linux_device_handler { char *bsd_driver_name; char *linux_driver_name; char *bsd_device_name; char *linux_device_name; int linux_major; int linux_minor; int linux_char_device; }; int linux_device_register_handler(struct linux_device_handler *h); int linux_device_unregister_handler(struct linux_device_handler *h); char *linux_driver_get_name_dev(device_t dev); int linux_driver_get_major_minor(const char *node, int *major, int *minor); int linux_vn_get_major_minor(const struct vnode *vn, int *major, int *minor); char *linux_get_char_devices(void); void linux_free_get_char_devices(char *string); /* * Criteria for interface name translation */ #define IFP_IS_ETH(ifp) ((ifp)->if_type == IFT_ETHER) #define IFP_IS_LOOP(ifp) ((ifp)->if_type == IFT_LOOP) struct ifnet; bool linux_use_real_ifname(const struct ifnet *ifp); #if defined(KTR) #define KTR_LINUX KTR_SUBSYS #define LINUX_CTRFMT(nm, fmt) #nm"("fmt")" #define LINUX_CTR6(f, m, p1, p2, p3, p4, p5, p6) do { \ CTR6(KTR_LINUX, LINUX_CTRFMT(f, m), \ p1, p2, p3, p4, p5, p6); \ } while (0) #define LINUX_CTR(f) LINUX_CTR6(f, "", 0, 0, 0, 0, 0, 0) #define LINUX_CTR0(f, m) LINUX_CTR6(f, m, 0, 0, 0, 0, 0, 0) #define LINUX_CTR1(f, m, p1) LINUX_CTR6(f, m, p1, 0, 0, 0, 0, 0) #define LINUX_CTR2(f, m, p1, p2) LINUX_CTR6(f, m, p1, p2, 0, 0, 0, 0) #define LINUX_CTR3(f, m, p1, p2, p3) LINUX_CTR6(f, m, p1, p2, p3, 0, 0, 0) #define LINUX_CTR4(f, m, p1, p2, p3, p4) LINUX_CTR6(f, m, p1, p2, p3, p4, 0, 0) #define LINUX_CTR5(f, m, p1, p2, p3, p4, p5) LINUX_CTR6(f, m, p1, p2, p3, p4, p5, 0) #else #define LINUX_CTR(f) #define LINUX_CTR0(f, m) #define LINUX_CTR1(f, m, p1) #define LINUX_CTR2(f, m, p1, p2) #define LINUX_CTR3(f, m, p1, p2, p3) #define LINUX_CTR4(f, m, p1, p2, p3, p4) #define LINUX_CTR5(f, m, p1, p2, p3, p4, p5) #define LINUX_CTR6(f, m, p1, p2, p3, p4, p5, p6) #endif /* * Some macros for rate limiting messages: * - noisy if compat.linux.debug = 1 * - print only once if compat.linux.debug > 1 */ #define LINUX_RATELIMIT_MSG_NOTTESTED(_what) \ do { \ static int seen = 0; \ \ if (seen == 0 && linux_debug >= 2) { \ linux_msg(curthread, "%s is not tested, please report on emulation@FreeBSD.org how it works", _what); \ \ if (linux_debug < 3) \ seen = 1; \ } \ } while (0) #define LINUX_RATELIMIT_MSG(_message) \ do { \ static int seen = 0; \ \ if (seen == 0) { \ linux_msg(curthread, _message); \ \ if (linux_debug < 3) \ seen = 1; \ } \ } while (0) #define LINUX_RATELIMIT_MSG_OPT1(_message, _opt1) \ do { \ static int seen = 0; \ \ if (seen == 0) { \ linux_msg(curthread, _message, _opt1); \ \ if (linux_debug < 3) \ seen = 1; \ } \ } while (0) #endif /* ! _LINUX_UTIL_H_ */ diff --git a/sys/i386/linux/linux_machdep.c b/sys/i386/linux/linux_machdep.c index c81bf1b4e2e5..dde11f22d4fe 100644 --- a/sys/i386/linux/linux_machdep.c +++ b/sys/i386/linux/linux_machdep.c @@ -1,696 +1,696 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2000 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* needed for pcb definition in linux_set_thread_area */ #include "opt_posix.h" struct l_descriptor { l_uint entry_number; l_ulong base_addr; l_uint limit; l_uint seg_32bit:1; l_uint contents:2; l_uint read_exec_only:1; l_uint limit_in_pages:1; l_uint seg_not_present:1; l_uint useable:1; }; struct l_old_select_argv { l_int nfds; l_fd_set *readfds; l_fd_set *writefds; l_fd_set *exceptfds; struct l_timeval *timeout; }; int linux_execve(struct thread *td, struct linux_execve_args *args) { struct image_args eargs; char *newpath; int error; if (!LUSECONVPATH(td)) { error = exec_copyin_args(&eargs, args->path, UIO_USERSPACE, args->argp, args->envp); } else { - LCONVPATHEXIST(td, args->path, &newpath); + LCONVPATHEXIST(args->path, &newpath); error = exec_copyin_args(&eargs, newpath, UIO_SYSSPACE, args->argp, args->envp); LFREEPATH(newpath); } if (error == 0) error = linux_common_execve(td, &eargs); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 
0 : error, td); return (error); } struct l_ipc_kludge { struct l_msgbuf *msgp; l_long msgtyp; }; int linux_ipc(struct thread *td, struct linux_ipc_args *args) { switch (args->what & 0xFFFF) { case LINUX_SEMOP: { struct linux_semop_args a; a.semid = args->arg1; a.tsops = PTRIN(args->ptr); a.nsops = args->arg2; return (linux_semop(td, &a)); } case LINUX_SEMGET: { struct linux_semget_args a; a.key = args->arg1; a.nsems = args->arg2; a.semflg = args->arg3; return (linux_semget(td, &a)); } case LINUX_SEMCTL: { struct linux_semctl_args a; int error; a.semid = args->arg1; a.semnum = args->arg2; a.cmd = args->arg3; error = copyin(PTRIN(args->ptr), &a.arg, sizeof(a.arg)); if (error) return (error); return (linux_semctl(td, &a)); } case LINUX_MSGSND: { struct linux_msgsnd_args a; a.msqid = args->arg1; a.msgp = PTRIN(args->ptr); a.msgsz = args->arg2; a.msgflg = args->arg3; return (linux_msgsnd(td, &a)); } case LINUX_MSGRCV: { struct linux_msgrcv_args a; a.msqid = args->arg1; a.msgsz = args->arg2; a.msgflg = args->arg3; if ((args->what >> 16) == 0) { struct l_ipc_kludge tmp; int error; if (args->ptr == 0) return (EINVAL); error = copyin(PTRIN(args->ptr), &tmp, sizeof(tmp)); if (error) return (error); a.msgp = PTRIN(tmp.msgp); a.msgtyp = tmp.msgtyp; } else { a.msgp = PTRIN(args->ptr); a.msgtyp = args->arg5; } return (linux_msgrcv(td, &a)); } case LINUX_MSGGET: { struct linux_msgget_args a; a.key = args->arg1; a.msgflg = args->arg2; return (linux_msgget(td, &a)); } case LINUX_MSGCTL: { struct linux_msgctl_args a; a.msqid = args->arg1; a.cmd = args->arg2; a.buf = PTRIN(args->ptr); return (linux_msgctl(td, &a)); } case LINUX_SHMAT: { struct linux_shmat_args a; l_uintptr_t addr; int error; a.shmid = args->arg1; a.shmaddr = PTRIN(args->ptr); a.shmflg = args->arg2; error = linux_shmat(td, &a); if (error != 0) return (error); addr = td->td_retval[0]; error = copyout(&addr, PTRIN(args->arg3), sizeof(addr)); td->td_retval[0] = 0; return (error); } case LINUX_SHMDT: { struct linux_shmdt_args a; a.shmaddr = PTRIN(args->ptr); return (linux_shmdt(td, &a)); } case LINUX_SHMGET: { struct linux_shmget_args a; a.key = args->arg1; a.size = args->arg2; a.shmflg = args->arg3; return (linux_shmget(td, &a)); } case LINUX_SHMCTL: { struct linux_shmctl_args a; a.shmid = args->arg1; a.cmd = args->arg2; a.buf = PTRIN(args->ptr); return (linux_shmctl(td, &a)); } default: break; } return (EINVAL); } int linux_old_select(struct thread *td, struct linux_old_select_args *args) { struct l_old_select_argv linux_args; struct linux_select_args newsel; int error; error = copyin(args->ptr, &linux_args, sizeof(linux_args)); if (error) return (error); newsel.nfds = linux_args.nfds; newsel.readfds = linux_args.readfds; newsel.writefds = linux_args.writefds; newsel.exceptfds = linux_args.exceptfds; newsel.timeout = linux_args.timeout; return (linux_select(td, &newsel)); } int linux_set_cloned_tls(struct thread *td, void *desc) { struct segment_descriptor sd; struct l_user_desc info; int idx, error; int a[2]; error = copyin(desc, &info, sizeof(struct l_user_desc)); if (error) { linux_msg(td, "set_cloned_tls copyin failed!"); } else { idx = info.entry_number; /* * looks like we're getting the idx we returned * in the set_thread_area() syscall */ if (idx != 6 && idx != 3) { linux_msg(td, "set_cloned_tls resetting idx!"); idx = 3; } /* this doesn't happen in practice */ if (idx == 6) { /* we might copy out the entry_number as 3 */ info.entry_number = 3; error = copyout(&info, desc, sizeof(struct l_user_desc)); if (error) linux_msg(td,
"set_cloned_tls copyout failed!"); } a[0] = LINUX_LDT_entry_a(&info); a[1] = LINUX_LDT_entry_b(&info); memcpy(&sd, &a, sizeof(a)); /* set %gs */ td->td_pcb->pcb_gsd = sd; td->td_pcb->pcb_gs = GSEL(GUGS_SEL, SEL_UPL); } return (error); } int linux_set_upcall(struct thread *td, register_t stack) { if (stack) td->td_frame->tf_esp = stack; /* * The newly created Linux thread returns * to the user space by the same path that a parent do. */ td->td_frame->tf_eax = 0; return (0); } int linux_mmap2(struct thread *td, struct linux_mmap2_args *args) { return (linux_mmap_common(td, args->addr, args->len, args->prot, args->flags, args->fd, (uint64_t)(uint32_t)args->pgoff * PAGE_SIZE)); } int linux_mmap(struct thread *td, struct linux_mmap_args *args) { int error; struct l_mmap_argv linux_args; error = copyin(args->ptr, &linux_args, sizeof(linux_args)); if (error) return (error); return (linux_mmap_common(td, linux_args.addr, linux_args.len, linux_args.prot, linux_args.flags, linux_args.fd, (uint32_t)linux_args.pgoff)); } int linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) { return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot)); } int linux_madvise(struct thread *td, struct linux_madvise_args *uap) { return (linux_madvise_common(td, PTROUT(uap->addr), uap->len, uap->behav)); } int linux_ioperm(struct thread *td, struct linux_ioperm_args *args) { int error; struct i386_ioperm_args iia; iia.start = args->start; iia.length = args->length; iia.enable = args->enable; error = i386_set_ioperm(td, &iia); return (error); } int linux_iopl(struct thread *td, struct linux_iopl_args *args) { int error; if (args->level < 0 || args->level > 3) return (EINVAL); if ((error = priv_check(td, PRIV_IO)) != 0) return (error); if ((error = securelevel_gt(td->td_ucred, 0)) != 0) return (error); td->td_frame->tf_eflags = (td->td_frame->tf_eflags & ~PSL_IOPL) | (args->level * (PSL_IOPL / 3)); return (0); } int linux_modify_ldt(struct thread *td, struct linux_modify_ldt_args *uap) { int error; struct i386_ldt_args ldt; struct l_descriptor ld; union descriptor desc; int size, written; switch (uap->func) { case 0x00: /* read_ldt */ ldt.start = 0; ldt.descs = uap->ptr; ldt.num = uap->bytecount / sizeof(union descriptor); error = i386_get_ldt(td, &ldt); td->td_retval[0] *= sizeof(union descriptor); break; case 0x02: /* read_default_ldt = 0 */ size = 5*sizeof(struct l_desc_struct); if (size > uap->bytecount) size = uap->bytecount; for (written = error = 0; written < size && error == 0; written++) error = subyte((char *)uap->ptr + written, 0); td->td_retval[0] = written; break; case 0x01: /* write_ldt */ case 0x11: /* write_ldt */ if (uap->bytecount != sizeof(ld)) return (EINVAL); error = copyin(uap->ptr, &ld, sizeof(ld)); if (error) return (error); ldt.start = ld.entry_number; ldt.descs = &desc; ldt.num = 1; desc.sd.sd_lolimit = (ld.limit & 0x0000ffff); desc.sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16; desc.sd.sd_lobase = (ld.base_addr & 0x00ffffff); desc.sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24; desc.sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) | (ld.contents << 2); desc.sd.sd_dpl = 3; desc.sd.sd_p = (ld.seg_not_present ^ 1); desc.sd.sd_xx = 0; desc.sd.sd_def32 = ld.seg_32bit; desc.sd.sd_gran = ld.limit_in_pages; error = i386_set_ldt(td, &ldt, &desc); break; default: error = ENOSYS; break; } if (error == EOPNOTSUPP) { linux_msg(td, "modify_ldt needs kernel option USER_LDT"); error = ENOSYS; } return (error); } int linux_sigaction(struct thread *td, struct 
linux_sigaction_args *args) { l_osigaction_t osa; l_sigaction_t act, oact; int error; if (args->nsa != NULL) { error = copyin(args->nsa, &osa, sizeof(l_osigaction_t)); if (error) return (error); act.lsa_handler = osa.lsa_handler; act.lsa_flags = osa.lsa_flags; act.lsa_restorer = osa.lsa_restorer; LINUX_SIGEMPTYSET(act.lsa_mask); act.lsa_mask.__mask = osa.lsa_mask; } error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL, args->osa ? &oact : NULL); if (args->osa != NULL && !error) { osa.lsa_handler = oact.lsa_handler; osa.lsa_flags = oact.lsa_flags; osa.lsa_restorer = oact.lsa_restorer; osa.lsa_mask = oact.lsa_mask.__mask; error = copyout(&osa, args->osa, sizeof(l_osigaction_t)); } return (error); } /* * Linux has two extra args, restart and oldmask. We don't use these, * but it seems that "restart" is actually a context pointer that * enables the signal to happen with a different register set. */ int linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args) { sigset_t sigmask; l_sigset_t mask; LINUX_SIGEMPTYSET(mask); mask.__mask = args->mask; linux_to_bsd_sigset(&mask, &sigmask); return (kern_sigsuspend(td, sigmask)); } int linux_pause(struct thread *td, struct linux_pause_args *args) { struct proc *p = td->td_proc; sigset_t sigmask; PROC_LOCK(p); sigmask = td->td_sigmask; PROC_UNLOCK(p); return (kern_sigsuspend(td, sigmask)); } int linux_set_thread_area(struct thread *td, struct linux_set_thread_area_args *args) { struct l_user_desc info; int error; int idx; int a[2]; struct segment_descriptor sd; error = copyin(args->desc, &info, sizeof(struct l_user_desc)); if (error) return (error); idx = info.entry_number; /* * Semantics of Linux version: every thread in the system has an array of * 3 tls descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. This * syscall loads one of the selected tls descriptors with a value and * also loads GDT descriptors 6, 7 and 8 with the content of the * per-thread descriptors. * * Semantics of FreeBSD version: I think we can ignore that Linux has 3 * per-thread descriptors and use just the 1st one. The tls_array[] * is used only in set/get-thread_area() syscalls and for loading the * GDT descriptors. In FreeBSD we use just one GDT descriptor for TLS * so we will load just one. * * XXX: this doesn't work when a user space process tries to use more * than 1 TLS segment. Comment in the Linux sources says wine might do * this. */ /* * we support just GLIBC TLS now * we should let 3 proceed as well because we use this segment so * if code does two subsequent calls it should succeed */ if (idx != 6 && idx != -1 && idx != 3) return (EINVAL); /* * we have to copy out the GDT entry we use * FreeBSD uses GDT entry #3 for storing %gs so load that * * XXX: what if a user space program doesn't check this value and tries * to use 6, 7 or 8?
*/ idx = info.entry_number = 3; error = copyout(&info, args->desc, sizeof(struct l_user_desc)); if (error) return (error); if (LINUX_LDT_empty(&info)) { a[0] = 0; a[1] = 0; } else { a[0] = LINUX_LDT_entry_a(&info); a[1] = LINUX_LDT_entry_b(&info); } memcpy(&sd, &a, sizeof(a)); /* this is taken from i386 version of cpu_set_user_tls() */ critical_enter(); /* set %gs */ td->td_pcb->pcb_gsd = sd; PCPU_GET(fsgs_gdt)[1] = sd; load_gs(GSEL(GUGS_SEL, SEL_UPL)); critical_exit(); return (0); } int linux_get_thread_area(struct thread *td, struct linux_get_thread_area_args *args) { struct l_user_desc info; int error; int idx; struct l_desc_struct desc; struct segment_descriptor sd; error = copyin(args->desc, &info, sizeof(struct l_user_desc)); if (error) return (error); idx = info.entry_number; /* XXX: I am not sure if we want 3 to be allowed too. */ if (idx != 6 && idx != 3) return (EINVAL); idx = 3; memset(&info, 0, sizeof(info)); sd = PCPU_GET(fsgs_gdt)[1]; memcpy(&desc, &sd, sizeof(desc)); info.entry_number = idx; info.base_addr = LINUX_GET_BASE(&desc); info.limit = LINUX_GET_LIMIT(&desc); info.seg_32bit = LINUX_GET_32BIT(&desc); info.contents = LINUX_GET_CONTENTS(&desc); info.read_exec_only = !LINUX_GET_WRITABLE(&desc); info.limit_in_pages = LINUX_GET_LIMIT_PAGES(&desc); info.seg_not_present = !LINUX_GET_PRESENT(&desc); info.useable = LINUX_GET_USEABLE(&desc); error = copyout(&info, args->desc, sizeof(struct l_user_desc)); if (error) return (EFAULT); return (0); } /* XXX: this won't work with a module - convert it */ int linux_mq_open(struct thread *td, struct linux_mq_open_args *args) { #ifdef P1003_1B_MQUEUE return (sys_kmq_open(td, (struct kmq_open_args *)args)); #else return (ENOSYS); #endif } int linux_mq_unlink(struct thread *td, struct linux_mq_unlink_args *args) { #ifdef P1003_1B_MQUEUE return (sys_kmq_unlink(td, (struct kmq_unlink_args *)args)); #else return (ENOSYS); #endif } int linux_mq_timedsend(struct thread *td, struct linux_mq_timedsend_args *args) { #ifdef P1003_1B_MQUEUE return (sys_kmq_timedsend(td, (struct kmq_timedsend_args *)args)); #else return (ENOSYS); #endif } int linux_mq_timedreceive(struct thread *td, struct linux_mq_timedreceive_args *args) { #ifdef P1003_1B_MQUEUE return (sys_kmq_timedreceive(td, (struct kmq_timedreceive_args *)args)); #else return (ENOSYS); #endif } int linux_mq_notify(struct thread *td, struct linux_mq_notify_args *args) { #ifdef P1003_1B_MQUEUE return (sys_kmq_notify(td, (struct kmq_notify_args *)args)); #else return (ENOSYS); #endif } int linux_mq_getsetattr(struct thread *td, struct linux_mq_getsetattr_args *args) { #ifdef P1003_1B_MQUEUE return (sys_kmq_setattr(td, (struct kmq_setattr_args *)args)); #else return (ENOSYS); #endif }
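
For reference, a minimal caller-side sketch of the convention this diff converges on, assuming the macros declared in sys/compat/linux/linux_util.h above; the syscall name linux_foo(), its args structure, and the helper linux_kern_foo() are hypothetical placeholders, not part of the patch. LCONVPATHEXIST() now takes only the user pathname and the output pointer, and linux_emul_convpath() obtains the thread via curthread inside kern_alternate_path():

	/*
	 * Hypothetical example only: illustrates the post-change calling
	 * pattern.  The converted path is allocated by linux_emul_convpath()
	 * and must be released with LFREEPATH().
	 */
	int
	linux_foo(struct thread *td, struct linux_foo_args *args)
	{
		char *path;
		int error;

		if (!LUSECONVPATH(td)) {
			/* No /compat/linux prefixing: pass the user path through. */
			error = linux_kern_foo(td, args->path, UIO_USERSPACE);
		} else {
			/* No td argument any more; the macro may return early on error. */
			LCONVPATHEXIST(args->path, &path);
			error = linux_kern_foo(td, path, UIO_SYSSPACE);
			LFREEPATH(path);
		}
		return (error);
	}

The converted callers in this diff (linux_execve(), linux_stat64(), linux_chown16(), and the *_AT variants with an explicit dfd) follow this same shape; only the first argument of the LCONVPATH* invocation goes away.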