diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index 0527ef95cf3b..f64259decbff 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -1,724 +1,724 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1982, 1986 The Regents of the University of California. * Copyright (c) 1989, 1990 William Jolitz * Copyright (c) 1994 John Dyson * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ */ #include __FBSDID("$FreeBSD$"); #include "opt_isa.h" #include "opt_cpu.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include _Static_assert(OFFSETOF_MONITORBUF == offsetof(struct pcpu, pc_monitorbuf), "OFFSETOF_MONITORBUF does not correspond with offset of pc_monitorbuf."); void set_top_of_stack_td(struct thread *td) { td->td_md.md_stack_base = td->td_kstack + td->td_kstack_pages * PAGE_SIZE - roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN); } struct savefpu * get_pcb_user_save_td(struct thread *td) { vm_offset_t p; p = td->td_md.md_stack_base; KASSERT((p % XSAVE_AREA_ALIGN) == 0, ("Unaligned pcb_user_save area ptr %#lx td %p", p, td)); return ((struct savefpu *)p); } struct pcb * get_pcb_td(struct thread *td) { return (&td->td_md.md_pcb); } struct savefpu * get_pcb_user_save_pcb(struct pcb *pcb) { struct thread *td; td = __containerof(pcb, struct thread, td_md.md_pcb); return (get_pcb_user_save_td(td)); } void * alloc_fpusave(int flags) { void *res; struct savefpu_ymm *sf; res = malloc(cpu_max_ext_state_size, M_DEVBUF, flags); if (use_xsave) { sf = (struct savefpu_ymm *)res; bzero(&sf->sv_xstate.sx_hd, sizeof(sf->sv_xstate.sx_hd)); sf->sv_xstate.sx_hd.xstate_bv = xsave_mask; } return (res); } /* * Common code shared between cpu_fork() and cpu_copy_thread() for * initializing a thread. */ static void copy_thread(struct thread *td1, struct thread *td2) { struct pcb *pcb2; pcb2 = td2->td_pcb; /* Ensure that td1's pcb is up to date for user threads. */ if ((td2->td_pflags & TDP_KTHREAD) == 0) { MPASS(td1 == curthread); fpuexit(td1); update_pcb_bases(td1->td_pcb); } /* Copy td1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); /* Properly initialize pcb_save */ pcb2->pcb_save = get_pcb_user_save_pcb(pcb2); /* Kernel threads start with clean FPU and segment bases. */ if ((td2->td_pflags & TDP_KTHREAD) != 0) { pcb2->pcb_fsbase = 0; pcb2->pcb_gsbase = 0; clear_pcb_flags(pcb2, PCB_FPUINITDONE | PCB_USERFPUINITDONE | PCB_KERNFPU | PCB_KERNFPU_THR); } else { MPASS((pcb2->pcb_flags & (PCB_KERNFPU | PCB_KERNFPU_THR)) == 0); bcopy(get_pcb_user_save_td(td1), get_pcb_user_save_pcb(pcb2), cpu_max_ext_state_size); } /* * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. */ pcb2->pcb_r12 = (register_t)fork_return; /* fork_trampoline argument */ pcb2->pcb_rbp = 0; pcb2->pcb_rsp = (register_t)td2->td_frame - sizeof(void *); pcb2->pcb_rbx = (register_t)td2; /* fork_trampoline argument */ pcb2->pcb_rip = (register_t)fork_trampoline; /*- * pcb2->pcb_dr*: cloned above. * pcb2->pcb_savefpu: cloned above. * pcb2->pcb_flags: cloned above. * pcb2->pcb_onfault: cloned above (always NULL here?). * pcb2->pcb_[fg]sbase: cloned above */ /* Setup to release spin count in fork_exit(). */ td2->td_md.md_spinlock_count = 1; td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I; pmap_thread_init_invl_gen(td2); } /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child * ready to run and return to user mode. */ void cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) { struct proc *p1; struct pcb *pcb2; struct mdproc *mdp1, *mdp2; struct proc_ldt *pldt; p1 = td1->td_proc; if ((flags & RFPROC) == 0) { if ((flags & RFMEM) == 0) { /* unshare user LDT */ mdp1 = &p1->p_md; mtx_lock(&dt_lock); if ((pldt = mdp1->md_ldt) != NULL && pldt->ldt_refcnt > 1 && user_ldt_alloc(p1, 1) == NULL) panic("could not copy LDT"); mtx_unlock(&dt_lock); } return; } /* Point the stack and pcb to the actual location */ set_top_of_stack_td(td2); td2->td_pcb = pcb2 = get_pcb_td(td2); copy_thread(td1, td2); /* Point mdproc and then copy over p1's contents */ mdp2 = &p2->p_md; bcopy(&p1->p_md, mdp2, sizeof(*mdp2)); /* * Copy the trap frame for the return to user mode as if from a * syscall. This copies most of the user mode register values. */ td2->td_frame = (struct trapframe *)td2->td_md.md_stack_base - 1; bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe)); td2->td_frame->tf_rax = 0; /* Child returns zero */ td2->td_frame->tf_rflags &= ~PSL_C; /* success */ td2->td_frame->tf_rdx = 1; /* * If the parent process has the trap bit set (i.e. a debugger * had single stepped the process to the system call), we need * to clear the trap flag from the new frame. */ td2->td_frame->tf_rflags &= ~PSL_T; /* As on i386, do not copy io permission bitmap. */ pcb2->pcb_tssp = NULL; /* New segment registers. */ set_pcb_flags_raw(pcb2, PCB_FULL_IRET); /* Copy the LDT, if necessary. */ mdp1 = &td1->td_proc->p_md; mdp2 = &p2->p_md; if (mdp1->md_ldt == NULL) { mdp2->md_ldt = NULL; return; } mtx_lock(&dt_lock); if (mdp1->md_ldt != NULL) { if (flags & RFMEM) { mdp1->md_ldt->ldt_refcnt++; mdp2->md_ldt = mdp1->md_ldt; bcopy(&mdp1->md_ldt_sd, &mdp2->md_ldt_sd, sizeof(struct system_segment_descriptor)); } else { mdp2->md_ldt = NULL; mdp2->md_ldt = user_ldt_alloc(p2, 0); if (mdp2->md_ldt == NULL) panic("could not copy LDT"); amd64_set_ldt_data(td2, 0, max_ldt_segment, (struct user_segment_descriptor *) mdp1->md_ldt->ldt_base); } } else mdp2->md_ldt = NULL; mtx_unlock(&dt_lock); /* * Now, cpu_switch() can schedule the new process. * pcb_rsp is loaded pointing to the cpu_switch() stack frame * containing the return address when exiting cpu_switch. * This will normally be to fork_trampoline(), which will have * %rbx loaded with the new proc's pointer. fork_trampoline() * will set up a stack to call fork_return(p, frame); to complete * the return to user-mode. */ } /* * Intercept the return address from a freshly forked process that has NOT * been scheduled yet. * * This is needed to make kernel threads stay in kernel mode. */ void cpu_fork_kthread_handler(struct thread *td, void (*func)(void *), void *arg) { /* * Note that the trap frame follows the args, so the function * is really called like this: func(arg, frame); */ td->td_pcb->pcb_r12 = (long) func; /* function */ td->td_pcb->pcb_rbx = (long) arg; /* first arg */ } void cpu_exit(struct thread *td) { /* * If this process has a custom LDT, release it. */ if (td->td_proc->p_md.md_ldt != NULL) user_ldt_free(td); } void cpu_thread_exit(struct thread *td) { struct pcb *pcb; critical_enter(); if (td == PCPU_GET(fpcurthread)) fpudrop(); critical_exit(); pcb = td->td_pcb; /* Disable any hardware breakpoints. */ if (pcb->pcb_flags & PCB_DBREGS) { reset_dbregs(); clear_pcb_flags(pcb, PCB_DBREGS); } } void cpu_thread_clean(struct thread *td) { struct pcb *pcb; pcb = td->td_pcb; /* * Clean TSS/iomap */ if (pcb->pcb_tssp != NULL) { pmap_pti_remove_kva((vm_offset_t)pcb->pcb_tssp, (vm_offset_t)pcb->pcb_tssp + ctob(IOPAGES + 1)); kmem_free((vm_offset_t)pcb->pcb_tssp, ctob(IOPAGES + 1)); pcb->pcb_tssp = NULL; } } void cpu_thread_swapin(struct thread *td) { } void cpu_thread_swapout(struct thread *td) { } void cpu_thread_alloc(struct thread *td) { struct pcb *pcb; struct xstate_hdr *xhdr; set_top_of_stack_td(td); td->td_pcb = pcb = get_pcb_td(td); td->td_frame = (struct trapframe *)td->td_md.md_stack_base - 1; pcb->pcb_save = get_pcb_user_save_pcb(pcb); if (use_xsave) { xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1); bzero(xhdr, sizeof(*xhdr)); xhdr->xstate_bv = xsave_mask; } } void cpu_thread_free(struct thread *td) { cpu_thread_clean(td); } bool cpu_exec_vmspace_reuse(struct proc *p, vm_map_t map) { return (((curproc->p_md.md_flags & P_MD_KPTI) != 0) == (vm_map_pmap(map)->pm_ucr3 != PMAP_NO_CR3)); } static void cpu_procctl_kpti_ctl(struct proc *p, int val) { if (pti && val == PROC_KPTI_CTL_ENABLE_ON_EXEC) p->p_md.md_flags |= P_MD_KPTI; if (val == PROC_KPTI_CTL_DISABLE_ON_EXEC) p->p_md.md_flags &= ~P_MD_KPTI; } static void cpu_procctl_kpti_status(struct proc *p, int *val) { *val = (p->p_md.md_flags & P_MD_KPTI) != 0 ? PROC_KPTI_CTL_ENABLE_ON_EXEC: PROC_KPTI_CTL_DISABLE_ON_EXEC; if (vmspace_pmap(p->p_vmspace)->pm_ucr3 != PMAP_NO_CR3) *val |= PROC_KPTI_STATUS_ACTIVE; } static int cpu_procctl_la_ctl(struct proc *p, int val) { int error; error = 0; switch (val) { case PROC_LA_CTL_LA48_ON_EXEC: p->p_md.md_flags |= P_MD_LA48; p->p_md.md_flags &= ~P_MD_LA57; break; case PROC_LA_CTL_LA57_ON_EXEC: if (la57) { p->p_md.md_flags &= ~P_MD_LA48; p->p_md.md_flags |= P_MD_LA57; } else { error = ENOTSUP; } break; case PROC_LA_CTL_DEFAULT_ON_EXEC: p->p_md.md_flags &= ~(P_MD_LA48 | P_MD_LA57); break; } return (error); } static void cpu_procctl_la_status(struct proc *p, int *val) { int res; if ((p->p_md.md_flags & P_MD_LA48) != 0) res = PROC_LA_CTL_LA48_ON_EXEC; else if ((p->p_md.md_flags & P_MD_LA57) != 0) res = PROC_LA_CTL_LA57_ON_EXEC; else res = PROC_LA_CTL_DEFAULT_ON_EXEC; if (p->p_sysent->sv_maxuser == VM_MAXUSER_ADDRESS_LA48) res |= PROC_LA_STATUS_LA48; else res |= PROC_LA_STATUS_LA57; *val = res; } int cpu_procctl(struct thread *td, int idtype, id_t id, int com, void *data) { struct proc *p; int error, val; switch (com) { case PROC_KPTI_CTL: case PROC_KPTI_STATUS: case PROC_LA_CTL: case PROC_LA_STATUS: if (idtype != P_PID) { error = EINVAL; break; } if (com == PROC_KPTI_CTL) { /* sad but true and not a joke */ error = priv_check(td, PRIV_IO); if (error != 0) break; } if (com == PROC_KPTI_CTL || com == PROC_LA_CTL) { error = copyin(data, &val, sizeof(val)); if (error != 0) break; } if (com == PROC_KPTI_CTL && val != PROC_KPTI_CTL_ENABLE_ON_EXEC && val != PROC_KPTI_CTL_DISABLE_ON_EXEC) { error = EINVAL; break; } if (com == PROC_LA_CTL && val != PROC_LA_CTL_LA48_ON_EXEC && val != PROC_LA_CTL_LA57_ON_EXEC && val != PROC_LA_CTL_DEFAULT_ON_EXEC) { error = EINVAL; break; } error = pget(id, PGET_CANSEE | PGET_NOTWEXIT | PGET_NOTID, &p); if (error != 0) break; switch (com) { case PROC_KPTI_CTL: cpu_procctl_kpti_ctl(p, val); break; case PROC_KPTI_STATUS: cpu_procctl_kpti_status(p, &val); break; case PROC_LA_CTL: error = cpu_procctl_la_ctl(p, val); break; case PROC_LA_STATUS: cpu_procctl_la_status(p, &val); break; } PROC_UNLOCK(p); if (com == PROC_KPTI_STATUS || com == PROC_LA_STATUS) error = copyout(&val, data, sizeof(val)); break; default: error = EINVAL; break; } return (error); } void cpu_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame; frame = td->td_frame; if (__predict_true(error == 0)) { frame->tf_rax = td->td_retval[0]; frame->tf_rdx = td->td_retval[1]; frame->tf_rflags &= ~PSL_C; return; } switch (error) { case ERESTART: /* * Reconstruct pc, we know that 'syscall' is 2 bytes, * lcall $X,y is 7 bytes, int 0x80 is 2 bytes. * We saved this in tf_err. * %r10 (which was holding the value of %rcx) is restored * for the next iteration. * %r10 restore is only required for freebsd/amd64 processes, * but shall be innocent for any ia32 ABI. * * Require full context restore to get the arguments * in the registers reloaded at return to usermode. */ frame->tf_rip -= frame->tf_err; frame->tf_r10 = frame->tf_rcx; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); break; case EJUSTRETURN: break; default: frame->tf_rax = error; frame->tf_rflags |= PSL_C; break; } } /* * Initialize machine state, mostly pcb and trap frame for a new * thread, about to return to userspace. Put enough state in the new * thread's PCB to get it to go back to the fork_return(), which * finalizes the thread state and handles peculiarities of the first * return to userspace for the new thread. */ void cpu_copy_thread(struct thread *td, struct thread *td0) { copy_thread(td0, td); /* * Copy user general-purpose registers. * * Some of these registers are rewritten by cpu_set_upcall() - * and linux_set_upcall_kse(). + * and linux_set_upcall(). */ bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe)); /* If the current thread has the trap bit set (i.e. a debugger had * single stepped the process to the system call), we need to clear * the trap flag from the new frame. Otherwise, the new thread will * receive a (likely unexpected) SIGTRAP when it executes the first * instruction after returning to userland. */ td->td_frame->tf_rflags &= ~PSL_T; set_pcb_flags_raw(td->td_pcb, PCB_FULL_IRET); } /* * Set that machine state for performing an upcall that starts * the entry function with the given argument. */ void cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg, stack_t *stack) { /* * Do any extra cleaning that needs to be done. * The thread may have optional components * that are not present in a fresh thread. * This may be a recycled thread so make it look * as though it's newly allocated. */ cpu_thread_clean(td); #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { /* * Set the trap frame to point at the beginning of the entry * function. */ td->td_frame->tf_rbp = 0; td->td_frame->tf_rsp = (((uintptr_t)stack->ss_sp + stack->ss_size - 4) & ~0x0f) - 4; td->td_frame->tf_rip = (uintptr_t)entry; /* Return address sentinel value to stop stack unwinding. */ suword32((void *)td->td_frame->tf_rsp, 0); /* Pass the argument to the entry point. */ suword32((void *)(td->td_frame->tf_rsp + sizeof(int32_t)), (uint32_t)(uintptr_t)arg); return; } #endif /* * Set the trap frame to point at the beginning of the uts * function. */ td->td_frame->tf_rbp = 0; td->td_frame->tf_rsp = ((register_t)stack->ss_sp + stack->ss_size) & ~0x0f; td->td_frame->tf_rsp -= 8; td->td_frame->tf_rip = (register_t)entry; td->td_frame->tf_ds = _udatasel; td->td_frame->tf_es = _udatasel; td->td_frame->tf_fs = _ufssel; td->td_frame->tf_gs = _ugssel; td->td_frame->tf_flags = TF_HASSEGS; /* Return address sentinel value to stop stack unwinding. */ suword((void *)td->td_frame->tf_rsp, 0); /* Pass the argument to the entry point. */ td->td_frame->tf_rdi = (register_t)arg; } int cpu_set_user_tls(struct thread *td, void *tls_base) { struct pcb *pcb; if ((u_int64_t)tls_base >= VM_MAXUSER_ADDRESS) return (EINVAL); pcb = td->td_pcb; set_pcb_flags(pcb, PCB_FULL_IRET); #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { pcb->pcb_gsbase = (register_t)tls_base; return (0); } #endif pcb->pcb_fsbase = (register_t)tls_base; return (0); } /* * Software interrupt handler for queued VM system processing. */ void swi_vm(void *dummy) { if (busdma_swi_pending != 0) busdma_swi(); } /* * Tell whether this address is in some physical memory region. * Currently used by the kernel coredump code in order to avoid * dumping the ``ISA memory hole'' which could cause indefinite hangs, * or other unpredictable behaviour. */ int is_physical_memory(vm_paddr_t addr) { #ifdef DEV_ISA /* The ISA ``memory hole''. */ if (addr >= 0xa0000 && addr < 0x100000) return 0; #endif /* * stuff other tests for known memory-mapped devices (PCI?) * here */ return 1; } diff --git a/sys/amd64/linux/linux_machdep.c b/sys/amd64/linux/linux_machdep.c index 01b730c2b19c..a82ab411daa1 100644 --- a/sys/amd64/linux/linux_machdep.c +++ b/sys/amd64/linux/linux_machdep.c @@ -1,343 +1,343 @@ /*- * Copyright (c) 2013 Dmitry Chagin * Copyright (c) 2004 Tim J. Robbins * Copyright (c) 2002 Doug Rabson * Copyright (c) 2000 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include int linux_execve(struct thread *td, struct linux_execve_args *args) { struct image_args eargs; char *path; int error; LINUX_CTR(execve); if (!LUSECONVPATH(td)) { error = exec_copyin_args(&eargs, args->path, UIO_USERSPACE, args->argp, args->envp); } else { LCONVPATHEXIST(td, args->path, &path); error = exec_copyin_args(&eargs, path, UIO_SYSSPACE, args->argp, args->envp); LFREEPATH(path); } if (error == 0) error = linux_common_execve(td, &eargs); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); return (error); } int -linux_set_upcall_kse(struct thread *td, register_t stack) +linux_set_upcall(struct thread *td, register_t stack) { if (stack) td->td_frame->tf_rsp = stack; /* * The newly created Linux thread returns * to the user space by the same path that a parent does. */ td->td_frame->tf_rax = 0; return (0); } int linux_mmap2(struct thread *td, struct linux_mmap2_args *args) { return (linux_mmap_common(td, args->addr, args->len, args->prot, args->flags, args->fd, args->pgoff)); } int linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) { return (linux_mprotect_common(td, uap->addr, uap->len, uap->prot)); } int linux_madvise(struct thread *td, struct linux_madvise_args *uap) { return (linux_madvise_common(td, uap->addr, uap->len, uap->behav)); } int linux_iopl(struct thread *td, struct linux_iopl_args *args) { int error; LINUX_CTR(iopl); if (args->level > 3) return (EINVAL); if ((error = priv_check(td, PRIV_IO)) != 0) return (error); if ((error = securelevel_gt(td->td_ucred, 0)) != 0) return (error); td->td_frame->tf_rflags = (td->td_frame->tf_rflags & ~PSL_IOPL) | (args->level * (PSL_IOPL / 3)); return (0); } int linux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap) { l_sigset_t lmask; sigset_t sigmask; int error; LINUX_CTR2(rt_sigsuspend, "%p, %ld", uap->newset, uap->sigsetsize); if (uap->sigsetsize != sizeof(l_sigset_t)) return (EINVAL); error = copyin(uap->newset, &lmask, sizeof(l_sigset_t)); if (error) return (error); linux_to_bsd_sigset(&lmask, &sigmask); return (kern_sigsuspend(td, sigmask)); } int linux_pause(struct thread *td, struct linux_pause_args *args) { struct proc *p = td->td_proc; sigset_t sigmask; LINUX_CTR(pause); PROC_LOCK(p); sigmask = td->td_sigmask; PROC_UNLOCK(p); return (kern_sigsuspend(td, sigmask)); } int linux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap) { stack_t ss, oss; l_stack_t lss; int error; memset(&lss, 0, sizeof(lss)); LINUX_CTR2(sigaltstack, "%p, %p", uap->uss, uap->uoss); if (uap->uss != NULL) { error = copyin(uap->uss, &lss, sizeof(l_stack_t)); if (error) return (error); ss.ss_sp = PTRIN(lss.ss_sp); ss.ss_size = lss.ss_size; ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags); } error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL, (uap->uoss != NULL) ? &oss : NULL); if (!error && uap->uoss != NULL) { lss.ss_sp = PTROUT(oss.ss_sp); lss.ss_size = oss.ss_size; lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags); error = copyout(&lss, uap->uoss, sizeof(l_stack_t)); } return (error); } int linux_arch_prctl(struct thread *td, struct linux_arch_prctl_args *args) { struct pcb *pcb; int error; pcb = td->td_pcb; LINUX_CTR2(arch_prctl, "0x%x, %p", args->code, args->addr); switch (args->code) { case LINUX_ARCH_SET_GS: if (args->addr < VM_MAXUSER_ADDRESS) { update_pcb_bases(pcb); pcb->pcb_gsbase = args->addr; td->td_frame->tf_gs = _ugssel; error = 0; } else error = EPERM; break; case LINUX_ARCH_SET_FS: if (args->addr < VM_MAXUSER_ADDRESS) { update_pcb_bases(pcb); pcb->pcb_fsbase = args->addr; td->td_frame->tf_fs = _ufssel; error = 0; } else error = EPERM; break; case LINUX_ARCH_GET_FS: error = copyout(&pcb->pcb_fsbase, PTRIN(args->addr), sizeof(args->addr)); break; case LINUX_ARCH_GET_GS: error = copyout(&pcb->pcb_gsbase, PTRIN(args->addr), sizeof(args->addr)); break; default: error = EINVAL; } return (error); } int linux_set_cloned_tls(struct thread *td, void *desc) { struct pcb *pcb; if ((uint64_t)desc >= VM_MAXUSER_ADDRESS) return (EPERM); pcb = td->td_pcb; update_pcb_bases(pcb); pcb->pcb_fsbase = (register_t)desc; td->td_frame->tf_fs = _ufssel; return (0); } int futex_xchgl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_xchgl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_xchgl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_xchgl_smap : futex_xchgl_nosmap); } int futex_addl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_addl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_addl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_addl_smap : futex_addl_nosmap); } int futex_orl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_orl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_orl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_orl_smap : futex_orl_nosmap); } int futex_andl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_andl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_andl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_andl_smap : futex_andl_nosmap); } int futex_xorl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_xorl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_xorl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_xorl_smap : futex_xorl_nosmap); } diff --git a/sys/amd64/linux32/linux32_machdep.c b/sys/amd64/linux32/linux32_machdep.c index fde180d74d73..8ae82f3df8ca 100644 --- a/sys/amd64/linux32/linux32_machdep.c +++ b/sys/amd64/linux32/linux32_machdep.c @@ -1,767 +1,767 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2004 Tim J. Robbins * Copyright (c) 2002 Doug Rabson * Copyright (c) 2000 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru); struct l_old_select_argv { l_int nfds; l_uintptr_t readfds; l_uintptr_t writefds; l_uintptr_t exceptfds; l_uintptr_t timeout; } __packed; static void bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru) { lru->ru_utime.tv_sec = ru->ru_utime.tv_sec; lru->ru_utime.tv_usec = ru->ru_utime.tv_usec; lru->ru_stime.tv_sec = ru->ru_stime.tv_sec; lru->ru_stime.tv_usec = ru->ru_stime.tv_usec; lru->ru_maxrss = ru->ru_maxrss; lru->ru_ixrss = ru->ru_ixrss; lru->ru_idrss = ru->ru_idrss; lru->ru_isrss = ru->ru_isrss; lru->ru_minflt = ru->ru_minflt; lru->ru_majflt = ru->ru_majflt; lru->ru_nswap = ru->ru_nswap; lru->ru_inblock = ru->ru_inblock; lru->ru_oublock = ru->ru_oublock; lru->ru_msgsnd = ru->ru_msgsnd; lru->ru_msgrcv = ru->ru_msgrcv; lru->ru_nsignals = ru->ru_nsignals; lru->ru_nvcsw = ru->ru_nvcsw; lru->ru_nivcsw = ru->ru_nivcsw; } int linux_copyout_rusage(struct rusage *ru, void *uaddr) { struct l_rusage lru; bsd_to_linux_rusage(ru, &lru); return (copyout(&lru, uaddr, sizeof(struct l_rusage))); } int linux_execve(struct thread *td, struct linux_execve_args *args) { struct image_args eargs; char *path; int error; LCONVPATHEXIST(td, args->path, &path); error = freebsd32_exec_copyin_args(&eargs, path, UIO_SYSSPACE, args->argp, args->envp); free(path, M_TEMP); if (error == 0) error = linux_common_execve(td, &eargs); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); return (error); } CTASSERT(sizeof(struct l_iovec32) == 8); int linux32_copyinuio(struct l_iovec32 *iovp, l_ulong iovcnt, struct uio **uiop) { struct l_iovec32 iov32; struct iovec *iov; struct uio *uio; uint32_t iovlen; int error, i; *uiop = NULL; if (iovcnt > UIO_MAXIOV) return (EINVAL); iovlen = iovcnt * sizeof(struct iovec); uio = malloc(iovlen + sizeof(*uio), M_IOV, M_WAITOK); iov = (struct iovec *)(uio + 1); for (i = 0; i < iovcnt; i++) { error = copyin(&iovp[i], &iov32, sizeof(struct l_iovec32)); if (error) { free(uio, M_IOV); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } uio->uio_iov = iov; uio->uio_iovcnt = iovcnt; uio->uio_segflg = UIO_USERSPACE; uio->uio_offset = -1; uio->uio_resid = 0; for (i = 0; i < iovcnt; i++) { if (iov->iov_len > INT_MAX - uio->uio_resid) { free(uio, M_IOV); return (EINVAL); } uio->uio_resid += iov->iov_len; iov++; } *uiop = uio; return (0); } int linux32_copyiniov(struct l_iovec32 *iovp32, l_ulong iovcnt, struct iovec **iovp, int error) { struct l_iovec32 iov32; struct iovec *iov; uint32_t iovlen; int i; *iovp = NULL; if (iovcnt > UIO_MAXIOV) return (error); iovlen = iovcnt * sizeof(struct iovec); iov = malloc(iovlen, M_IOV, M_WAITOK); for (i = 0; i < iovcnt; i++) { error = copyin(&iovp32[i], &iov32, sizeof(struct l_iovec32)); if (error) { free(iov, M_IOV); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } *iovp = iov; return(0); } int linux_readv(struct thread *td, struct linux_readv_args *uap) { struct uio *auio; int error; error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_readv(td, uap->fd, auio); free(auio, M_IOV); return (error); } int linux_writev(struct thread *td, struct linux_writev_args *uap) { struct uio *auio; int error; error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_writev(td, uap->fd, auio); free(auio, M_IOV); return (error); } struct l_ipc_kludge { l_uintptr_t msgp; l_long msgtyp; } __packed; int linux_ipc(struct thread *td, struct linux_ipc_args *args) { switch (args->what & 0xFFFF) { case LINUX_SEMOP: { struct linux_semop_args a; a.semid = args->arg1; a.tsops = PTRIN(args->ptr); a.nsops = args->arg2; return (linux_semop(td, &a)); } case LINUX_SEMGET: { struct linux_semget_args a; a.key = args->arg1; a.nsems = args->arg2; a.semflg = args->arg3; return (linux_semget(td, &a)); } case LINUX_SEMCTL: { struct linux_semctl_args a; int error; a.semid = args->arg1; a.semnum = args->arg2; a.cmd = args->arg3; error = copyin(PTRIN(args->ptr), &a.arg, sizeof(a.arg)); if (error) return (error); return (linux_semctl(td, &a)); } case LINUX_MSGSND: { struct linux_msgsnd_args a; a.msqid = args->arg1; a.msgp = PTRIN(args->ptr); a.msgsz = args->arg2; a.msgflg = args->arg3; return (linux_msgsnd(td, &a)); } case LINUX_MSGRCV: { struct linux_msgrcv_args a; a.msqid = args->arg1; a.msgsz = args->arg2; a.msgflg = args->arg3; if ((args->what >> 16) == 0) { struct l_ipc_kludge tmp; int error; if (args->ptr == 0) return (EINVAL); error = copyin(PTRIN(args->ptr), &tmp, sizeof(tmp)); if (error) return (error); a.msgp = PTRIN(tmp.msgp); a.msgtyp = tmp.msgtyp; } else { a.msgp = PTRIN(args->ptr); a.msgtyp = args->arg5; } return (linux_msgrcv(td, &a)); } case LINUX_MSGGET: { struct linux_msgget_args a; a.key = args->arg1; a.msgflg = args->arg2; return (linux_msgget(td, &a)); } case LINUX_MSGCTL: { struct linux_msgctl_args a; a.msqid = args->arg1; a.cmd = args->arg2; a.buf = PTRIN(args->ptr); return (linux_msgctl(td, &a)); } case LINUX_SHMAT: { struct linux_shmat_args a; l_uintptr_t addr; int error; a.shmid = args->arg1; a.shmaddr = PTRIN(args->ptr); a.shmflg = args->arg2; error = linux_shmat(td, &a); if (error != 0) return (error); addr = td->td_retval[0]; error = copyout(&addr, PTRIN(args->arg3), sizeof(addr)); td->td_retval[0] = 0; return (error); } case LINUX_SHMDT: { struct linux_shmdt_args a; a.shmaddr = PTRIN(args->ptr); return (linux_shmdt(td, &a)); } case LINUX_SHMGET: { struct linux_shmget_args a; a.key = args->arg1; a.size = args->arg2; a.shmflg = args->arg3; return (linux_shmget(td, &a)); } case LINUX_SHMCTL: { struct linux_shmctl_args a; a.shmid = args->arg1; a.cmd = args->arg2; a.buf = PTRIN(args->ptr); return (linux_shmctl(td, &a)); } default: break; } return (EINVAL); } int linux_old_select(struct thread *td, struct linux_old_select_args *args) { struct l_old_select_argv linux_args; struct linux_select_args newsel; int error; error = copyin(args->ptr, &linux_args, sizeof(linux_args)); if (error) return (error); newsel.nfds = linux_args.nfds; newsel.readfds = PTRIN(linux_args.readfds); newsel.writefds = PTRIN(linux_args.writefds); newsel.exceptfds = PTRIN(linux_args.exceptfds); newsel.timeout = PTRIN(linux_args.timeout); return (linux_select(td, &newsel)); } int linux_set_cloned_tls(struct thread *td, void *desc) { struct l_user_desc info; struct pcb *pcb; int error; error = copyin(desc, &info, sizeof(struct l_user_desc)); if (error) { linux_msg(td, "set_cloned_tls copyin info failed!"); } else { /* We might copy out the entry_number as GUGS32_SEL. */ info.entry_number = GUGS32_SEL; error = copyout(&info, desc, sizeof(struct l_user_desc)); if (error) linux_msg(td, "set_cloned_tls copyout info failed!"); pcb = td->td_pcb; update_pcb_bases(pcb); pcb->pcb_gsbase = (register_t)info.base_addr; td->td_frame->tf_gs = GSEL(GUGS32_SEL, SEL_UPL); } return (error); } int -linux_set_upcall_kse(struct thread *td, register_t stack) +linux_set_upcall(struct thread *td, register_t stack) { if (stack) td->td_frame->tf_rsp = stack; /* * The newly created Linux thread returns * to the user space by the same path that a parent do. */ td->td_frame->tf_rax = 0; return (0); } int linux_mmap2(struct thread *td, struct linux_mmap2_args *args) { return (linux_mmap_common(td, PTROUT(args->addr), args->len, args->prot, args->flags, args->fd, (uint64_t)(uint32_t)args->pgoff * PAGE_SIZE)); } int linux_mmap(struct thread *td, struct linux_mmap_args *args) { int error; struct l_mmap_argv linux_args; error = copyin(args->ptr, &linux_args, sizeof(linux_args)); if (error) return (error); return (linux_mmap_common(td, linux_args.addr, linux_args.len, linux_args.prot, linux_args.flags, linux_args.fd, (uint32_t)linux_args.pgoff)); } int linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) { return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot)); } int linux_madvise(struct thread *td, struct linux_madvise_args *uap) { return (linux_madvise_common(td, PTROUT(uap->addr), uap->len, uap->behav)); } int linux_iopl(struct thread *td, struct linux_iopl_args *args) { int error; if (args->level < 0 || args->level > 3) return (EINVAL); if ((error = priv_check(td, PRIV_IO)) != 0) return (error); if ((error = securelevel_gt(td->td_ucred, 0)) != 0) return (error); td->td_frame->tf_rflags = (td->td_frame->tf_rflags & ~PSL_IOPL) | (args->level * (PSL_IOPL / 3)); return (0); } int linux_sigaction(struct thread *td, struct linux_sigaction_args *args) { l_osigaction_t osa; l_sigaction_t act, oact; int error; if (args->nsa != NULL) { error = copyin(args->nsa, &osa, sizeof(l_osigaction_t)); if (error) return (error); act.lsa_handler = osa.lsa_handler; act.lsa_flags = osa.lsa_flags; act.lsa_restorer = osa.lsa_restorer; LINUX_SIGEMPTYSET(act.lsa_mask); act.lsa_mask.__mask = osa.lsa_mask; } error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL, args->osa ? &oact : NULL); if (args->osa != NULL && !error) { osa.lsa_handler = oact.lsa_handler; osa.lsa_flags = oact.lsa_flags; osa.lsa_restorer = oact.lsa_restorer; osa.lsa_mask = oact.lsa_mask.__mask; error = copyout(&osa, args->osa, sizeof(l_osigaction_t)); } return (error); } /* * Linux has two extra args, restart and oldmask. We don't use these, * but it seems that "restart" is actually a context pointer that * enables the signal to happen with a different register set. */ int linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args) { sigset_t sigmask; l_sigset_t mask; LINUX_SIGEMPTYSET(mask); mask.__mask = args->mask; linux_to_bsd_sigset(&mask, &sigmask); return (kern_sigsuspend(td, sigmask)); } int linux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap) { l_sigset_t lmask; sigset_t sigmask; int error; if (uap->sigsetsize != sizeof(l_sigset_t)) return (EINVAL); error = copyin(uap->newset, &lmask, sizeof(l_sigset_t)); if (error) return (error); linux_to_bsd_sigset(&lmask, &sigmask); return (kern_sigsuspend(td, sigmask)); } int linux_pause(struct thread *td, struct linux_pause_args *args) { struct proc *p = td->td_proc; sigset_t sigmask; PROC_LOCK(p); sigmask = td->td_sigmask; PROC_UNLOCK(p); return (kern_sigsuspend(td, sigmask)); } int linux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap) { stack_t ss, oss; l_stack_t lss; int error; if (uap->uss != NULL) { error = copyin(uap->uss, &lss, sizeof(l_stack_t)); if (error) return (error); ss.ss_sp = PTRIN(lss.ss_sp); ss.ss_size = lss.ss_size; ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags); } error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL, (uap->uoss != NULL) ? &oss : NULL); if (!error && uap->uoss != NULL) { lss.ss_sp = PTROUT(oss.ss_sp); lss.ss_size = oss.ss_size; lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags); error = copyout(&lss, uap->uoss, sizeof(l_stack_t)); } return (error); } int linux_gettimeofday(struct thread *td, struct linux_gettimeofday_args *uap) { struct timeval atv; l_timeval atv32; struct timezone rtz; int error = 0; if (uap->tp) { microtime(&atv); atv32.tv_sec = atv.tv_sec; atv32.tv_usec = atv.tv_usec; error = copyout(&atv32, uap->tp, sizeof(atv32)); } if (error == 0 && uap->tzp != NULL) { rtz.tz_minuteswest = 0; rtz.tz_dsttime = 0; error = copyout(&rtz, uap->tzp, sizeof(rtz)); } return (error); } int linux_settimeofday(struct thread *td, struct linux_settimeofday_args *uap) { l_timeval atv32; struct timeval atv, *tvp; struct timezone atz, *tzp; int error; if (uap->tp) { error = copyin(uap->tp, &atv32, sizeof(atv32)); if (error) return (error); atv.tv_sec = atv32.tv_sec; atv.tv_usec = atv32.tv_usec; tvp = &atv; } else tvp = NULL; if (uap->tzp) { error = copyin(uap->tzp, &atz, sizeof(atz)); if (error) return (error); tzp = &atz; } else tzp = NULL; return (kern_settimeofday(td, tvp, tzp)); } int linux_getrusage(struct thread *td, struct linux_getrusage_args *uap) { struct rusage s; int error; error = kern_getrusage(td, uap->who, &s); if (error != 0) return (error); if (uap->rusage != NULL) error = linux_copyout_rusage(&s, uap->rusage); return (error); } int linux_set_thread_area(struct thread *td, struct linux_set_thread_area_args *args) { struct l_user_desc info; struct pcb *pcb; int error; error = copyin(args->desc, &info, sizeof(struct l_user_desc)); if (error) return (error); /* * Semantics of Linux version: every thread in the system has array * of three TLS descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. * This syscall loads one of the selected TLS decriptors with a value * and also loads GDT descriptors 6, 7 and 8 with the content of * the per-thread descriptors. * * Semantics of FreeBSD version: I think we can ignore that Linux has * three per-thread descriptors and use just the first one. * The tls_array[] is used only in [gs]et_thread_area() syscalls and * for loading the GDT descriptors. We use just one GDT descriptor * for TLS, so we will load just one. * * XXX: This doesn't work when a user space process tries to use more * than one TLS segment. Comment in the Linux source says wine might * do this. */ /* * GLIBC reads current %gs and call set_thread_area() with it. * We should let GUDATA_SEL and GUGS32_SEL proceed as well because * we use these segments. */ switch (info.entry_number) { case GUGS32_SEL: case GUDATA_SEL: case 6: case -1: info.entry_number = GUGS32_SEL; break; default: return (EINVAL); } /* * We have to copy out the GDT entry we use. * * XXX: What if a user space program does not check the return value * and tries to use 6, 7 or 8? */ error = copyout(&info, args->desc, sizeof(struct l_user_desc)); if (error) return (error); pcb = td->td_pcb; update_pcb_bases(pcb); pcb->pcb_gsbase = (register_t)info.base_addr; update_gdt_gsbase(td, info.base_addr); return (0); } int futex_xchgl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_xchgl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_xchgl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_xchgl_smap : futex_xchgl_nosmap); } int futex_addl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_addl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_addl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_addl_smap : futex_addl_nosmap); } int futex_orl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_orl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_orl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_orl_smap : futex_orl_nosmap); } int futex_andl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_andl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_andl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_andl_smap : futex_andl_nosmap); } int futex_xorl_nosmap(int oparg, uint32_t *uaddr, int *oldval); int futex_xorl_smap(int oparg, uint32_t *uaddr, int *oldval); DEFINE_IFUNC(, int, futex_xorl, (int, uint32_t *, int *)) { return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? futex_xorl_smap : futex_xorl_nosmap); } diff --git a/sys/arm64/linux/linux_machdep.c b/sys/arm64/linux/linux_machdep.c index 512bf1cc3398..711ccb4fd63d 100644 --- a/sys/arm64/linux/linux_machdep.c +++ b/sys/arm64/linux/linux_machdep.c @@ -1,146 +1,146 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2018 Turing Robotic Industries Inc. * Copyright (c) 2000 Marcel Moolenaar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* DTrace init */ LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); /* DTrace probes */ -LIN_SDT_PROBE_DEFINE0(machdep, linux_set_upcall_kse, todo); +LIN_SDT_PROBE_DEFINE0(machdep, linux_set_upcall, todo); LIN_SDT_PROBE_DEFINE0(machdep, linux_mmap2, todo); LIN_SDT_PROBE_DEFINE0(machdep, linux_rt_sigsuspend, todo); LIN_SDT_PROBE_DEFINE0(machdep, linux_sigaltstack, todo); LIN_SDT_PROBE_DEFINE0(machdep, linux_set_cloned_tls, todo); /* * LINUXTODO: deduplicate; linux_execve is common across archs, except that on * amd64 compat linuxulator it calls freebsd32_exec_copyin_args. */ int linux_execve(struct thread *td, struct linux_execve_args *uap) { struct image_args eargs; char *path; int error; if (!LUSECONVPATH(td)) { error = exec_copyin_args(&eargs, uap->path, UIO_USERSPACE, uap->argp, uap->envp); } else { LCONVPATHEXIST(td, uap->path, &path); error = exec_copyin_args(&eargs, path, UIO_SYSSPACE, uap->argp, uap->envp); LFREEPATH(path); } if (error == 0) error = linux_common_execve(td, &eargs); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); return (error); } -/* LINUXTODO: implement (or deduplicate) arm64 linux_set_upcall_kse */ +/* LINUXTODO: implement (or deduplicate) arm64 linux_set_upcall */ int -linux_set_upcall_kse(struct thread *td, register_t stack) +linux_set_upcall(struct thread *td, register_t stack) { - LIN_SDT_PROBE0(machdep, linux_set_upcall_kse, todo); + LIN_SDT_PROBE0(machdep, linux_set_upcall, todo); return (EDOOFUS); } /* LINUXTODO: deduplicate arm64 linux_mmap2 */ int linux_mmap2(struct thread *td, struct linux_mmap2_args *uap) { LIN_SDT_PROBE0(machdep, linux_mmap2, todo); return (linux_mmap_common(td, PTROUT(uap->addr), uap->len, uap->prot, uap->flags, uap->fd, uap->pgoff)); } int linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) { return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot)); } int linux_madvise(struct thread *td, struct linux_madvise_args *uap) { return (linux_madvise_common(td, PTROUT(uap->addr), uap->len, uap->behav)); } /* LINUXTODO: implement arm64 linux_rt_sigsuspend */ int linux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap) { LIN_SDT_PROBE0(machdep, linux_rt_sigsuspend, todo); return (EDOOFUS); } /* LINUXTODO: implement arm64 linux_sigaltstack */ int linux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap) { LIN_SDT_PROBE0(machdep, linux_sigaltstack, todo); return (EDOOFUS); } /* LINUXTODO: implement arm64 linux_set_cloned_tls */ int linux_set_cloned_tls(struct thread *td, void *desc) { LIN_SDT_PROBE0(machdep, linux_set_cloned_tls, todo); return (EDOOFUS); } diff --git a/sys/compat/linux/linux_fork.c b/sys/compat/linux/linux_fork.c index 3ba40fc2705e..ed4adcf8a175 100644 --- a/sys/compat/linux/linux_fork.c +++ b/sys/compat/linux/linux_fork.c @@ -1,449 +1,449 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004 Tim J. Robbins * Copyright (c) 2002 Doug Rabson * Copyright (c) 2000 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_LINUX32 #include #include #else #include #include #endif #include #include #include #include #ifdef LINUX_LEGACY_SYSCALLS int linux_fork(struct thread *td, struct linux_fork_args *args) { struct fork_req fr; int error; struct proc *p2; struct thread *td2; bzero(&fr, sizeof(fr)); fr.fr_flags = RFFDG | RFPROC | RFSTOPPED; fr.fr_procp = &p2; if ((error = fork1(td, &fr)) != 0) return (error); td2 = FIRST_THREAD_IN_PROC(p2); linux_proc_init(td, td2, 0); td->td_retval[0] = p2->p_pid; /* * Make this runnable after we are finished with it. */ thread_lock(td2); TD_SET_CAN_RUN(td2); sched_add(td2, SRQ_BORING); return (0); } int linux_vfork(struct thread *td, struct linux_vfork_args *args) { struct fork_req fr; int error; struct proc *p2; struct thread *td2; bzero(&fr, sizeof(fr)); fr.fr_flags = RFFDG | RFPROC | RFMEM | RFPPWAIT | RFSTOPPED; fr.fr_procp = &p2; if ((error = fork1(td, &fr)) != 0) return (error); td2 = FIRST_THREAD_IN_PROC(p2); linux_proc_init(td, td2, 0); td->td_retval[0] = p2->p_pid; /* * Make this runnable after we are finished with it. */ thread_lock(td2); TD_SET_CAN_RUN(td2); sched_add(td2, SRQ_BORING); return (0); } #endif static int linux_clone_proc(struct thread *td, struct linux_clone_args *args) { struct fork_req fr; int error, ff = RFPROC | RFSTOPPED, f2; struct proc *p2; struct thread *td2; int exit_signal; struct linux_emuldata *em; f2 = 0; exit_signal = args->flags & 0x000000ff; if (LINUX_SIG_VALID(exit_signal)) { exit_signal = linux_to_bsd_signal(exit_signal); } else if (exit_signal != 0) return (EINVAL); if (args->flags & LINUX_CLONE_VM) ff |= RFMEM; if (args->flags & LINUX_CLONE_SIGHAND) ff |= RFSIGSHARE; if (args->flags & LINUX_CLONE_FILES) { if (!(args->flags & LINUX_CLONE_FS)) f2 |= FR2_SHARE_PATHS; } else { ff |= RFFDG; if (args->flags & LINUX_CLONE_FS) f2 |= FR2_SHARE_PATHS; } if (args->flags & LINUX_CLONE_PARENT_SETTID) if (args->parent_tidptr == NULL) return (EINVAL); if (args->flags & LINUX_CLONE_VFORK) ff |= RFPPWAIT; bzero(&fr, sizeof(fr)); fr.fr_flags = ff; fr.fr_flags2 = f2; fr.fr_procp = &p2; error = fork1(td, &fr); if (error) return (error); td2 = FIRST_THREAD_IN_PROC(p2); /* create the emuldata */ linux_proc_init(td, td2, args->flags); em = em_find(td2); KASSERT(em != NULL, ("clone_proc: emuldata not found.\n")); if (args->flags & LINUX_CLONE_CHILD_SETTID) em->child_set_tid = args->child_tidptr; else em->child_set_tid = NULL; if (args->flags & LINUX_CLONE_CHILD_CLEARTID) em->child_clear_tid = args->child_tidptr; else em->child_clear_tid = NULL; if (args->flags & LINUX_CLONE_PARENT_SETTID) { error = copyout(&p2->p_pid, args->parent_tidptr, sizeof(p2->p_pid)); if (error) linux_msg(td, "copyout p_pid failed!"); } PROC_LOCK(p2); p2->p_sigparent = exit_signal; PROC_UNLOCK(p2); /* * In a case of stack = NULL, we are supposed to COW calling process * stack. This is what normal fork() does, so we just keep tf_rsp arg * intact. */ - linux_set_upcall_kse(td2, PTROUT(args->stack)); + linux_set_upcall(td2, PTROUT(args->stack)); if (args->flags & LINUX_CLONE_SETTLS) linux_set_cloned_tls(td2, args->tls); /* * If CLONE_PARENT is set, then the parent of the new process will be * the same as that of the calling process. */ if (args->flags & LINUX_CLONE_PARENT) { sx_xlock(&proctree_lock); PROC_LOCK(p2); proc_reparent(p2, td->td_proc->p_pptr, true); PROC_UNLOCK(p2); sx_xunlock(&proctree_lock); } /* * Make this runnable after we are finished with it. */ thread_lock(td2); TD_SET_CAN_RUN(td2); sched_add(td2, SRQ_BORING); td->td_retval[0] = p2->p_pid; return (0); } static int linux_clone_thread(struct thread *td, struct linux_clone_args *args) { struct linux_emuldata *em; struct thread *newtd; struct proc *p; int error; LINUX_CTR4(clone_thread, "thread(%d) flags %x ptid %p ctid %p", td->td_tid, (unsigned)args->flags, args->parent_tidptr, args->child_tidptr); if ((args->flags & LINUX_CLONE_PARENT) != 0) return (EINVAL); if (args->flags & LINUX_CLONE_PARENT_SETTID) if (args->parent_tidptr == NULL) return (EINVAL); /* Threads should be created with own stack */ if (args->stack == NULL) return (EINVAL); p = td->td_proc; #ifdef RACCT if (racct_enable) { PROC_LOCK(p); error = racct_add(p, RACCT_NTHR, 1); PROC_UNLOCK(p); if (error != 0) return (EPROCLIM); } #endif /* Initialize our td */ error = kern_thr_alloc(p, 0, &newtd); if (error) goto fail; cpu_copy_thread(newtd, td); bzero(&newtd->td_startzero, __rangeof(struct thread, td_startzero, td_endzero)); bcopy(&td->td_startcopy, &newtd->td_startcopy, __rangeof(struct thread, td_startcopy, td_endcopy)); newtd->td_proc = p; thread_cow_get(newtd, td); /* create the emuldata */ linux_proc_init(td, newtd, args->flags); em = em_find(newtd); KASSERT(em != NULL, ("clone_thread: emuldata not found.\n")); if (args->flags & LINUX_CLONE_SETTLS) linux_set_cloned_tls(newtd, args->tls); if (args->flags & LINUX_CLONE_CHILD_SETTID) em->child_set_tid = args->child_tidptr; else em->child_set_tid = NULL; if (args->flags & LINUX_CLONE_CHILD_CLEARTID) em->child_clear_tid = args->child_tidptr; else em->child_clear_tid = NULL; cpu_thread_clean(newtd); - linux_set_upcall_kse(newtd, PTROUT(args->stack)); + linux_set_upcall(newtd, PTROUT(args->stack)); PROC_LOCK(p); p->p_flag |= P_HADTHREADS; thread_link(newtd, p); bcopy(p->p_comm, newtd->td_name, sizeof(newtd->td_name)); thread_lock(td); /* let the scheduler know about these things. */ sched_fork_thread(td, newtd); thread_unlock(td); if (P_SHOULDSTOP(p)) newtd->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK; if (p->p_ptevents & PTRACE_LWP) newtd->td_dbgflags |= TDB_BORN; PROC_UNLOCK(p); tidhash_add(newtd); LINUX_CTR2(clone_thread, "thread(%d) successful clone to %d", td->td_tid, newtd->td_tid); if (args->flags & LINUX_CLONE_PARENT_SETTID) { error = copyout(&newtd->td_tid, args->parent_tidptr, sizeof(newtd->td_tid)); if (error) linux_msg(td, "clone_thread: copyout td_tid failed!"); } /* * Make this runnable after we are finished with it. */ thread_lock(newtd); TD_SET_CAN_RUN(newtd); sched_add(newtd, SRQ_BORING); td->td_retval[0] = newtd->td_tid; return (0); fail: #ifdef RACCT if (racct_enable) { PROC_LOCK(p); racct_sub(p, RACCT_NTHR, 1); PROC_UNLOCK(p); } #endif return (error); } int linux_clone(struct thread *td, struct linux_clone_args *args) { if (args->flags & LINUX_CLONE_THREAD) return (linux_clone_thread(td, args)); else return (linux_clone_proc(td, args)); } int linux_exit(struct thread *td, struct linux_exit_args *args) { struct linux_emuldata *em; em = em_find(td); KASSERT(em != NULL, ("exit: emuldata not found.\n")); LINUX_CTR2(exit, "thread(%d) (%d)", em->em_tid, args->rval); umtx_thread_exit(td); linux_thread_detach(td); /* * XXX. When the last two threads of a process * exit via pthread_exit() try thr_exit() first. */ kern_thr_exit(td); exit1(td, args->rval, 0); /* NOTREACHED */ } int linux_set_tid_address(struct thread *td, struct linux_set_tid_address_args *args) { struct linux_emuldata *em; em = em_find(td); KASSERT(em != NULL, ("set_tid_address: emuldata not found.\n")); em->child_clear_tid = args->tidptr; td->td_retval[0] = em->em_tid; LINUX_CTR3(set_tid_address, "tidptr(%d) %p, returns %d", em->em_tid, args->tidptr, td->td_retval[0]); return (0); } void linux_thread_detach(struct thread *td) { struct linux_sys_futex_args cup; struct linux_emuldata *em; int *child_clear_tid; int error; em = em_find(td); KASSERT(em != NULL, ("thread_detach: emuldata not found.\n")); LINUX_CTR1(thread_detach, "thread(%d)", em->em_tid); release_futexes(td, em); child_clear_tid = em->child_clear_tid; if (child_clear_tid != NULL) { LINUX_CTR2(thread_detach, "thread(%d) %p", em->em_tid, child_clear_tid); error = suword32(child_clear_tid, 0); if (error != 0) return; cup.uaddr = child_clear_tid; cup.op = LINUX_FUTEX_WAKE; cup.val = 1; /* wake one */ cup.timeout = NULL; cup.uaddr2 = NULL; cup.val3 = 0; error = linux_sys_futex(td, &cup); /* * this cannot happen at the moment and if this happens it * probably means there is a user space bug */ if (error != 0) linux_msg(td, "futex stuff in thread_detach failed."); } } diff --git a/sys/compat/linux/linux_misc.h b/sys/compat/linux/linux_misc.h index c7e4fa38682c..da76753e3d24 100644 --- a/sys/compat/linux/linux_misc.h +++ b/sys/compat/linux/linux_misc.h @@ -1,188 +1,188 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2006 Roman Divacky * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _LINUX_MISC_H_ #define _LINUX_MISC_H_ #include /* bits per mask */ #define LINUX_NFDBITS sizeof(l_fd_mask) * 8 /* * Miscellaneous */ #define LINUX_NAME_MAX 255 #define LINUX_MAX_UTSNAME 65 #define LINUX_CTL_MAXNAME 10 /* defines for prctl */ #define LINUX_PR_SET_PDEATHSIG 1 /* Second arg is a signal. */ #define LINUX_PR_GET_PDEATHSIG 2 /* * Second arg is a ptr to return the * signal. */ #define LINUX_PR_GET_DUMPABLE 3 #define LINUX_PR_SET_DUMPABLE 4 #define LINUX_PR_GET_KEEPCAPS 7 /* Get drop capabilities on setuid */ #define LINUX_PR_SET_KEEPCAPS 8 /* Set drop capabilities on setuid */ #define LINUX_PR_SET_NAME 15 /* Set process name. */ #define LINUX_PR_GET_NAME 16 /* Get process name. */ #define LINUX_PR_GET_SECCOMP 21 #define LINUX_PR_SET_SECCOMP 22 #define LINUX_PR_CAPBSET_READ 23 #define LINUX_PR_SET_NO_NEW_PRIVS 38 #define LINUX_PR_SET_PTRACER 1499557217 #define LINUX_MAX_COMM_LEN 16 /* Maximum length of the process name. */ /* For GET/SET DUMPABLE */ #define LINUX_SUID_DUMP_DISABLE 0 /* Don't coredump setuid processes. */ #define LINUX_SUID_DUMP_USER 1 /* Dump as user of process. */ #define LINUX_SUID_DUMP_ROOT 2 /* Dump as root. */ #define LINUX_MREMAP_MAYMOVE 1 #define LINUX_MREMAP_FIXED 2 #define LINUX_PATH_MAX 4096 extern const char *linux_kplatform; /* * Non-standard aux entry types used in Linux ELF binaries. */ #define LINUX_AT_PLATFORM 15 /* String identifying CPU */ #define LINUX_AT_HWCAP 16 /* CPU capabilities */ #define LINUX_AT_CLKTCK 17 /* frequency at which times() increments */ #define LINUX_AT_SECURE 23 /* secure mode boolean */ #define LINUX_AT_BASE_PLATFORM 24 /* string identifying real platform, may * differ from AT_PLATFORM. */ #define LINUX_AT_RANDOM 25 /* address of random bytes */ #define LINUX_AT_EXECFN 31 /* filename of program */ #define LINUX_AT_SYSINFO 32 /* vsyscall */ #define LINUX_AT_SYSINFO_EHDR 33 /* vdso header */ #define LINUX_AT_RANDOM_LEN 16 /* size of random bytes */ /* Linux sets the i387 to extended precision. */ #if defined(__i386__) || defined(__amd64__) #define __LINUX_NPXCW__ 0x37f #endif #define LINUX_CLONE_VM 0x00000100 #define LINUX_CLONE_FS 0x00000200 #define LINUX_CLONE_FILES 0x00000400 #define LINUX_CLONE_SIGHAND 0x00000800 #define LINUX_CLONE_PID 0x00001000 /* No longer exist in Linux */ #define LINUX_CLONE_PTRACE 0x00002000 #define LINUX_CLONE_VFORK 0x00004000 #define LINUX_CLONE_PARENT 0x00008000 #define LINUX_CLONE_THREAD 0x00010000 #define LINUX_CLONE_NEWNS 0x00020000 /* New mount NS */ #define LINUX_CLONE_SYSVSEM 0x00040000 #define LINUX_CLONE_SETTLS 0x00080000 #define LINUX_CLONE_PARENT_SETTID 0x00100000 #define LINUX_CLONE_CHILD_CLEARTID 0x00200000 #define LINUX_CLONE_DETACHED 0x00400000 /* Unused */ #define LINUX_CLONE_UNTRACED 0x00800000 #define LINUX_CLONE_CHILD_SETTID 0x01000000 #define LINUX_CLONE_NEWCGROUP 0x02000000 /* New cgroup NS */ #define LINUX_CLONE_NEWUTS 0x04000000 #define LINUX_CLONE_NEWIPC 0x08000000 #define LINUX_CLONE_NEWUSER 0x10000000 #define LINUX_CLONE_NEWPID 0x20000000 #define LINUX_CLONE_NEWNET 0x40000000 #define LINUX_CLONE_IO 0x80000000 /* Scheduling policies */ #define LINUX_SCHED_OTHER 0 #define LINUX_SCHED_FIFO 1 #define LINUX_SCHED_RR 2 #define LINUX_MAX_RT_PRIO 100 struct l_new_utsname { char sysname[LINUX_MAX_UTSNAME]; char nodename[LINUX_MAX_UTSNAME]; char release[LINUX_MAX_UTSNAME]; char version[LINUX_MAX_UTSNAME]; char machine[LINUX_MAX_UTSNAME]; char domainname[LINUX_MAX_UTSNAME]; }; #define LINUX_UTIME_NOW 0x3FFFFFFF #define LINUX_UTIME_OMIT 0x3FFFFFFE extern int stclohz; #define LINUX_WNOHANG 0x00000001 #define LINUX_WUNTRACED 0x00000002 #define LINUX_WSTOPPED LINUX_WUNTRACED #define LINUX_WEXITED 0x00000004 #define LINUX_WCONTINUED 0x00000008 #define LINUX_WNOWAIT 0x01000000 #define __WNOTHREAD 0x20000000 #define __WALL 0x40000000 #define __WCLONE 0x80000000 /* Linux waitid idtype */ #define LINUX_P_ALL 0 #define LINUX_P_PID 1 #define LINUX_P_PGID 2 #define LINUX_RLIMIT_LOCKS 10 #define LINUX_RLIMIT_SIGPENDING 11 #define LINUX_RLIMIT_MSGQUEUE 12 #define LINUX_RLIMIT_NICE 13 #define LINUX_RLIMIT_RTPRIO 14 #define LINUX_RLIMIT_RTTIME 15 #define LINUX_RLIM_INFINITY (~0UL) /* Linux getrandom flags */ #define LINUX_GRND_NONBLOCK 0x0001 #define LINUX_GRND_RANDOM 0x0002 /* Linux syslog flags */ #define LINUX_SYSLOG_ACTION_READ_ALL 3 #if defined(__amd64__) && !defined(COMPAT_LINUX32) int linux_ptrace_status(struct thread *td, int pid, int status); #endif void linux_to_bsd_waitopts(int options, int *bsdopts); -int linux_set_upcall_kse(struct thread *td, register_t stack); +int linux_set_upcall(struct thread *td, register_t stack); int linux_set_cloned_tls(struct thread *td, void *desc); struct thread *linux_tdfind(struct thread *, lwpid_t, pid_t); #endif /* _LINUX_MISC_H_ */ diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index 5bbdfdb77b2d..c04fb57db4b1 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -1,675 +1,675 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1982, 1986 The Regents of the University of California. * Copyright (c) 1989, 1990 William Jolitz * Copyright (c) 1994 John Dyson * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ */ #include __FBSDID("$FreeBSD$"); #include "opt_isa.h" #include "opt_npx.h" #include "opt_reset.h" #include "opt_cpu.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include _Static_assert(__OFFSETOF_MONITORBUF == offsetof(struct pcpu, pc_monitorbuf), "__OFFSETOF_MONITORBUF does not correspond with offset of pc_monitorbuf."); union savefpu * get_pcb_user_save_td(struct thread *td) { vm_offset_t p; p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE - roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN); KASSERT((p % XSAVE_AREA_ALIGN) == 0, ("Unaligned pcb_user_save area")); return ((union savefpu *)p); } union savefpu * get_pcb_user_save_pcb(struct pcb *pcb) { vm_offset_t p; p = (vm_offset_t)(pcb + 1); return ((union savefpu *)p); } struct pcb * get_pcb_td(struct thread *td) { vm_offset_t p; p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE - roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN) - sizeof(struct pcb); return ((struct pcb *)p); } void * alloc_fpusave(int flags) { void *res; struct savefpu_ymm *sf; res = malloc(cpu_max_ext_state_size, M_DEVBUF, flags); if (use_xsave) { sf = (struct savefpu_ymm *)res; bzero(&sf->sv_xstate.sx_hd, sizeof(sf->sv_xstate.sx_hd)); sf->sv_xstate.sx_hd.xstate_bv = xsave_mask; } return (res); } /* * Common code shared between cpu_fork() and cpu_copy_thread() for * initializing a thread. */ static void copy_thread(struct thread *td1, struct thread *td2) { struct pcb *pcb2; pcb2 = td2->td_pcb; /* Ensure that td1's pcb is up to date for user threads. */ if ((td2->td_pflags & TDP_KTHREAD) == 0) { MPASS(td1 == curthread); td1->td_pcb->pcb_gs = rgs(); critical_enter(); if (PCPU_GET(fpcurthread) == td1) npxsave(td1->td_pcb->pcb_save); critical_exit(); } /* Copy td1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); /* Properly initialize pcb_save */ pcb2->pcb_save = get_pcb_user_save_pcb(pcb2); /* Kernel threads start with clean NPX and segment bases. */ if ((td2->td_pflags & TDP_KTHREAD) != 0) { pcb2->pcb_gs = _udatasel; set_fsbase(td2, 0); set_gsbase(td2, 0); pcb2->pcb_flags &= ~(PCB_NPXINITDONE | PCB_NPXUSERINITDONE | PCB_KERNNPX | PCB_KERNNPX_THR); } else { MPASS((pcb2->pcb_flags & (PCB_KERNNPX | PCB_KERNNPX_THR)) == 0); bcopy(get_pcb_user_save_td(td1), get_pcb_user_save_pcb(pcb2), cpu_max_ext_state_size); } /* * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. */ pcb2->pcb_edi = 0; pcb2->pcb_esi = (int)fork_return; /* trampoline arg */ pcb2->pcb_ebp = 0; pcb2->pcb_esp = (int)td2->td_frame - sizeof(void *); /* trampoline arg */ pcb2->pcb_ebx = (int)td2; /* trampoline arg */ pcb2->pcb_eip = (int)fork_trampoline + setidt_disp; /* * If we didn't copy the pcb, we'd need to do the following registers: * pcb2->pcb_cr3: cloned above. * pcb2->pcb_dr*: cloned above. * pcb2->pcb_savefpu: cloned above. * pcb2->pcb_flags: cloned above. * pcb2->pcb_onfault: cloned above (always NULL here?). * pcb2->pcb_gs: cloned above. * pcb2->pcb_ext: cleared below. */ pcb2->pcb_ext = NULL; /* Setup to release spin count in fork_exit(). */ td2->td_md.md_spinlock_count = 1; td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I; } /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child * ready to run and return to user mode. */ void cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) { struct proc *p1; struct pcb *pcb2; struct mdproc *mdp2; p1 = td1->td_proc; if ((flags & RFPROC) == 0) { if ((flags & RFMEM) == 0) { /* unshare user LDT */ struct mdproc *mdp1 = &p1->p_md; struct proc_ldt *pldt, *pldt1; mtx_lock_spin(&dt_lock); if ((pldt1 = mdp1->md_ldt) != NULL && pldt1->ldt_refcnt > 1) { pldt = user_ldt_alloc(mdp1, pldt1->ldt_len); if (pldt == NULL) panic("could not copy LDT"); mdp1->md_ldt = pldt; set_user_ldt(mdp1); user_ldt_deref(pldt1); } else mtx_unlock_spin(&dt_lock); } return; } /* Point the pcb to the top of the stack */ pcb2 = get_pcb_td(td2); td2->td_pcb = pcb2; copy_thread(td1, td2); /* Point mdproc and then copy over td1's contents */ mdp2 = &p2->p_md; bcopy(&p1->p_md, mdp2, sizeof(*mdp2)); /* * Copy the trap frame for the return to user mode as if from a * syscall. This copies most of the user mode register values. * The -VM86_STACK_SPACE (-16) is so we can expand the trapframe * if we go to vm86. */ td2->td_frame = (struct trapframe *)((caddr_t)td2->td_pcb - VM86_STACK_SPACE) - 1; bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe)); td2->td_frame->tf_eax = 0; /* Child returns zero */ td2->td_frame->tf_eflags &= ~PSL_C; /* success */ td2->td_frame->tf_edx = 1; /* * If the parent process has the trap bit set (i.e. a debugger * had single stepped the process to the system call), we need * to clear the trap flag from the new frame. */ td2->td_frame->tf_eflags &= ~PSL_T; /* Set cr3 for the new process. */ pcb2->pcb_cr3 = pmap_get_cr3(vmspace_pmap(p2->p_vmspace)); /* * XXX don't copy the i/o pages. this should probably be fixed. */ pcb2->pcb_ext = NULL; /* Copy the LDT, if necessary. */ mtx_lock_spin(&dt_lock); if (mdp2->md_ldt != NULL) { if (flags & RFMEM) { mdp2->md_ldt->ldt_refcnt++; } else { mdp2->md_ldt = user_ldt_alloc(mdp2, mdp2->md_ldt->ldt_len); if (mdp2->md_ldt == NULL) panic("could not copy LDT"); } } mtx_unlock_spin(&dt_lock); /* * Now, cpu_switch() can schedule the new process. * pcb_esp is loaded pointing to the cpu_switch() stack frame * containing the return address when exiting cpu_switch. * This will normally be to fork_trampoline(), which will have * %ebx loaded with the new proc's pointer. fork_trampoline() * will set up a stack to call fork_return(p, frame); to complete * the return to user-mode. */ } /* * Intercept the return address from a freshly forked process that has NOT * been scheduled yet. * * This is needed to make kernel threads stay in kernel mode. */ void cpu_fork_kthread_handler(struct thread *td, void (*func)(void *), void *arg) { /* * Note that the trap frame follows the args, so the function * is really called like this: func(arg, frame); */ td->td_pcb->pcb_esi = (int) func; /* function */ td->td_pcb->pcb_ebx = (int) arg; /* first arg */ } void cpu_exit(struct thread *td) { /* * If this process has a custom LDT, release it. Reset pc->pcb_gs * and %gs before we free it in case they refer to an LDT entry. */ mtx_lock_spin(&dt_lock); if (td->td_proc->p_md.md_ldt) { td->td_pcb->pcb_gs = _udatasel; load_gs(_udatasel); user_ldt_free(td); } else mtx_unlock_spin(&dt_lock); } void cpu_thread_exit(struct thread *td) { critical_enter(); if (td == PCPU_GET(fpcurthread)) npxdrop(); critical_exit(); /* Disable any hardware breakpoints. */ if (td->td_pcb->pcb_flags & PCB_DBREGS) { reset_dbregs(); td->td_pcb->pcb_flags &= ~PCB_DBREGS; } } void cpu_thread_clean(struct thread *td) { struct pcb *pcb; pcb = td->td_pcb; if (pcb->pcb_ext != NULL) { /* if (pcb->pcb_ext->ext_refcount-- == 1) ?? */ /* * XXX do we need to move the TSS off the allocated pages * before freeing them? (not done here) */ pmap_trm_free(pcb->pcb_ext, ctob(IOPAGES + 1)); pcb->pcb_ext = NULL; } } void cpu_thread_swapin(struct thread *td) { } void cpu_thread_swapout(struct thread *td) { } void cpu_thread_alloc(struct thread *td) { struct pcb *pcb; struct xstate_hdr *xhdr; td->td_pcb = pcb = get_pcb_td(td); td->td_frame = (struct trapframe *)((caddr_t)pcb - VM86_STACK_SPACE) - 1; pcb->pcb_ext = NULL; pcb->pcb_save = get_pcb_user_save_pcb(pcb); if (use_xsave) { xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1); bzero(xhdr, sizeof(*xhdr)); xhdr->xstate_bv = xsave_mask; } } void cpu_thread_free(struct thread *td) { cpu_thread_clean(td); } bool cpu_exec_vmspace_reuse(struct proc *p __unused, vm_map_t map __unused) { return (true); } int cpu_procctl(struct thread *td __unused, int idtype __unused, id_t id __unused, int com __unused, void *data __unused) { return (EINVAL); } void cpu_set_syscall_retval(struct thread *td, int error) { switch (error) { case 0: td->td_frame->tf_eax = td->td_retval[0]; td->td_frame->tf_edx = td->td_retval[1]; td->td_frame->tf_eflags &= ~PSL_C; break; case ERESTART: /* * Reconstruct pc, assuming lcall $X,y is 7 bytes, int * 0x80 is 2 bytes. We saved this in tf_err. */ td->td_frame->tf_eip -= td->td_frame->tf_err; break; case EJUSTRETURN: break; default: td->td_frame->tf_eax = error; td->td_frame->tf_eflags |= PSL_C; break; } } /* * Initialize machine state, mostly pcb and trap frame for a new * thread, about to return to userspace. Put enough state in the new * thread's PCB to get it to go back to the fork_return(), which * finalizes the thread state and handles peculiarities of the first * return to userspace for the new thread. */ void cpu_copy_thread(struct thread *td, struct thread *td0) { copy_thread(td0, td); /* * Copy user general-purpose registers. * * Some of these registers are rewritten by cpu_set_upcall() - * and linux_set_upcall_kse(). + * and linux_set_upcall(). */ bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe)); /* If the current thread has the trap bit set (i.e. a debugger had * single stepped the process to the system call), we need to clear * the trap flag from the new frame. Otherwise, the new thread will * receive a (likely unexpected) SIGTRAP when it executes the first * instruction after returning to userland. */ td->td_frame->tf_eflags &= ~PSL_T; } /* * Set that machine state for performing an upcall that starts * the entry function with the given argument. */ void cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg, stack_t *stack) { /* * Do any extra cleaning that needs to be done. * The thread may have optional components * that are not present in a fresh thread. * This may be a recycled thread so make it look * as though it's newly allocated. */ cpu_thread_clean(td); /* * Set the trap frame to point at the beginning of the entry * function. */ td->td_frame->tf_ebp = 0; td->td_frame->tf_esp = (((int)stack->ss_sp + stack->ss_size - 4) & ~0x0f) - 4; td->td_frame->tf_eip = (int)entry; /* Return address sentinel value to stop stack unwinding. */ suword((void *)td->td_frame->tf_esp, 0); /* Pass the argument to the entry point. */ suword((void *)(td->td_frame->tf_esp + sizeof(void *)), (int)arg); } int cpu_set_user_tls(struct thread *td, void *tls_base) { struct segment_descriptor sd; uint32_t base; /* * Construct a descriptor and store it in the pcb for * the next context switch. Also store it in the gdt * so that the load of tf_fs into %fs will activate it * at return to userland. */ base = (uint32_t)tls_base; sd.sd_lobase = base & 0xffffff; sd.sd_hibase = (base >> 24) & 0xff; sd.sd_lolimit = 0xffff; /* 4GB limit, wraps around */ sd.sd_hilimit = 0xf; sd.sd_type = SDT_MEMRWA; sd.sd_dpl = SEL_UPL; sd.sd_p = 1; sd.sd_xx = 0; sd.sd_def32 = 1; sd.sd_gran = 1; critical_enter(); /* set %gs */ td->td_pcb->pcb_gsd = sd; if (td == curthread) { PCPU_GET(fsgs_gdt)[1] = sd; load_gs(GSEL(GUGS_SEL, SEL_UPL)); } critical_exit(); return (0); } /* * Convert kernel VA to physical address */ vm_paddr_t kvtop(void *addr) { vm_paddr_t pa; pa = pmap_kextract((vm_offset_t)addr); if (pa == 0) panic("kvtop: zero page frame"); return (pa); } /* * Get an sf_buf from the freelist. May block if none are available. */ void sf_buf_map(struct sf_buf *sf, int flags) { pmap_sf_buf_map(sf); #ifdef SMP sf_buf_shootdown(sf, flags); #endif } #ifdef SMP static void sf_buf_shootdown_curcpu_cb(pmap_t pmap __unused, vm_offset_t addr1 __unused, vm_offset_t addr2 __unused) { } void sf_buf_shootdown(struct sf_buf *sf, int flags) { cpuset_t other_cpus; u_int cpuid; sched_pin(); cpuid = PCPU_GET(cpuid); if (!CPU_ISSET(cpuid, &sf->cpumask)) { CPU_SET(cpuid, &sf->cpumask); invlpg(sf->kva); } if ((flags & SFB_CPUPRIVATE) == 0) { other_cpus = all_cpus; CPU_CLR(cpuid, &other_cpus); CPU_ANDNOT(&other_cpus, &sf->cpumask); if (!CPU_EMPTY(&other_cpus)) { CPU_OR(&sf->cpumask, &other_cpus); smp_masked_invlpg(other_cpus, sf->kva, kernel_pmap, sf_buf_shootdown_curcpu_cb); } } sched_unpin(); } #endif /* * MD part of sf_buf_free(). */ int sf_buf_unmap(struct sf_buf *sf) { return (0); } static void sf_buf_invalidate(struct sf_buf *sf) { vm_page_t m = sf->m; /* * Use pmap_qenter to update the pte for * existing mapping, in particular, the PAT * settings are recalculated. */ pmap_qenter(sf->kva, &m, 1); pmap_invalidate_cache_range(sf->kva, sf->kva + PAGE_SIZE); } /* * Invalidate the cache lines that may belong to the page, if * (possibly old) mapping of the page by sf buffer exists. Returns * TRUE when mapping was found and cache invalidated. */ boolean_t sf_buf_invalidate_cache(vm_page_t m) { return (sf_buf_process_page(m, sf_buf_invalidate)); } /* * Software interrupt handler for queued VM system processing. */ void swi_vm(void *dummy) { if (busdma_swi_pending != 0) busdma_swi(); } /* * Tell whether this address is in some physical memory region. * Currently used by the kernel coredump code in order to avoid * dumping the ``ISA memory hole'' which could cause indefinite hangs, * or other unpredictable behaviour. */ int is_physical_memory(vm_paddr_t addr) { #ifdef DEV_ISA /* The ISA ``memory hole''. */ if (addr >= 0xa0000 && addr < 0x100000) return 0; #endif /* * stuff other tests for known memory-mapped devices (PCI?) * here */ return 1; } diff --git a/sys/i386/linux/linux_machdep.c b/sys/i386/linux/linux_machdep.c index 624a6ef6ae87..d2d713d4776d 100644 --- a/sys/i386/linux/linux_machdep.c +++ b/sys/i386/linux/linux_machdep.c @@ -1,743 +1,743 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2000 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* needed for pcb definition in linux_set_thread_area */ #include "opt_posix.h" extern struct sysentvec elf32_freebsd_sysvec; /* defined in i386/i386/elf_machdep.c */ struct l_descriptor { l_uint entry_number; l_ulong base_addr; l_uint limit; l_uint seg_32bit:1; l_uint contents:2; l_uint read_exec_only:1; l_uint limit_in_pages:1; l_uint seg_not_present:1; l_uint useable:1; }; struct l_old_select_argv { l_int nfds; l_fd_set *readfds; l_fd_set *writefds; l_fd_set *exceptfds; struct l_timeval *timeout; }; int linux_execve(struct thread *td, struct linux_execve_args *args) { struct image_args eargs; char *newpath; int error; if (!LUSECONVPATH(td)) { error = exec_copyin_args(&eargs, args->path, UIO_USERSPACE, args->argp, args->envp); } else { LCONVPATHEXIST(td, args->path, &newpath); error = exec_copyin_args(&eargs, newpath, UIO_SYSSPACE, args->argp, args->envp); LFREEPATH(newpath); } if (error == 0) error = linux_common_execve(td, &eargs); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); return (error); } struct l_ipc_kludge { struct l_msgbuf *msgp; l_long msgtyp; }; int linux_ipc(struct thread *td, struct linux_ipc_args *args) { switch (args->what & 0xFFFF) { case LINUX_SEMOP: { struct linux_semop_args a; a.semid = args->arg1; a.tsops = PTRIN(args->ptr); a.nsops = args->arg2; return (linux_semop(td, &a)); } case LINUX_SEMGET: { struct linux_semget_args a; a.key = args->arg1; a.nsems = args->arg2; a.semflg = args->arg3; return (linux_semget(td, &a)); } case LINUX_SEMCTL: { struct linux_semctl_args a; int error; a.semid = args->arg1; a.semnum = args->arg2; a.cmd = args->arg3; error = copyin(PTRIN(args->ptr), &a.arg, sizeof(a.arg)); if (error) return (error); return (linux_semctl(td, &a)); } case LINUX_MSGSND: { struct linux_msgsnd_args a; a.msqid = args->arg1; a.msgp = PTRIN(args->ptr); a.msgsz = args->arg2; a.msgflg = args->arg3; return (linux_msgsnd(td, &a)); } case LINUX_MSGRCV: { struct linux_msgrcv_args a; a.msqid = args->arg1; a.msgsz = args->arg2; a.msgflg = args->arg3; if ((args->what >> 16) == 0) { struct l_ipc_kludge tmp; int error; if (args->ptr == 0) return (EINVAL); error = copyin(PTRIN(args->ptr), &tmp, sizeof(tmp)); if (error) return (error); a.msgp = PTRIN(tmp.msgp); a.msgtyp = tmp.msgtyp; } else { a.msgp = PTRIN(args->ptr); a.msgtyp = args->arg5; } return (linux_msgrcv(td, &a)); } case LINUX_MSGGET: { struct linux_msgget_args a; a.key = args->arg1; a.msgflg = args->arg2; return (linux_msgget(td, &a)); } case LINUX_MSGCTL: { struct linux_msgctl_args a; a.msqid = args->arg1; a.cmd = args->arg2; a.buf = PTRIN(args->ptr); return (linux_msgctl(td, &a)); } case LINUX_SHMAT: { struct linux_shmat_args a; l_uintptr_t addr; int error; a.shmid = args->arg1; a.shmaddr = PTRIN(args->ptr); a.shmflg = args->arg2; error = linux_shmat(td, &a); if (error != 0) return (error); addr = td->td_retval[0]; error = copyout(&addr, PTRIN(args->arg3), sizeof(addr)); td->td_retval[0] = 0; return (error); } case LINUX_SHMDT: { struct linux_shmdt_args a; a.shmaddr = PTRIN(args->ptr); return (linux_shmdt(td, &a)); } case LINUX_SHMGET: { struct linux_shmget_args a; a.key = args->arg1; a.size = args->arg2; a.shmflg = args->arg3; return (linux_shmget(td, &a)); } case LINUX_SHMCTL: { struct linux_shmctl_args a; a.shmid = args->arg1; a.cmd = args->arg2; a.buf = PTRIN(args->ptr); return (linux_shmctl(td, &a)); } default: break; } return (EINVAL); } int linux_old_select(struct thread *td, struct linux_old_select_args *args) { struct l_old_select_argv linux_args; struct linux_select_args newsel; int error; error = copyin(args->ptr, &linux_args, sizeof(linux_args)); if (error) return (error); newsel.nfds = linux_args.nfds; newsel.readfds = linux_args.readfds; newsel.writefds = linux_args.writefds; newsel.exceptfds = linux_args.exceptfds; newsel.timeout = linux_args.timeout; return (linux_select(td, &newsel)); } int linux_set_cloned_tls(struct thread *td, void *desc) { struct segment_descriptor sd; struct l_user_desc info; int idx, error; int a[2]; error = copyin(desc, &info, sizeof(struct l_user_desc)); if (error) { linux_msg(td, "set_cloned_tls copyin failed!"); } else { idx = info.entry_number; /* * looks like we're getting the idx we returned * in the set_thread_area() syscall */ if (idx != 6 && idx != 3) { linux_msg(td, "set_cloned_tls resetting idx!"); idx = 3; } /* this doesnt happen in practice */ if (idx == 6) { /* we might copy out the entry_number as 3 */ info.entry_number = 3; error = copyout(&info, desc, sizeof(struct l_user_desc)); if (error) linux_msg(td, "set_cloned_tls copyout failed!"); } a[0] = LINUX_LDT_entry_a(&info); a[1] = LINUX_LDT_entry_b(&info); memcpy(&sd, &a, sizeof(a)); /* set %gs */ td->td_pcb->pcb_gsd = sd; td->td_pcb->pcb_gs = GSEL(GUGS_SEL, SEL_UPL); } return (error); } int -linux_set_upcall_kse(struct thread *td, register_t stack) +linux_set_upcall(struct thread *td, register_t stack) { if (stack) td->td_frame->tf_esp = stack; /* * The newly created Linux thread returns * to the user space by the same path that a parent do. */ td->td_frame->tf_eax = 0; return (0); } int linux_mmap2(struct thread *td, struct linux_mmap2_args *args) { return (linux_mmap_common(td, args->addr, args->len, args->prot, args->flags, args->fd, (uint64_t)(uint32_t)args->pgoff * PAGE_SIZE)); } int linux_mmap(struct thread *td, struct linux_mmap_args *args) { int error; struct l_mmap_argv linux_args; error = copyin(args->ptr, &linux_args, sizeof(linux_args)); if (error) return (error); return (linux_mmap_common(td, linux_args.addr, linux_args.len, linux_args.prot, linux_args.flags, linux_args.fd, (uint32_t)linux_args.pgoff)); } int linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) { return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot)); } int linux_madvise(struct thread *td, struct linux_madvise_args *uap) { return (linux_madvise_common(td, PTROUT(uap->addr), uap->len, uap->behav)); } int linux_ioperm(struct thread *td, struct linux_ioperm_args *args) { int error; struct i386_ioperm_args iia; iia.start = args->start; iia.length = args->length; iia.enable = args->enable; error = i386_set_ioperm(td, &iia); return (error); } int linux_iopl(struct thread *td, struct linux_iopl_args *args) { int error; if (args->level < 0 || args->level > 3) return (EINVAL); if ((error = priv_check(td, PRIV_IO)) != 0) return (error); if ((error = securelevel_gt(td->td_ucred, 0)) != 0) return (error); td->td_frame->tf_eflags = (td->td_frame->tf_eflags & ~PSL_IOPL) | (args->level * (PSL_IOPL / 3)); return (0); } int linux_modify_ldt(struct thread *td, struct linux_modify_ldt_args *uap) { int error; struct i386_ldt_args ldt; struct l_descriptor ld; union descriptor desc; int size, written; switch (uap->func) { case 0x00: /* read_ldt */ ldt.start = 0; ldt.descs = uap->ptr; ldt.num = uap->bytecount / sizeof(union descriptor); error = i386_get_ldt(td, &ldt); td->td_retval[0] *= sizeof(union descriptor); break; case 0x02: /* read_default_ldt = 0 */ size = 5*sizeof(struct l_desc_struct); if (size > uap->bytecount) size = uap->bytecount; for (written = error = 0; written < size && error == 0; written++) error = subyte((char *)uap->ptr + written, 0); td->td_retval[0] = written; break; case 0x01: /* write_ldt */ case 0x11: /* write_ldt */ if (uap->bytecount != sizeof(ld)) return (EINVAL); error = copyin(uap->ptr, &ld, sizeof(ld)); if (error) return (error); ldt.start = ld.entry_number; ldt.descs = &desc; ldt.num = 1; desc.sd.sd_lolimit = (ld.limit & 0x0000ffff); desc.sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16; desc.sd.sd_lobase = (ld.base_addr & 0x00ffffff); desc.sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24; desc.sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) | (ld.contents << 2); desc.sd.sd_dpl = 3; desc.sd.sd_p = (ld.seg_not_present ^ 1); desc.sd.sd_xx = 0; desc.sd.sd_def32 = ld.seg_32bit; desc.sd.sd_gran = ld.limit_in_pages; error = i386_set_ldt(td, &ldt, &desc); break; default: error = ENOSYS; break; } if (error == EOPNOTSUPP) { linux_msg(td, "modify_ldt needs kernel option USER_LDT"); error = ENOSYS; } return (error); } int linux_sigaction(struct thread *td, struct linux_sigaction_args *args) { l_osigaction_t osa; l_sigaction_t act, oact; int error; if (args->nsa != NULL) { error = copyin(args->nsa, &osa, sizeof(l_osigaction_t)); if (error) return (error); act.lsa_handler = osa.lsa_handler; act.lsa_flags = osa.lsa_flags; act.lsa_restorer = osa.lsa_restorer; LINUX_SIGEMPTYSET(act.lsa_mask); act.lsa_mask.__mask = osa.lsa_mask; } error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL, args->osa ? &oact : NULL); if (args->osa != NULL && !error) { osa.lsa_handler = oact.lsa_handler; osa.lsa_flags = oact.lsa_flags; osa.lsa_restorer = oact.lsa_restorer; osa.lsa_mask = oact.lsa_mask.__mask; error = copyout(&osa, args->osa, sizeof(l_osigaction_t)); } return (error); } /* * Linux has two extra args, restart and oldmask. We dont use these, * but it seems that "restart" is actually a context pointer that * enables the signal to happen with a different register set. */ int linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args) { sigset_t sigmask; l_sigset_t mask; LINUX_SIGEMPTYSET(mask); mask.__mask = args->mask; linux_to_bsd_sigset(&mask, &sigmask); return (kern_sigsuspend(td, sigmask)); } int linux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap) { l_sigset_t lmask; sigset_t sigmask; int error; if (uap->sigsetsize != sizeof(l_sigset_t)) return (EINVAL); error = copyin(uap->newset, &lmask, sizeof(l_sigset_t)); if (error) return (error); linux_to_bsd_sigset(&lmask, &sigmask); return (kern_sigsuspend(td, sigmask)); } int linux_pause(struct thread *td, struct linux_pause_args *args) { struct proc *p = td->td_proc; sigset_t sigmask; PROC_LOCK(p); sigmask = td->td_sigmask; PROC_UNLOCK(p); return (kern_sigsuspend(td, sigmask)); } int linux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap) { stack_t ss, oss; l_stack_t lss; int error; if (uap->uss != NULL) { error = copyin(uap->uss, &lss, sizeof(l_stack_t)); if (error) return (error); ss.ss_sp = lss.ss_sp; ss.ss_size = lss.ss_size; ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags); } error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL, (uap->uoss != NULL) ? &oss : NULL); if (!error && uap->uoss != NULL) { lss.ss_sp = oss.ss_sp; lss.ss_size = oss.ss_size; lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags); error = copyout(&lss, uap->uoss, sizeof(l_stack_t)); } return (error); } int linux_set_thread_area(struct thread *td, struct linux_set_thread_area_args *args) { struct l_user_desc info; int error; int idx; int a[2]; struct segment_descriptor sd; error = copyin(args->desc, &info, sizeof(struct l_user_desc)); if (error) return (error); idx = info.entry_number; /* * Semantics of Linux version: every thread in the system has array of * 3 tls descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. This * syscall loads one of the selected tls decriptors with a value and * also loads GDT descriptors 6, 7 and 8 with the content of the * per-thread descriptors. * * Semantics of FreeBSD version: I think we can ignore that Linux has 3 * per-thread descriptors and use just the 1st one. The tls_array[] * is used only in set/get-thread_area() syscalls and for loading the * GDT descriptors. In FreeBSD we use just one GDT descriptor for TLS * so we will load just one. * * XXX: this doesn't work when a user space process tries to use more * than 1 TLS segment. Comment in the Linux sources says wine might do * this. */ /* * we support just GLIBC TLS now * we should let 3 proceed as well because we use this segment so * if code does two subsequent calls it should succeed */ if (idx != 6 && idx != -1 && idx != 3) return (EINVAL); /* * we have to copy out the GDT entry we use * FreeBSD uses GDT entry #3 for storing %gs so load that * * XXX: what if a user space program doesn't check this value and tries * to use 6, 7 or 8? */ idx = info.entry_number = 3; error = copyout(&info, args->desc, sizeof(struct l_user_desc)); if (error) return (error); if (LINUX_LDT_empty(&info)) { a[0] = 0; a[1] = 0; } else { a[0] = LINUX_LDT_entry_a(&info); a[1] = LINUX_LDT_entry_b(&info); } memcpy(&sd, &a, sizeof(a)); /* this is taken from i386 version of cpu_set_user_tls() */ critical_enter(); /* set %gs */ td->td_pcb->pcb_gsd = sd; PCPU_GET(fsgs_gdt)[1] = sd; load_gs(GSEL(GUGS_SEL, SEL_UPL)); critical_exit(); return (0); } int linux_get_thread_area(struct thread *td, struct linux_get_thread_area_args *args) { struct l_user_desc info; int error; int idx; struct l_desc_struct desc; struct segment_descriptor sd; error = copyin(args->desc, &info, sizeof(struct l_user_desc)); if (error) return (error); idx = info.entry_number; /* XXX: I am not sure if we want 3 to be allowed too. */ if (idx != 6 && idx != 3) return (EINVAL); idx = 3; memset(&info, 0, sizeof(info)); sd = PCPU_GET(fsgs_gdt)[1]; memcpy(&desc, &sd, sizeof(desc)); info.entry_number = idx; info.base_addr = LINUX_GET_BASE(&desc); info.limit = LINUX_GET_LIMIT(&desc); info.seg_32bit = LINUX_GET_32BIT(&desc); info.contents = LINUX_GET_CONTENTS(&desc); info.read_exec_only = !LINUX_GET_WRITABLE(&desc); info.limit_in_pages = LINUX_GET_LIMIT_PAGES(&desc); info.seg_not_present = !LINUX_GET_PRESENT(&desc); info.useable = LINUX_GET_USEABLE(&desc); error = copyout(&info, args->desc, sizeof(struct l_user_desc)); if (error) return (EFAULT); return (0); } /* XXX: this wont work with module - convert it */ int linux_mq_open(struct thread *td, struct linux_mq_open_args *args) { #ifdef P1003_1B_MQUEUE return (sys_kmq_open(td, (struct kmq_open_args *)args)); #else return (ENOSYS); #endif } int linux_mq_unlink(struct thread *td, struct linux_mq_unlink_args *args) { #ifdef P1003_1B_MQUEUE return (sys_kmq_unlink(td, (struct kmq_unlink_args *)args)); #else return (ENOSYS); #endif } int linux_mq_timedsend(struct thread *td, struct linux_mq_timedsend_args *args) { #ifdef P1003_1B_MQUEUE return (sys_kmq_timedsend(td, (struct kmq_timedsend_args *)args)); #else return (ENOSYS); #endif } int linux_mq_timedreceive(struct thread *td, struct linux_mq_timedreceive_args *args) { #ifdef P1003_1B_MQUEUE return (sys_kmq_timedreceive(td, (struct kmq_timedreceive_args *)args)); #else return (ENOSYS); #endif } int linux_mq_notify(struct thread *td, struct linux_mq_notify_args *args) { #ifdef P1003_1B_MQUEUE return (sys_kmq_notify(td, (struct kmq_notify_args *)args)); #else return (ENOSYS); #endif } int linux_mq_getsetattr(struct thread *td, struct linux_mq_getsetattr_args *args) { #ifdef P1003_1B_MQUEUE return (sys_kmq_setattr(td, (struct kmq_setattr_args *)args)); #else return (ENOSYS); #endif }