diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index f74091438648..1c6b1549422b 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -1,685 +1,675 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1982, 1986 The Regents of the University of California. * Copyright (c) 1989, 1990 William Jolitz * Copyright (c) 1994 John Dyson * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ */ #include #include "opt_isa.h" #include "opt_cpu.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include _Static_assert(OFFSETOF_MONITORBUF == offsetof(struct pcpu, pc_monitorbuf), "OFFSETOF_MONITORBUF does not correspond with offset of pc_monitorbuf."); void set_top_of_stack_td(struct thread *td) { td->td_md.md_stack_base = td->td_kstack + td->td_kstack_pages * PAGE_SIZE; } struct savefpu * get_pcb_user_save_td(struct thread *td) { KASSERT(((vm_offset_t)td->td_md.md_usr_fpu_save % XSAVE_AREA_ALIGN) == 0, ("Unaligned pcb_user_save area ptr %p td %p", td->td_md.md_usr_fpu_save, td)); return (td->td_md.md_usr_fpu_save); } struct pcb * get_pcb_td(struct thread *td) { return (&td->td_md.md_pcb); } struct savefpu * get_pcb_user_save_pcb(struct pcb *pcb) { struct thread *td; td = __containerof(pcb, struct thread, td_md.md_pcb); return (get_pcb_user_save_td(td)); } void * alloc_fpusave(int flags) { void *res; struct savefpu_ymm *sf; res = malloc(cpu_max_ext_state_size, M_DEVBUF, flags); if (use_xsave) { sf = (struct savefpu_ymm *)res; bzero(&sf->sv_xstate.sx_hd, sizeof(sf->sv_xstate.sx_hd)); sf->sv_xstate.sx_hd.xstate_bv = xsave_mask; } return (res); } /* * Common code shared between cpu_fork() and cpu_copy_thread() for * initializing a thread. */ static void copy_thread(struct thread *td1, struct thread *td2) { struct pcb *pcb2; pcb2 = td2->td_pcb; /* Ensure that td1's pcb is up to date for user threads. */ if ((td2->td_pflags & TDP_KTHREAD) == 0) { MPASS(td1 == curthread); fpuexit(td1); update_pcb_bases(td1->td_pcb); } /* Copy td1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); /* Properly initialize pcb_save */ pcb2->pcb_save = get_pcb_user_save_pcb(pcb2); /* Kernel threads start with clean FPU and segment bases. */ if ((td2->td_pflags & TDP_KTHREAD) != 0) { pcb2->pcb_fsbase = 0; pcb2->pcb_gsbase = 0; clear_pcb_flags(pcb2, PCB_FPUINITDONE | PCB_USERFPUINITDONE | PCB_KERNFPU | PCB_KERNFPU_THR); } else { MPASS((pcb2->pcb_flags & (PCB_KERNFPU | PCB_KERNFPU_THR)) == 0); bcopy(get_pcb_user_save_td(td1), get_pcb_user_save_pcb(pcb2), cpu_max_ext_state_size); } td2->td_frame = (struct trapframe *)td2->td_md.md_stack_base - 1; /* * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. */ pcb2->pcb_r12 = (register_t)fork_return; /* fork_trampoline argument */ pcb2->pcb_rbp = 0; pcb2->pcb_rsp = (register_t)td2->td_frame - sizeof(void *); pcb2->pcb_rbx = (register_t)td2; /* fork_trampoline argument */ pcb2->pcb_rip = (register_t)fork_trampoline; /*- * pcb2->pcb_dr*: cloned above. * pcb2->pcb_savefpu: cloned above. * pcb2->pcb_flags: cloned above. * pcb2->pcb_onfault: cloned above (always NULL here?). * pcb2->pcb_[fg]sbase: cloned above */ pcb2->pcb_tssp = NULL; /* Setup to release spin count in fork_exit(). */ td2->td_md.md_spinlock_count = 1; td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I; pmap_thread_init_invl_gen(td2); /* * Copy the trap frame for the return to user mode as if from a syscall. * This copies most of the user mode register values. Some of these * registers are rewritten by cpu_set_upcall() and linux_set_upcall(). */ if ((td1->td_proc->p_flag & P_KPROC) == 0) { bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe)); /* * If the current thread has the trap bit set (i.e. a debugger * had single stepped the process to the system call), we need * to clear the trap flag from the new frame. Otherwise, the new * thread will receive a (likely unexpected) SIGTRAP when it * executes the first instruction after returning to userland. */ td2->td_frame->tf_rflags &= ~PSL_T; } } /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child * ready to run and return to user mode. */ void cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) { struct proc *p1; struct pcb *pcb2; struct mdproc *mdp1, *mdp2; struct proc_ldt *pldt; p1 = td1->td_proc; if ((flags & RFPROC) == 0) { if ((flags & RFMEM) == 0) { /* unshare user LDT */ mdp1 = &p1->p_md; mtx_lock(&dt_lock); if ((pldt = mdp1->md_ldt) != NULL && pldt->ldt_refcnt > 1 && user_ldt_alloc(p1, 1) == NULL) panic("could not copy LDT"); mtx_unlock(&dt_lock); } return; } /* Point the stack and pcb to the actual location */ set_top_of_stack_td(td2); td2->td_pcb = pcb2 = get_pcb_td(td2); copy_thread(td1, td2); /* Reset debug registers in the new process */ x86_clear_dbregs(pcb2); /* Point mdproc and then copy over p1's contents */ mdp2 = &p2->p_md; bcopy(&p1->p_md, mdp2, sizeof(*mdp2)); /* Set child return values. */ p2->p_sysent->sv_set_fork_retval(td2); /* As on i386, do not copy io permission bitmap. */ pcb2->pcb_tssp = NULL; /* New segment registers. */ set_pcb_flags_raw(pcb2, PCB_FULL_IRET); /* Copy the LDT, if necessary. */ mdp1 = &td1->td_proc->p_md; mdp2 = &p2->p_md; if (mdp1->md_ldt == NULL) { mdp2->md_ldt = NULL; return; } mtx_lock(&dt_lock); if (mdp1->md_ldt != NULL) { if (flags & RFMEM) { mdp1->md_ldt->ldt_refcnt++; mdp2->md_ldt = mdp1->md_ldt; bcopy(&mdp1->md_ldt_sd, &mdp2->md_ldt_sd, sizeof(struct system_segment_descriptor)); } else { mdp2->md_ldt = NULL; mdp2->md_ldt = user_ldt_alloc(p2, 0); if (mdp2->md_ldt == NULL) panic("could not copy LDT"); amd64_set_ldt_data(td2, 0, max_ldt_segment, (struct user_segment_descriptor *) mdp1->md_ldt->ldt_base); } } else mdp2->md_ldt = NULL; mtx_unlock(&dt_lock); /* * Now, cpu_switch() can schedule the new process. * pcb_rsp is loaded pointing to the cpu_switch() stack frame * containing the return address when exiting cpu_switch. * This will normally be to fork_trampoline(), which will have * %rbx loaded with the new proc's pointer. fork_trampoline() * will set up a stack to call fork_return(p, frame); to complete * the return to user-mode. */ } void x86_set_fork_retval(struct thread *td) { struct trapframe *frame = td->td_frame; frame->tf_rax = 0; /* Child returns zero */ frame->tf_rflags &= ~PSL_C; /* success */ frame->tf_rdx = 1; /* System V emulation */ } /* * Intercept the return address from a freshly forked process that has NOT * been scheduled yet. * * This is needed to make kernel threads stay in kernel mode. */ void cpu_fork_kthread_handler(struct thread *td, void (*func)(void *), void *arg) { /* * Note that the trap frame follows the args, so the function * is really called like this: func(arg, frame); */ td->td_pcb->pcb_r12 = (long) func; /* function */ td->td_pcb->pcb_rbx = (long) arg; /* first arg */ } void cpu_exit(struct thread *td) { /* * If this process has a custom LDT, release it. */ if (td->td_proc->p_md.md_ldt != NULL) user_ldt_free(td); } void cpu_thread_exit(struct thread *td) { struct pcb *pcb; critical_enter(); if (td == PCPU_GET(fpcurthread)) fpudrop(); critical_exit(); pcb = td->td_pcb; /* Disable any hardware breakpoints. */ if (pcb->pcb_flags & PCB_DBREGS) { reset_dbregs(); clear_pcb_flags(pcb, PCB_DBREGS); } } void cpu_thread_clean(struct thread *td) { struct pcb *pcb; pcb = td->td_pcb; /* * Clean TSS/iomap */ if (pcb->pcb_tssp != NULL) { pmap_pti_remove_kva((vm_offset_t)pcb->pcb_tssp, (vm_offset_t)pcb->pcb_tssp + ctob(IOPAGES + 1)); kmem_free(pcb->pcb_tssp, ctob(IOPAGES + 1)); pcb->pcb_tssp = NULL; } } void cpu_thread_alloc(struct thread *td) { struct pcb *pcb; struct xstate_hdr *xhdr; set_top_of_stack_td(td); td->td_pcb = pcb = get_pcb_td(td); td->td_frame = (struct trapframe *)td->td_md.md_stack_base - 1; td->td_md.md_usr_fpu_save = fpu_save_area_alloc(); pcb->pcb_save = get_pcb_user_save_pcb(pcb); if (use_xsave) { xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1); bzero(xhdr, sizeof(*xhdr)); xhdr->xstate_bv = xsave_mask; } } void cpu_thread_free(struct thread *td) { cpu_thread_clean(td); fpu_save_area_free(td->td_md.md_usr_fpu_save); td->td_md.md_usr_fpu_save = NULL; } bool cpu_exec_vmspace_reuse(struct proc *p, vm_map_t map) { return (((curproc->p_md.md_flags & P_MD_KPTI) != 0) == (vm_map_pmap(map)->pm_ucr3 != PMAP_NO_CR3)); } static void cpu_procctl_kpti_ctl(struct proc *p, int val) { if (pti && val == PROC_KPTI_CTL_ENABLE_ON_EXEC) p->p_md.md_flags |= P_MD_KPTI; if (val == PROC_KPTI_CTL_DISABLE_ON_EXEC) p->p_md.md_flags &= ~P_MD_KPTI; } static void cpu_procctl_kpti_status(struct proc *p, int *val) { *val = (p->p_md.md_flags & P_MD_KPTI) != 0 ? PROC_KPTI_CTL_ENABLE_ON_EXEC: PROC_KPTI_CTL_DISABLE_ON_EXEC; if (vmspace_pmap(p->p_vmspace)->pm_ucr3 != PMAP_NO_CR3) *val |= PROC_KPTI_STATUS_ACTIVE; } static int cpu_procctl_la_ctl(struct proc *p, int val) { int error; error = 0; switch (val) { case PROC_LA_CTL_LA48_ON_EXEC: p->p_md.md_flags |= P_MD_LA48; p->p_md.md_flags &= ~P_MD_LA57; break; case PROC_LA_CTL_LA57_ON_EXEC: if (la57) { p->p_md.md_flags &= ~P_MD_LA48; p->p_md.md_flags |= P_MD_LA57; } else { error = ENOTSUP; } break; case PROC_LA_CTL_DEFAULT_ON_EXEC: p->p_md.md_flags &= ~(P_MD_LA48 | P_MD_LA57); break; } return (error); } static void cpu_procctl_la_status(struct proc *p, int *val) { int res; if ((p->p_md.md_flags & P_MD_LA48) != 0) res = PROC_LA_CTL_LA48_ON_EXEC; else if ((p->p_md.md_flags & P_MD_LA57) != 0) res = PROC_LA_CTL_LA57_ON_EXEC; else res = PROC_LA_CTL_DEFAULT_ON_EXEC; if (p->p_sysent->sv_maxuser == VM_MAXUSER_ADDRESS_LA48) res |= PROC_LA_STATUS_LA48; else res |= PROC_LA_STATUS_LA57; *val = res; } int cpu_procctl(struct thread *td, int idtype, id_t id, int com, void *data) { struct proc *p; int error, val; switch (com) { case PROC_KPTI_CTL: case PROC_KPTI_STATUS: case PROC_LA_CTL: case PROC_LA_STATUS: if (idtype != P_PID) { error = EINVAL; break; } if (com == PROC_KPTI_CTL) { /* sad but true and not a joke */ error = priv_check(td, PRIV_IO); if (error != 0) break; } if (com == PROC_KPTI_CTL || com == PROC_LA_CTL) { error = copyin(data, &val, sizeof(val)); if (error != 0) break; } if (com == PROC_KPTI_CTL && val != PROC_KPTI_CTL_ENABLE_ON_EXEC && val != PROC_KPTI_CTL_DISABLE_ON_EXEC) { error = EINVAL; break; } if (com == PROC_LA_CTL && val != PROC_LA_CTL_LA48_ON_EXEC && val != PROC_LA_CTL_LA57_ON_EXEC && val != PROC_LA_CTL_DEFAULT_ON_EXEC) { error = EINVAL; break; } error = pget(id, PGET_CANSEE | PGET_NOTWEXIT | PGET_NOTID, &p); if (error != 0) break; switch (com) { case PROC_KPTI_CTL: cpu_procctl_kpti_ctl(p, val); break; case PROC_KPTI_STATUS: cpu_procctl_kpti_status(p, &val); break; case PROC_LA_CTL: error = cpu_procctl_la_ctl(p, val); break; case PROC_LA_STATUS: cpu_procctl_la_status(p, &val); break; } PROC_UNLOCK(p); if (com == PROC_KPTI_STATUS || com == PROC_LA_STATUS) error = copyout(&val, data, sizeof(val)); break; default: error = EINVAL; break; } return (error); } void cpu_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame; frame = td->td_frame; if (__predict_true(error == 0)) { frame->tf_rax = td->td_retval[0]; frame->tf_rdx = td->td_retval[1]; frame->tf_rflags &= ~PSL_C; return; } switch (error) { case ERESTART: /* * Reconstruct pc, we know that 'syscall' is 2 bytes, * lcall $X,y is 7 bytes, int 0x80 is 2 bytes. * We saved this in tf_err. * %r10 (which was holding the value of %rcx) is restored * for the next iteration. * %r10 restore is only required for freebsd/amd64 processes, * but shall be innocent for any ia32 ABI. * * Require full context restore to get the arguments * in the registers reloaded at return to usermode. */ frame->tf_rip -= frame->tf_err; frame->tf_r10 = frame->tf_rcx; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); break; case EJUSTRETURN: break; default: frame->tf_rax = error; frame->tf_rflags |= PSL_C; break; } } /* * Initialize machine state, mostly pcb and trap frame for a new * thread, about to return to userspace. Put enough state in the new * thread's PCB to get it to go back to the fork_return(), which * finalizes the thread state and handles peculiarities of the first * return to userspace for the new thread. */ void cpu_copy_thread(struct thread *td, struct thread *td0) { copy_thread(td0, td); set_pcb_flags_raw(td->td_pcb, PCB_FULL_IRET); } /* * Set that machine state for performing an upcall that starts * the entry function with the given argument. */ int cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg, stack_t *stack) { - - /* - * Do any extra cleaning that needs to be done. - * The thread may have optional components - * that are not present in a fresh thread. - * This may be a recycled thread so make it look - * as though it's newly allocated. - */ - cpu_thread_clean(td); - #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { /* * Set the trap frame to point at the beginning of the entry * function. */ td->td_frame->tf_rbp = 0; td->td_frame->tf_rsp = (((uintptr_t)stack->ss_sp + stack->ss_size - 4) & ~0x0f) - 4; td->td_frame->tf_rip = (uintptr_t)entry; /* Return address sentinel value to stop stack unwinding. */ if (suword32((void *)td->td_frame->tf_rsp, 0) != 0) return (EFAULT); /* Pass the argument to the entry point. */ if (suword32( (void *)(td->td_frame->tf_rsp + sizeof(int32_t)), (uint32_t)(uintptr_t)arg) != 0) return (EFAULT); return (0); } #endif /* * Set the trap frame to point at the beginning of the uts * function. */ td->td_frame->tf_rbp = 0; td->td_frame->tf_rsp = ((register_t)stack->ss_sp + stack->ss_size) & ~0x0f; td->td_frame->tf_rsp -= 8; td->td_frame->tf_rip = (register_t)entry; td->td_frame->tf_ds = _udatasel; td->td_frame->tf_es = _udatasel; td->td_frame->tf_fs = _ufssel; td->td_frame->tf_gs = _ugssel; td->td_frame->tf_flags = TF_HASSEGS; /* Return address sentinel value to stop stack unwinding. */ if (suword((void *)td->td_frame->tf_rsp, 0) != 0) return (EFAULT); /* Pass the argument to the entry point. */ td->td_frame->tf_rdi = (register_t)arg; return (0); } int cpu_set_user_tls(struct thread *td, void *tls_base) { struct pcb *pcb; if ((u_int64_t)tls_base >= VM_MAXUSER_ADDRESS) return (EINVAL); pcb = td->td_pcb; set_pcb_flags(pcb, PCB_FULL_IRET); #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { pcb->pcb_gsbase = (register_t)tls_base; return (0); } #endif pcb->pcb_fsbase = (register_t)tls_base; return (0); } diff --git a/sys/compat/linux/linux_fork.c b/sys/compat/linux/linux_fork.c index 4ce3bc192b4e..1c9189162a09 100644 --- a/sys/compat/linux/linux_fork.c +++ b/sys/compat/linux/linux_fork.c @@ -1,551 +1,549 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2004 Tim J. Robbins * Copyright (c) 2002 Doug Rabson * Copyright (c) 2000 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_LINUX32 #include #include #else #include #include #endif #include #include #include #include #include #include #include #ifdef LINUX_LEGACY_SYSCALLS int linux_fork(struct thread *td, struct linux_fork_args *args) { struct fork_req fr; int error; struct proc *p2; struct thread *td2; bzero(&fr, sizeof(fr)); fr.fr_flags = RFFDG | RFPROC | RFSTOPPED; fr.fr_procp = &p2; if ((error = fork1(td, &fr)) != 0) return (error); td2 = FIRST_THREAD_IN_PROC(p2); linux_proc_init(td, td2, false); td->td_retval[0] = p2->p_pid; /* * Make this runnable after we are finished with it. */ thread_lock(td2); TD_SET_CAN_RUN(td2); sched_add(td2, SRQ_BORING); return (0); } int linux_vfork(struct thread *td, struct linux_vfork_args *args) { struct fork_req fr; int error; struct proc *p2; struct thread *td2; bzero(&fr, sizeof(fr)); fr.fr_flags = RFFDG | RFPROC | RFMEM | RFPPWAIT | RFSTOPPED; fr.fr_procp = &p2; if ((error = fork1(td, &fr)) != 0) return (error); td2 = FIRST_THREAD_IN_PROC(p2); linux_proc_init(td, td2, false); td->td_retval[0] = p2->p_pid; /* * Make this runnable after we are finished with it. */ thread_lock(td2); TD_SET_CAN_RUN(td2); sched_add(td2, SRQ_BORING); return (0); } #endif static int linux_clone_proc(struct thread *td, struct l_clone_args *args) { struct fork_req fr; int error, ff, f2; struct proc *p2; struct thread *td2; int exit_signal; struct linux_emuldata *em; f2 = 0; ff = RFPROC | RFSTOPPED; if (LINUX_SIG_VALID(args->exit_signal)) { exit_signal = linux_to_bsd_signal(args->exit_signal); } else if (args->exit_signal != 0) return (EINVAL); else exit_signal = 0; if (args->flags & LINUX_CLONE_VM) ff |= RFMEM; if (args->flags & LINUX_CLONE_SIGHAND) ff |= RFSIGSHARE; if ((args->flags & LINUX_CLONE_CLEAR_SIGHAND) != 0) f2 |= FR2_DROPSIG_CAUGHT; if (args->flags & LINUX_CLONE_FILES) { if (!(args->flags & LINUX_CLONE_FS)) f2 |= FR2_SHARE_PATHS; } else { ff |= RFFDG; if (args->flags & LINUX_CLONE_FS) f2 |= FR2_SHARE_PATHS; } if (args->flags & LINUX_CLONE_PARENT_SETTID) if (args->parent_tid == NULL) return (EINVAL); if (args->flags & LINUX_CLONE_VFORK) ff |= RFPPWAIT; bzero(&fr, sizeof(fr)); fr.fr_flags = ff; fr.fr_flags2 = f2; fr.fr_procp = &p2; error = fork1(td, &fr); if (error) return (error); td2 = FIRST_THREAD_IN_PROC(p2); /* create the emuldata */ linux_proc_init(td, td2, false); em = em_find(td2); KASSERT(em != NULL, ("clone_proc: emuldata not found.\n")); if (args->flags & LINUX_CLONE_CHILD_SETTID) em->child_set_tid = args->child_tid; else em->child_set_tid = NULL; if (args->flags & LINUX_CLONE_CHILD_CLEARTID) em->child_clear_tid = args->child_tid; else em->child_clear_tid = NULL; if (args->flags & LINUX_CLONE_PARENT_SETTID) { error = copyout(&p2->p_pid, args->parent_tid, sizeof(p2->p_pid)); if (error) linux_msg(td, "copyout p_pid failed!"); } PROC_LOCK(p2); p2->p_sigparent = exit_signal; PROC_UNLOCK(p2); /* * In a case of stack = NULL, we are supposed to COW calling process * stack. This is what normal fork() does, so we just keep tf_rsp arg * intact. */ linux_set_upcall(td2, args->stack); if (args->flags & LINUX_CLONE_SETTLS) linux_set_cloned_tls(td2, PTRIN(args->tls)); /* * If CLONE_PARENT is set, then the parent of the new process will be * the same as that of the calling process. */ if (args->flags & LINUX_CLONE_PARENT) { sx_xlock(&proctree_lock); PROC_LOCK(p2); proc_reparent(p2, td->td_proc->p_pptr, true); PROC_UNLOCK(p2); sx_xunlock(&proctree_lock); } /* * Make this runnable after we are finished with it. */ thread_lock(td2); TD_SET_CAN_RUN(td2); sched_add(td2, SRQ_BORING); td->td_retval[0] = p2->p_pid; return (0); } static int linux_clone_thread(struct thread *td, struct l_clone_args *args) { struct linux_emuldata *em; struct thread *newtd; struct proc *p; int error; LINUX_CTR4(clone_thread, "thread(%d) flags %x ptid %p ctid %p", td->td_tid, (unsigned)args->flags, args->parent_tid, args->child_tid); if ((args->flags & LINUX_CLONE_PARENT) != 0) return (EINVAL); if (args->flags & LINUX_CLONE_PARENT_SETTID) if (args->parent_tid == NULL) return (EINVAL); /* Threads should be created with own stack */ if (PTRIN(args->stack) == NULL) return (EINVAL); p = td->td_proc; #ifdef RACCT if (racct_enable) { PROC_LOCK(p); error = racct_add(p, RACCT_NTHR, 1); PROC_UNLOCK(p); if (error != 0) return (EPROCLIM); } #endif /* Initialize our td */ error = kern_thr_alloc(p, 0, &newtd); if (error) goto fail; bzero(&newtd->td_startzero, __rangeof(struct thread, td_startzero, td_endzero)); bcopy(&td->td_startcopy, &newtd->td_startcopy, __rangeof(struct thread, td_startcopy, td_endcopy)); newtd->td_proc = p; thread_cow_get(newtd, td); cpu_copy_thread(newtd, td); /* create the emuldata */ linux_proc_init(td, newtd, true); em = em_find(newtd); KASSERT(em != NULL, ("clone_thread: emuldata not found.\n")); if (args->flags & LINUX_CLONE_SETTLS) linux_set_cloned_tls(newtd, PTRIN(args->tls)); if (args->flags & LINUX_CLONE_CHILD_SETTID) em->child_set_tid = args->child_tid; else em->child_set_tid = NULL; if (args->flags & LINUX_CLONE_CHILD_CLEARTID) em->child_clear_tid = args->child_tid; else em->child_clear_tid = NULL; - cpu_thread_clean(newtd); - linux_set_upcall(newtd, args->stack); PROC_LOCK(p); p->p_flag |= P_HADTHREADS; thread_link(newtd, p); bcopy(p->p_comm, newtd->td_name, sizeof(newtd->td_name)); thread_lock(td); /* let the scheduler know about these things. */ sched_fork_thread(td, newtd); thread_unlock(td); if (P_SHOULDSTOP(p)) ast_sched(newtd, TDA_SUSPEND); if (p->p_ptevents & PTRACE_LWP) newtd->td_dbgflags |= TDB_BORN; PROC_UNLOCK(p); tidhash_add(newtd); LINUX_CTR2(clone_thread, "thread(%d) successful clone to %d", td->td_tid, newtd->td_tid); if (args->flags & LINUX_CLONE_PARENT_SETTID) { error = copyout(&newtd->td_tid, args->parent_tid, sizeof(newtd->td_tid)); if (error) linux_msg(td, "clone_thread: copyout td_tid failed!"); } /* * Make this runnable after we are finished with it. */ thread_lock(newtd); TD_SET_CAN_RUN(newtd); sched_add(newtd, SRQ_BORING); td->td_retval[0] = newtd->td_tid; return (0); fail: #ifdef RACCT if (racct_enable) { PROC_LOCK(p); racct_sub(p, RACCT_NTHR, 1); PROC_UNLOCK(p); } #endif return (error); } int linux_clone(struct thread *td, struct linux_clone_args *args) { struct l_clone_args ca = { .flags = (lower_32_bits(args->flags) & ~LINUX_CSIGNAL), .child_tid = args->child_tidptr, .parent_tid = args->parent_tidptr, .exit_signal = (lower_32_bits(args->flags) & LINUX_CSIGNAL), .stack = args->stack, .tls = args->tls, }; if (args->flags & LINUX_CLONE_THREAD) return (linux_clone_thread(td, &ca)); else return (linux_clone_proc(td, &ca)); } static int linux_clone3_args_valid(struct l_user_clone_args *uca) { /* Verify that no unknown flags are passed along. */ if ((uca->flags & ~(LINUX_CLONE_LEGACY_FLAGS | LINUX_CLONE_CLEAR_SIGHAND | LINUX_CLONE_INTO_CGROUP)) != 0) return (EINVAL); if ((uca->flags & (LINUX_CLONE_DETACHED | LINUX_CSIGNAL)) != 0) return (EINVAL); if ((uca->flags & (LINUX_CLONE_SIGHAND | LINUX_CLONE_CLEAR_SIGHAND)) == (LINUX_CLONE_SIGHAND | LINUX_CLONE_CLEAR_SIGHAND)) return (EINVAL); if ((uca->flags & (LINUX_CLONE_THREAD | LINUX_CLONE_PARENT)) != 0 && uca->exit_signal != 0) return (EINVAL); /* We don't support set_tid, only validate input. */ if (uca->set_tid_size > LINUX_MAX_PID_NS_LEVEL) return (EINVAL); if (uca->set_tid == 0 && uca->set_tid_size > 0) return (EINVAL); if (uca->set_tid != 0 && uca->set_tid_size == 0) return (EINVAL); if (uca->stack == 0 && uca->stack_size > 0) return (EINVAL); if (uca->stack != 0 && uca->stack_size == 0) return (EINVAL); /* Verify that higher 32bits of exit_signal are unset. */ if ((uca->exit_signal & ~(uint64_t)LINUX_CSIGNAL) != 0) return (EINVAL); /* Verify that no unsupported flags are passed along. */ if ((uca->flags & LINUX_CLONE_NEWTIME) != 0) { LINUX_RATELIMIT_MSG("unsupported clone3 option CLONE_NEWTIME"); return (ENOSYS); } if ((uca->flags & LINUX_CLONE_INTO_CGROUP) != 0) { LINUX_RATELIMIT_MSG("unsupported clone3 option CLONE_INTO_CGROUP"); return (ENOSYS); } if (uca->set_tid != 0 || uca->set_tid_size != 0) { LINUX_RATELIMIT_MSG("unsupported clone3 set_tid"); return (ENOSYS); } return (0); } int linux_clone3(struct thread *td, struct linux_clone3_args *args) { struct l_user_clone_args *uca; struct l_clone_args *ca; size_t size; int error; if (args->usize > PAGE_SIZE) return (E2BIG); if (args->usize < LINUX_CLONE_ARGS_SIZE_VER0) return (EINVAL); /* * usize can be less than size of struct clone_args, to avoid using * of uninitialized data of struct clone_args, allocate at least * sizeof(struct clone_args) storage and zero it. */ size = max(args->usize, sizeof(*uca)); uca = malloc(size, M_LINUX, M_WAITOK | M_ZERO); error = copyin(args->uargs, uca, args->usize); if (error != 0) goto out; error = linux_clone3_args_valid(uca); if (error != 0) goto out; ca = malloc(sizeof(*ca), M_LINUX, M_WAITOK | M_ZERO); ca->flags = uca->flags; ca->child_tid = PTRIN(uca->child_tid); ca->parent_tid = PTRIN(uca->parent_tid); ca->exit_signal = uca->exit_signal; ca->stack = uca->stack + uca->stack_size; ca->stack_size = uca->stack_size; ca->tls = uca->tls; if ((ca->flags & LINUX_CLONE_THREAD) != 0) error = linux_clone_thread(td, ca); else error = linux_clone_proc(td, ca); free(ca, M_LINUX); out: free(uca, M_LINUX); return (error); } int linux_exit(struct thread *td, struct linux_exit_args *args) { struct linux_emuldata *em __diagused; em = em_find(td); KASSERT(em != NULL, ("exit: emuldata not found.\n")); LINUX_CTR2(exit, "thread(%d) (%d)", em->em_tid, args->rval); linux_thread_detach(td); /* * XXX. When the last two threads of a process * exit via pthread_exit() try thr_exit() first. */ kern_thr_exit(td); exit1(td, args->rval, 0); /* NOTREACHED */ } int linux_set_tid_address(struct thread *td, struct linux_set_tid_address_args *args) { struct linux_emuldata *em; em = em_find(td); KASSERT(em != NULL, ("set_tid_address: emuldata not found.\n")); em->child_clear_tid = args->tidptr; td->td_retval[0] = em->em_tid; LINUX_CTR3(set_tid_address, "tidptr(%d) %p, returns %d", em->em_tid, args->tidptr, td->td_retval[0]); return (0); } void linux_thread_detach(struct thread *td) { struct linux_emuldata *em; int *child_clear_tid; int error; em = em_find(td); KASSERT(em != NULL, ("thread_detach: emuldata not found.\n")); LINUX_CTR1(thread_detach, "thread(%d)", em->em_tid); release_futexes(td, em); child_clear_tid = em->child_clear_tid; if (child_clear_tid != NULL) { LINUX_CTR2(thread_detach, "thread(%d) %p", em->em_tid, child_clear_tid); error = suword32(child_clear_tid, 0); if (error != 0) return; error = futex_wake(td, child_clear_tid, 1, false); /* * this cannot happen at the moment and if this happens it * probably means there is a user space bug */ if (error != 0) linux_msg(td, "futex stuff in thread_detach failed."); } /* * Do not rely on the robust list which is maintained by userspace, * cleanup remaining pi (if any) after release_futexes anyway. */ umtx_thread_exit(td); } diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index 0e0cebe98e5f..b0dd7534633b 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -1,641 +1,631 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1982, 1986 The Regents of the University of California. * Copyright (c) 1989, 1990 William Jolitz * Copyright (c) 1994 John Dyson * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ */ #include #include "opt_isa.h" #include "opt_npx.h" #include "opt_reset.h" #include "opt_cpu.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include _Static_assert(__OFFSETOF_MONITORBUF == offsetof(struct pcpu, pc_monitorbuf), "__OFFSETOF_MONITORBUF does not correspond with offset of pc_monitorbuf."); union savefpu * get_pcb_user_save_td(struct thread *td) { vm_offset_t p; p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE - roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN); KASSERT((p % XSAVE_AREA_ALIGN) == 0, ("Unaligned pcb_user_save area")); return ((union savefpu *)p); } union savefpu * get_pcb_user_save_pcb(struct pcb *pcb) { vm_offset_t p; p = (vm_offset_t)(pcb + 1); return ((union savefpu *)p); } struct pcb * get_pcb_td(struct thread *td) { vm_offset_t p; p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE - roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN) - sizeof(struct pcb); return ((struct pcb *)p); } void * alloc_fpusave(int flags) { void *res; struct savefpu_ymm *sf; res = malloc(cpu_max_ext_state_size, M_DEVBUF, flags); if (use_xsave) { sf = (struct savefpu_ymm *)res; bzero(&sf->sv_xstate.sx_hd, sizeof(sf->sv_xstate.sx_hd)); sf->sv_xstate.sx_hd.xstate_bv = xsave_mask; } return (res); } /* * Common code shared between cpu_fork() and cpu_copy_thread() for * initializing a thread. */ static void copy_thread(struct thread *td1, struct thread *td2) { struct pcb *pcb2; pcb2 = td2->td_pcb; /* Ensure that td1's pcb is up to date for user threads. */ if ((td2->td_pflags & TDP_KTHREAD) == 0) { MPASS(td1 == curthread); td1->td_pcb->pcb_gs = rgs(); critical_enter(); if (PCPU_GET(fpcurthread) == td1) npxsave(td1->td_pcb->pcb_save); critical_exit(); } /* Copy td1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); /* Properly initialize pcb_save */ pcb2->pcb_save = get_pcb_user_save_pcb(pcb2); /* Kernel threads start with clean NPX and segment bases. */ if ((td2->td_pflags & TDP_KTHREAD) != 0) { pcb2->pcb_gs = _udatasel; set_fsbase(td2, 0); set_gsbase(td2, 0); pcb2->pcb_flags &= ~(PCB_NPXINITDONE | PCB_NPXUSERINITDONE | PCB_KERNNPX | PCB_KERNNPX_THR); } else { MPASS((pcb2->pcb_flags & (PCB_KERNNPX | PCB_KERNNPX_THR)) == 0); bcopy(get_pcb_user_save_td(td1), get_pcb_user_save_pcb(pcb2), cpu_max_ext_state_size); } /* * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. */ pcb2->pcb_edi = 0; pcb2->pcb_esi = (int)fork_return; /* trampoline arg */ pcb2->pcb_ebp = 0; pcb2->pcb_esp = (int)td2->td_frame - sizeof(void *); /* trampoline arg */ pcb2->pcb_ebx = (int)td2; /* trampoline arg */ pcb2->pcb_eip = (int)fork_trampoline + setidt_disp; /* * If we didn't copy the pcb, we'd need to do the following registers: * pcb2->pcb_cr3: cloned above. * pcb2->pcb_dr*: cloned above. * pcb2->pcb_savefpu: cloned above. * pcb2->pcb_flags: cloned above. * pcb2->pcb_onfault: cloned above (always NULL here?). * pcb2->pcb_gs: cloned above. * pcb2->pcb_ext: cleared below. */ pcb2->pcb_ext = NULL; /* Setup to release spin count in fork_exit(). */ td2->td_md.md_spinlock_count = 1; td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I; } /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child * ready to run and return to user mode. */ void cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) { struct proc *p1; struct pcb *pcb2; struct mdproc *mdp2; p1 = td1->td_proc; if ((flags & RFPROC) == 0) { if ((flags & RFMEM) == 0) { /* unshare user LDT */ struct mdproc *mdp1 = &p1->p_md; struct proc_ldt *pldt, *pldt1; mtx_lock_spin(&dt_lock); if ((pldt1 = mdp1->md_ldt) != NULL && pldt1->ldt_refcnt > 1) { pldt = user_ldt_alloc(mdp1, pldt1->ldt_len); if (pldt == NULL) panic("could not copy LDT"); mdp1->md_ldt = pldt; set_user_ldt(mdp1); user_ldt_deref(pldt1); } else mtx_unlock_spin(&dt_lock); } return; } /* Point the pcb to the top of the stack */ pcb2 = get_pcb_td(td2); td2->td_pcb = pcb2; copy_thread(td1, td2); /* Reset debug registers in the new process */ x86_clear_dbregs(pcb2); /* Point mdproc and then copy over td1's contents */ mdp2 = &p2->p_md; bcopy(&p1->p_md, mdp2, sizeof(*mdp2)); /* * Copy the trap frame for the return to user mode as if from a * syscall. This copies most of the user mode register values. * The -VM86_STACK_SPACE (-16) is so we can expand the trapframe * if we go to vm86. */ td2->td_frame = (struct trapframe *)((caddr_t)td2->td_pcb - VM86_STACK_SPACE) - 1; bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe)); /* Set child return values. */ p2->p_sysent->sv_set_fork_retval(td2); /* * If the parent process has the trap bit set (i.e. a debugger * had single stepped the process to the system call), we need * to clear the trap flag from the new frame. */ td2->td_frame->tf_eflags &= ~PSL_T; /* Set cr3 for the new process. */ pcb2->pcb_cr3 = pmap_get_cr3(vmspace_pmap(p2->p_vmspace)); /* * XXX don't copy the i/o pages. this should probably be fixed. */ pcb2->pcb_ext = NULL; /* Copy the LDT, if necessary. */ mtx_lock_spin(&dt_lock); if (mdp2->md_ldt != NULL) { if (flags & RFMEM) { mdp2->md_ldt->ldt_refcnt++; } else { mdp2->md_ldt = user_ldt_alloc(mdp2, mdp2->md_ldt->ldt_len); if (mdp2->md_ldt == NULL) panic("could not copy LDT"); } } mtx_unlock_spin(&dt_lock); /* * Now, cpu_switch() can schedule the new process. * pcb_esp is loaded pointing to the cpu_switch() stack frame * containing the return address when exiting cpu_switch. * This will normally be to fork_trampoline(), which will have * %ebx loaded with the new proc's pointer. fork_trampoline() * will set up a stack to call fork_return(p, frame); to complete * the return to user-mode. */ } void x86_set_fork_retval(struct thread *td) { struct trapframe * frame = td->td_frame; frame->tf_eax = 0; /* Child returns zero */ frame->tf_eflags &= ~PSL_C; /* success */ frame->tf_edx = 1; /* System V emulation */ } /* * Intercept the return address from a freshly forked process that has NOT * been scheduled yet. * * This is needed to make kernel threads stay in kernel mode. */ void cpu_fork_kthread_handler(struct thread *td, void (*func)(void *), void *arg) { /* * Note that the trap frame follows the args, so the function * is really called like this: func(arg, frame); */ td->td_pcb->pcb_esi = (int) func; /* function */ td->td_pcb->pcb_ebx = (int) arg; /* first arg */ } void cpu_exit(struct thread *td) { /* * If this process has a custom LDT, release it. Reset pc->pcb_gs * and %gs before we free it in case they refer to an LDT entry. */ mtx_lock_spin(&dt_lock); if (td->td_proc->p_md.md_ldt) { td->td_pcb->pcb_gs = _udatasel; load_gs(_udatasel); user_ldt_free(td); } else mtx_unlock_spin(&dt_lock); } void cpu_thread_exit(struct thread *td) { critical_enter(); if (td == PCPU_GET(fpcurthread)) npxdrop(); critical_exit(); /* Disable any hardware breakpoints. */ if (td->td_pcb->pcb_flags & PCB_DBREGS) { reset_dbregs(); td->td_pcb->pcb_flags &= ~PCB_DBREGS; } } void cpu_thread_clean(struct thread *td) { struct pcb *pcb; pcb = td->td_pcb; if (pcb->pcb_ext != NULL) { /* if (pcb->pcb_ext->ext_refcount-- == 1) ?? */ /* * XXX do we need to move the TSS off the allocated pages * before freeing them? (not done here) */ pmap_trm_free(pcb->pcb_ext, ctob(IOPAGES + 1)); pcb->pcb_ext = NULL; } } void cpu_thread_alloc(struct thread *td) { struct pcb *pcb; struct xstate_hdr *xhdr; td->td_pcb = pcb = get_pcb_td(td); td->td_frame = (struct trapframe *)((caddr_t)pcb - VM86_STACK_SPACE) - 1; pcb->pcb_ext = NULL; pcb->pcb_save = get_pcb_user_save_pcb(pcb); if (use_xsave) { xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1); bzero(xhdr, sizeof(*xhdr)); xhdr->xstate_bv = xsave_mask; } } void cpu_thread_free(struct thread *td) { cpu_thread_clean(td); } bool cpu_exec_vmspace_reuse(struct proc *p __unused, vm_map_t map __unused) { return (true); } int cpu_procctl(struct thread *td __unused, int idtype __unused, id_t id __unused, int com __unused, void *data __unused) { return (EINVAL); } void cpu_set_syscall_retval(struct thread *td, int error) { switch (error) { case 0: td->td_frame->tf_eax = td->td_retval[0]; td->td_frame->tf_edx = td->td_retval[1]; td->td_frame->tf_eflags &= ~PSL_C; break; case ERESTART: /* * Reconstruct pc, assuming lcall $X,y is 7 bytes, int * 0x80 is 2 bytes. We saved this in tf_err. */ td->td_frame->tf_eip -= td->td_frame->tf_err; break; case EJUSTRETURN: break; default: td->td_frame->tf_eax = error; td->td_frame->tf_eflags |= PSL_C; break; } } /* * Initialize machine state, mostly pcb and trap frame for a new * thread, about to return to userspace. Put enough state in the new * thread's PCB to get it to go back to the fork_return(), which * finalizes the thread state and handles peculiarities of the first * return to userspace for the new thread. */ void cpu_copy_thread(struct thread *td, struct thread *td0) { copy_thread(td0, td); /* * Copy user general-purpose registers. * * Some of these registers are rewritten by cpu_set_upcall() * and linux_set_upcall(). */ bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe)); /* If the current thread has the trap bit set (i.e. a debugger had * single stepped the process to the system call), we need to clear * the trap flag from the new frame. Otherwise, the new thread will * receive a (likely unexpected) SIGTRAP when it executes the first * instruction after returning to userland. */ td->td_frame->tf_eflags &= ~PSL_T; } /* * Set that machine state for performing an upcall that starts * the entry function with the given argument. */ int cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg, stack_t *stack) { - - /* - * Do any extra cleaning that needs to be done. - * The thread may have optional components - * that are not present in a fresh thread. - * This may be a recycled thread so make it look - * as though it's newly allocated. - */ - cpu_thread_clean(td); - /* * Set the trap frame to point at the beginning of the entry * function. */ td->td_frame->tf_ebp = 0; td->td_frame->tf_esp = (((int)stack->ss_sp + stack->ss_size - 4) & ~0x0f) - 4; td->td_frame->tf_eip = (int)entry; /* Return address sentinel value to stop stack unwinding. */ if (suword((void *)td->td_frame->tf_esp, 0) != 0) return (EFAULT); /* Pass the argument to the entry point. */ if (suword((void *)(td->td_frame->tf_esp + sizeof(void *)), (int)arg) != 0) return (EFAULT); return (0); } int cpu_set_user_tls(struct thread *td, void *tls_base) { struct segment_descriptor sd; uint32_t base; /* * Construct a descriptor and store it in the pcb for * the next context switch. Also store it in the gdt * so that the load of tf_fs into %fs will activate it * at return to userland. */ base = (uint32_t)tls_base; sd.sd_lobase = base & 0xffffff; sd.sd_hibase = (base >> 24) & 0xff; sd.sd_lolimit = 0xffff; /* 4GB limit, wraps around */ sd.sd_hilimit = 0xf; sd.sd_type = SDT_MEMRWA; sd.sd_dpl = SEL_UPL; sd.sd_p = 1; sd.sd_xx = 0; sd.sd_def32 = 1; sd.sd_gran = 1; critical_enter(); /* set %gs */ td->td_pcb->pcb_gsd = sd; if (td == curthread) { PCPU_GET(fsgs_gdt)[1] = sd; load_gs(GSEL(GUGS_SEL, SEL_UPL)); } critical_exit(); return (0); } /* * Convert kernel VA to physical address */ vm_paddr_t kvtop(void *addr) { vm_paddr_t pa; pa = pmap_kextract((vm_offset_t)addr); if (pa == 0) panic("kvtop: zero page frame"); return (pa); } /* * Get an sf_buf from the freelist. May block if none are available. */ void sf_buf_map(struct sf_buf *sf, int flags) { pmap_sf_buf_map(sf); #ifdef SMP sf_buf_shootdown(sf, flags); #endif } #ifdef SMP static void sf_buf_shootdown_curcpu_cb(pmap_t pmap __unused, vm_offset_t addr1 __unused, vm_offset_t addr2 __unused) { } void sf_buf_shootdown(struct sf_buf *sf, int flags) { cpuset_t other_cpus; u_int cpuid; sched_pin(); cpuid = PCPU_GET(cpuid); if (!CPU_ISSET(cpuid, &sf->cpumask)) { CPU_SET(cpuid, &sf->cpumask); invlpg(sf->kva); } if ((flags & SFB_CPUPRIVATE) == 0) { other_cpus = all_cpus; CPU_CLR(cpuid, &other_cpus); CPU_ANDNOT(&other_cpus, &other_cpus, &sf->cpumask); if (!CPU_EMPTY(&other_cpus)) { CPU_OR(&sf->cpumask, &sf->cpumask, &other_cpus); smp_masked_invlpg(other_cpus, sf->kva, kernel_pmap, sf_buf_shootdown_curcpu_cb); } } sched_unpin(); } #endif /* * MD part of sf_buf_free(). */ int sf_buf_unmap(struct sf_buf *sf) { return (0); } static void sf_buf_invalidate(struct sf_buf *sf) { vm_page_t m = sf->m; /* * Use pmap_qenter to update the pte for * existing mapping, in particular, the PAT * settings are recalculated. */ pmap_qenter(sf->kva, &m, 1); pmap_invalidate_cache_range(sf->kva, sf->kva + PAGE_SIZE); } /* * Invalidate the cache lines that may belong to the page, if * (possibly old) mapping of the page by sf buffer exists. Returns * TRUE when mapping was found and cache invalidated. */ boolean_t sf_buf_invalidate_cache(vm_page_t m) { return (sf_buf_process_page(m, sf_buf_invalidate)); }