Index: lib/libc/amd64/sys/Makefile.inc =================================================================== --- lib/libc/amd64/sys/Makefile.inc +++ lib/libc/amd64/sys/Makefile.inc @@ -1,7 +1,11 @@ # from: Makefile.inc,v 1.1 1993/09/03 19:04:23 jtc Exp # $FreeBSD$ -SRCS+= amd64_get_fsbase.c amd64_get_gsbase.c amd64_set_fsbase.c \ +SRCS+= \ + amd64_detect_rdfsgsbase.c \ + amd64_get_fsbase.c \ + amd64_get_gsbase.c \ + amd64_set_fsbase.c \ amd64_set_gsbase.c MDASM= vfork.S brk.S cerror.S exect.S getcontext.S \ Index: lib/libc/amd64/sys/amd64_detect_rdfsgsbase.h =================================================================== --- /dev/null +++ lib/libc/amd64/sys/amd64_detect_rdfsgsbase.h @@ -0,0 +1,43 @@ +/*- + * Copyright (c) 2017 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _AMD64_DETECT_RDFSGSBASE_H_ +#define _AMD64_DETECT_RDFSGSBASE_H_ + +enum { + RDFSGS_UNKNOWN, + RDFSGS_SUPPORTED, + RDFSGS_UNSUPPORTED, +}; + +int amd64_detect_rdfsgsbase(void); + +#endif Index: lib/libc/amd64/sys/amd64_detect_rdfsgsbase.c =================================================================== --- /dev/null +++ lib/libc/amd64/sys/amd64_detect_rdfsgsbase.c @@ -0,0 +1,63 @@ +/*- + * Copyright (c) 2017 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#define IN_RTLD 1 +#include +#undef IN_RTLD +#include +#include +#include "amd64_detect_rdfsgsbase.h" +#include "libc_private.h" + +static int state = RDFSGS_UNKNOWN; + +int +amd64_detect_rdfsgsbase(void) +{ + u_int p[4]; + + if (__predict_true(state != RDFSGS_UNKNOWN)) + return (state); + + if (__getosreldate() >= P_OSREL_WRFSBASE) { + do_cpuid(0x0, p); + if (p[0] >= 0x7) { + cpuid_count(0x7, 0x0, p); + if ((p[1] & CPUID_STDEXT_FSGSBASE) != 0) { + state = RDFSGS_SUPPORTED; + return (state); + } + } + } + state = RDFSGS_UNSUPPORTED; + return (state); +} Index: lib/libc/amd64/sys/amd64_get_fsbase.c =================================================================== --- lib/libc/amd64/sys/amd64_get_fsbase.c +++ lib/libc/amd64/sys/amd64_get_fsbase.c @@ -1,7 +1,11 @@ /*- * Copyright (c) 2003 Peter Wemm + * Copyright (c) 2017 The FreeBSD Foundation * All rights reserved. * + * Portions of this software were developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. 
+ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -27,11 +31,21 @@ #include __FBSDID("$FreeBSD$"); +#include +#include #include +#include "amd64_detect_rdfsgsbase.h" int amd64_get_fsbase(void **addr) { - return (sysarch(AMD64_GET_FSBASE, addr)); + switch (amd64_detect_rdfsgsbase()) { + case RDFSGS_SUPPORTED: + *addr = (void *)rdfsbase(); + return (0); + case RDFSGS_UNSUPPORTED: + return (sysarch(AMD64_GET_FSBASE, addr)); + } + return (-1); /* cannot happen */ } Index: lib/libc/amd64/sys/amd64_get_gsbase.c =================================================================== --- lib/libc/amd64/sys/amd64_get_gsbase.c +++ lib/libc/amd64/sys/amd64_get_gsbase.c @@ -1,7 +1,11 @@ /*- * Copyright (c) 2003 Peter Wemm + * Copyright (c) 2017 The FreeBSD Foundation * All rights reserved. * + * Portions of this software were developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -27,11 +31,21 @@ #include __FBSDID("$FreeBSD$"); +#include +#include #include +#include "amd64_detect_rdfsgsbase.h" int amd64_get_gsbase(void **addr) { - return (sysarch(AMD64_GET_GSBASE, addr)); + switch (amd64_detect_rdfsgsbase()) { + case RDFSGS_SUPPORTED: + *addr = (void *)rdgsbase(); + return (0); + case RDFSGS_UNSUPPORTED: + return (sysarch(AMD64_GET_GSBASE, addr)); + } + return (-1); /* cannot happen */ } Index: lib/libc/amd64/sys/amd64_set_fsbase.c =================================================================== --- lib/libc/amd64/sys/amd64_set_fsbase.c +++ lib/libc/amd64/sys/amd64_set_fsbase.c @@ -1,7 +1,11 @@ /*- * Copyright (c) 2003 Peter Wemm + * Copyright (c) 2017 The FreeBSD Foundation * All rights reserved. 
* + * Portions of this software were developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -27,11 +31,21 @@ #include __FBSDID("$FreeBSD$"); +#include +#include #include +#include "amd64_detect_rdfsgsbase.h" int amd64_set_fsbase(void *addr) { - return (sysarch(AMD64_SET_FSBASE, &addr)); + switch (amd64_detect_rdfsgsbase()) { + case RDFSGS_SUPPORTED: + wrfsbase((uintptr_t)addr); + return (0); + case RDFSGS_UNSUPPORTED: + return (sysarch(AMD64_SET_FSBASE, &addr)); + } + return (-1); /* cannot happen */ } Index: lib/libc/amd64/sys/amd64_set_gsbase.c =================================================================== --- lib/libc/amd64/sys/amd64_set_gsbase.c +++ lib/libc/amd64/sys/amd64_set_gsbase.c @@ -1,7 +1,11 @@ /*- * Copyright (c) 2003 Peter Wemm + * Copyright (c) 2017 The FreeBSD Foundation * All rights reserved. * + * Portions of this software were developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -27,11 +31,21 @@ #include __FBSDID("$FreeBSD$"); +#include +#include #include +#include "amd64_detect_rdfsgsbase.h" int amd64_set_gsbase(void *addr) { - return (sysarch(AMD64_SET_GSBASE, &addr)); + switch (amd64_detect_rdfsgsbase()) { + case RDFSGS_SUPPORTED: + wrgsbase((uintptr_t)addr); + return (0); + case RDFSGS_UNSUPPORTED: + return (sysarch(AMD64_SET_GSBASE, &addr)); + } + return (-1); /* cannot happen */ } Index: sys/amd64/amd64/cpu_switch.S =================================================================== --- sys/amd64/amd64/cpu_switch.S +++ sys/amd64/amd64/cpu_switch.S @@ -87,7 +87,6 @@ ENTRY(cpu_switch) /* Switch to new thread. First, save context. 
*/ movq TD_PCB(%rdi),%r8 - orl $PCB_FULL_IRET,PCB_FLAGS(%r8) movq (%rsp),%rax /* Hardware registers */ movq %r15,PCB_R15(%r8) @@ -99,6 +98,30 @@ movq %rbx,PCB_RBX(%r8) movq %rax,PCB_RIP(%r8) + testl $PCB_FULL_IRET,PCB_FLAGS(%r8) + jnz 2f + orl $PCB_FULL_IRET,PCB_FLAGS(%r8) + testl $TDP_KTHREAD,TD_PFLAGS(%rdi) + jnz 2f + testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip) + jz 2f + movl %fs,%eax + cmpl $KUF32SEL,%eax + jne 1f + rdfsbaseq %rax + movq %rax,PCB_FSBASE(%r8) +1: movl %gs,%eax + cmpl $KUG32SEL,%eax + jne 2f + movq %rdx,%r12 + movl $MSR_KGSBASE,%ecx /* Read user gs base */ + rdmsr + shlq $32,%rdx + orq %rdx,%rax + movq %rax,PCB_GSBASE(%r8) + movq %r12,%rdx + +2: testl $PCB_DBREGS,PCB_FLAGS(%r8) jnz store_dr /* static predict not taken */ done_store_dr: Index: sys/amd64/amd64/exception.S =================================================================== --- sys/amd64/amd64/exception.S +++ sys/amd64/amd64/exception.S @@ -187,12 +187,13 @@ jz alltraps_pushregs_no_rdi sti alltraps_pushregs_no_rdi: - movq %rsi,TF_RSI(%rsp) movq %rdx,TF_RDX(%rsp) + movq %rax,TF_RAX(%rsp) +alltraps_pushregs_no_rax: + movq %rsi,TF_RSI(%rsp) movq %rcx,TF_RCX(%rsp) movq %r8,TF_R8(%rsp) movq %r9,TF_R9(%rsp) - movq %rax,TF_RAX(%rsp) movq %rbx,TF_RBX(%rsp) movq %rbp,TF_RBP(%rsp) movq %r10,TF_R10(%rsp) @@ -326,22 +327,44 @@ prot_addrf: movq $0,TF_ADDR(%rsp) movq %rdi,TF_RDI(%rsp) /* free up a GP register */ + movq %rax,TF_RAX(%rsp) + movq %rdx,TF_RDX(%rsp) + movw %fs,TF_FS(%rsp) + movw %gs,TF_GS(%rsp) leaq doreti_iret(%rip),%rdi cmpq %rdi,TF_RIP(%rsp) - je 1f /* kernel but with user gsbase!! */ + je 5f /* kernel but with user gsbase!! */ testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? 
*/ - jz 2f /* already running with kernel GS.base */ -1: swapgs -2: movq PCPU(CURPCB),%rdi - orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) /* always full iret from GPF */ - movw %fs,TF_FS(%rsp) - movw %gs,TF_GS(%rsp) + jz 6f /* already running with kernel GS.base */ + testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip) + jz 2f + cmpw $KUF32SEL,TF_FS(%rsp) + jne 1f + rdfsbaseq %rax +1: cmpw $KUG32SEL,TF_GS(%rsp) + jne 2f + rdgsbaseq %rdx +2: swapgs + movq PCPU(CURPCB),%rdi + testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip) + jz 4f + cmpw $KUF32SEL,TF_FS(%rsp) + jne 3f + movq %rax,PCB_FSBASE(%rdi) +3: cmpw $KUG32SEL,TF_GS(%rsp) + jne 4f + movq %rdx,PCB_GSBASE(%rdi) +4: orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) /* always full iret from GPF */ movw %es,TF_ES(%rsp) movw %ds,TF_DS(%rsp) testl $PSL_I,TF_RFLAGS(%rsp) - jz alltraps_pushregs_no_rdi + jz alltraps_pushregs_no_rax sti - jmp alltraps_pushregs_no_rdi + jmp alltraps_pushregs_no_rax + +5: swapgs +6: movq PCPU(CURPCB),%rdi + jmp 4b /* * Fast syscall entry point. We enter here with just our new %cs/%ss set, @@ -349,8 +372,8 @@ * pointer. We have to juggle a few things around to find our stack etc. * swapgs gives us access to our PCPU space only. * - * We do not support invoking this from a custom %cs or %ss (e.g. using - * entries from an LDT). + * We do not support invoking this with custom segment registers, + * esp. %cs, %ss, %fs, %gs, e.g. using entries from an LDT.
*/ IDTVEC(fast_syscall) swapgs @@ -503,6 +526,23 @@ nmi_fromuserspace: incl %ebx swapgs + testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip) + jz 2f + movq PCPU(CURPCB),%rdi + testq %rdi,%rdi + jz 2f + cmpw $KUF32SEL,TF_FS(%rsp) + jne 1f + rdfsbaseq %rax + movq %rax,PCB_FSBASE(%rdi) +1: cmpw $KUG32SEL,TF_GS(%rsp) + jne 2f + movl $MSR_KGSBASE,%ecx + rdmsr + shlq $32,%rdx + orq %rdx,%rax + movq %rax,PCB_GSBASE(%rdi) +2: /* Note: this label is also used by ddb and gdb: */ nmi_calltrap: FAKE_MCOUNT(TF_RIP(%rsp)) @@ -705,6 +745,7 @@ jz ld_regs testl $PCB_FULL_IRET,PCB_FLAGS(%r8) jz ld_regs + andl $~PCB_FULL_IRET,PCB_FLAGS(%r8) testl $TF_HASSEGS,TF_FLAGS(%rsp) je set_segs Index: sys/amd64/amd64/machdep.c =================================================================== --- sys/amd64/amd64/machdep.c +++ sys/amd64/amd64/machdep.c @@ -372,6 +372,7 @@ sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); fpstate_drop(td); + set_pcb_flags(pcb, PCB_FULL_IRET); sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase; sf.sf_uc.uc_mcontext.mc_gsbase = pcb->pcb_gsbase; bzero(sf.sf_uc.uc_mcontext.mc_spare, @@ -442,7 +443,6 @@ regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; - set_pcb_flags(pcb, PCB_FULL_IRET); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } @@ -548,6 +548,7 @@ return (ret); } bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs)); + set_pcb_flags(pcb, PCB_FULL_IRET); pcb->pcb_fsbase = ucp->uc_mcontext.mc_fsbase; pcb->pcb_gsbase = ucp->uc_mcontext.mc_gsbase; @@ -559,7 +560,6 @@ #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); - set_pcb_flags(pcb, PCB_FULL_IRET); return (EJUSTRETURN); } @@ -587,11 +587,11 @@ else mtx_unlock(&dt_lock); + set_pcb_flags(pcb, PCB_FULL_IRET); pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; clear_pcb_flags(pcb, PCB_32BIT); pcb->pcb_initial_fpucw = __INITIAL_FPUCW__; - set_pcb_flags(pcb, PCB_FULL_IRET); bzero((char *)regs, 
sizeof(struct trapframe)); regs->tf_rip = imgp->entry_addr; @@ -2135,6 +2135,7 @@ mcp->mc_flags = tp->tf_flags; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp, NULL, 0); + set_pcb_flags(pcb, PCB_FULL_IRET); /* update pcb_f/gsbase */ mcp->mc_fsbase = pcb->pcb_fsbase; mcp->mc_gsbase = pcb->pcb_gsbase; mcp->mc_xfpustate = 0; @@ -2205,11 +2206,11 @@ tp->tf_fs = mcp->mc_fs; tp->tf_gs = mcp->mc_gs; } + set_pcb_flags(pcb, PCB_FULL_IRET); if (mcp->mc_flags & _MC_HASBASES) { pcb->pcb_fsbase = mcp->mc_fsbase; pcb->pcb_gsbase = mcp->mc_gsbase; } - set_pcb_flags(pcb, PCB_FULL_IRET); return (0); } @@ -2480,6 +2481,60 @@ return 0; } +/* + * The pcb_flags is only modified by current thread, or by other threads + * when current thread is stopped. However, current thread may change it + * from the interrupt context in cpu_switch(), or in the trap handler. + * When we read-modify-write pcb_flags from C sources, compiler may generate + * code that is not atomic regarding the interrupt handler. If a trap or + * interrupt happens and any flag is modified from the handler, it can be + * clobbered with the cached value later. Therefore, we implement setting + * and clearing flags with single-instruction functions, which do not race + * with possible modification of the flags from the trap or interrupt context, + * because traps and interrupts are executed only on instruction boundary. 
+ */ +void +set_pcb_flags_raw(struct pcb *pcb, const u_int flags) +{ + + __asm __volatile("orl %1,%0" + : "=m" (pcb->pcb_flags) : "ir" (flags), "m" (pcb->pcb_flags) + : "cc", "memory"); + +} + +void +set_pcb_flags(struct pcb *pcb, const u_int flags) +{ + register_t r; + + if (curpcb == pcb && + (flags & PCB_FULL_IRET) != 0 && + (pcb->pcb_flags & PCB_FULL_IRET) == 0 && + (cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0) { + r = intr_disable(); + if ((pcb->pcb_flags & PCB_FULL_IRET) == 0) { + if (rfs() == _ufssel) + pcb->pcb_fsbase = rdfsbase(); + if (rgs() == _ugssel) + pcb->pcb_gsbase = rdmsr(MSR_KGSBASE); + } + set_pcb_flags_raw(pcb, flags); + intr_restore(r); + } else { + set_pcb_flags_raw(pcb, flags); + } +} + +void +clear_pcb_flags(struct pcb *pcb, const u_int flags) +{ + + __asm __volatile("andl %1,%0" + : "=m" (pcb->pcb_flags) : "ir" (~flags), "m" (pcb->pcb_flags) + : "cc", "memory"); +} + #ifdef KDB /* Index: sys/amd64/amd64/ptrace_machdep.c =================================================================== --- sys/amd64/amd64/ptrace_machdep.c +++ sys/amd64/amd64/ptrace_machdep.c @@ -117,15 +117,17 @@ static void cpu_ptrace_setbase(struct thread *td, int req, register_t r) { + struct pcb *pcb; + pcb = td->td_pcb; + set_pcb_flags(pcb, PCB_FULL_IRET); if (req == PT_SETFSBASE) { - td->td_pcb->pcb_fsbase = r; + pcb->pcb_fsbase = r; td->td_frame->tf_fs = _ufssel; } else { - td->td_pcb->pcb_gsbase = r; + pcb->pcb_gsbase = r; td->td_frame->tf_gs = _ugssel; } - set_pcb_flags(td->td_pcb, PCB_FULL_IRET); } #ifdef COMPAT_FREEBSD32 @@ -136,6 +138,7 @@ cpu32_ptrace(struct thread *td, int req, void *addr, int data) { struct savefpu *fpstate; + struct pcb *pcb; uint32_t r; int error; @@ -167,8 +170,11 @@ error = EINVAL; break; } - r = req == PT_GETFSBASE ? td->td_pcb->pcb_fsbase : - td->td_pcb->pcb_gsbase; + pcb = td->td_pcb; + if (td == curthread) + /* update pcb_f/gsbase */ + set_pcb_flags(pcb, PCB_FULL_IRET); + r = req == PT_GETFSBASE ? 
pcb->pcb_fsbase : pcb->pcb_gsbase; error = copyout(&r, addr, sizeof(r)); break; @@ -197,6 +203,7 @@ cpu_ptrace(struct thread *td, int req, void *addr, int data) { register_t *r, rv; + struct pcb *pcb; int error; #ifdef COMPAT_FREEBSD32 @@ -221,8 +228,11 @@ case PT_GETFSBASE: case PT_GETGSBASE: - r = req == PT_GETFSBASE ? &td->td_pcb->pcb_fsbase : - &td->td_pcb->pcb_gsbase; + pcb = td->td_pcb; + if (td == curthread) + /* update pcb_f/gsbase */ + set_pcb_flags(pcb, PCB_FULL_IRET); + r = req == PT_GETFSBASE ? &pcb->pcb_fsbase : &pcb->pcb_gsbase; error = copyout(r, addr, sizeof(*r)); break; Index: sys/amd64/amd64/sys_machdep.c =================================================================== --- sys/amd64/amd64/sys_machdep.c +++ sys/amd64/amd64/sys_machdep.c @@ -254,39 +254,45 @@ error = amd64_set_ioperm(td, &iargs); break; case I386_GET_FSBASE: + set_pcb_flags(pcb, PCB_FULL_IRET); i386base = pcb->pcb_fsbase; error = copyout(&i386base, uap->parms, sizeof(i386base)); break; case I386_SET_FSBASE: error = copyin(uap->parms, &i386base, sizeof(i386base)); if (!error) { + set_pcb_flags(pcb, PCB_FULL_IRET); pcb->pcb_fsbase = i386base; td->td_frame->tf_fs = _ufssel; update_gdt_fsbase(td, i386base); } break; case I386_GET_GSBASE: + set_pcb_flags(pcb, PCB_FULL_IRET); i386base = pcb->pcb_gsbase; error = copyout(&i386base, uap->parms, sizeof(i386base)); break; case I386_SET_GSBASE: error = copyin(uap->parms, &i386base, sizeof(i386base)); if (!error) { + set_pcb_flags(pcb, PCB_FULL_IRET); pcb->pcb_gsbase = i386base; td->td_frame->tf_gs = _ugssel; update_gdt_gsbase(td, i386base); } break; case AMD64_GET_FSBASE: - error = copyout(&pcb->pcb_fsbase, uap->parms, sizeof(pcb->pcb_fsbase)); + set_pcb_flags(pcb, PCB_FULL_IRET); + error = copyout(&pcb->pcb_fsbase, uap->parms, + sizeof(pcb->pcb_fsbase)); break; case AMD64_SET_FSBASE: error = copyin(uap->parms, &a64base, sizeof(a64base)); if (!error) { if (a64base < VM_MAXUSER_ADDRESS) { - pcb->pcb_fsbase = a64base; set_pcb_flags(pcb, 
PCB_FULL_IRET); + pcb->pcb_fsbase = a64base; td->td_frame->tf_fs = _ufssel; } else error = EINVAL; @@ -294,15 +300,17 @@ break; case AMD64_GET_GSBASE: - error = copyout(&pcb->pcb_gsbase, uap->parms, sizeof(pcb->pcb_gsbase)); + set_pcb_flags(pcb, PCB_FULL_IRET); + error = copyout(&pcb->pcb_gsbase, uap->parms, + sizeof(pcb->pcb_gsbase)); break; case AMD64_SET_GSBASE: error = copyin(uap->parms, &a64base, sizeof(a64base)); if (!error) { if (a64base < VM_MAXUSER_ADDRESS) { - pcb->pcb_gsbase = a64base; set_pcb_flags(pcb, PCB_FULL_IRET); + pcb->pcb_gsbase = a64base; td->td_frame->tf_gs = _ugssel; } else error = EINVAL; Index: sys/amd64/amd64/vm_machdep.c =================================================================== --- sys/amd64/amd64/vm_machdep.c +++ sys/amd64/amd64/vm_machdep.c @@ -238,7 +238,7 @@ pcb2->pcb_tssp = NULL; /* New segment registers. */ - set_pcb_flags(pcb2, PCB_FULL_IRET); + set_pcb_flags_raw(pcb2, PCB_FULL_IRET); /* Copy the LDT, if necessary. */ mdp1 = &td1->td_proc->p_md; @@ -439,7 +439,7 @@ pcb2->pcb_save = get_pcb_user_save_pcb(pcb2); bcopy(get_pcb_user_save_td(td0), pcb2->pcb_save, cpu_max_ext_state_size); - set_pcb_flags(pcb2, PCB_FULL_IRET); + set_pcb_flags_raw(pcb2, PCB_FULL_IRET); /* * Create a new fresh stack for the new thread. Index: sys/amd64/include/asmacros.h =================================================================== --- sys/amd64/include/asmacros.h +++ sys/amd64/include/asmacros.h @@ -177,7 +177,12 @@ movw %es,TF_ES(%rsp) ; \ movw %ds,TF_DS(%rsp) ; \ movl $TF_HASSEGS,TF_FLAGS(%rsp) ; \ - cld + cld ; \ + testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel? 
*/ \ + jz 2f ; /* Yes, keep PCB_FULL_IRET */ \ + movq PCPU(CURPCB),%r8 ; \ + andl $~PCB_FULL_IRET,PCB_FLAGS(%r8) ; \ +2: #define POP_FRAME \ movq TF_RDI(%rsp),%rdi ; \ Index: sys/amd64/include/pcb.h =================================================================== --- sys/amd64/include/pcb.h +++ sys/amd64/include/pcb.h @@ -119,37 +119,10 @@ #ifdef _KERNEL struct trapframe; -/* - * The pcb_flags is only modified by current thread, or by other threads - * when current thread is stopped. However, current thread may change it - * from the interrupt context in cpu_switch(), or in the trap handler. - * When we read-modify-write pcb_flags from C sources, compiler may generate - * code that is not atomic regarding the interrupt handler. If a trap or - * interrupt happens and any flag is modified from the handler, it can be - * clobbered with the cached value later. Therefore, we implement setting - * and clearing flags with single-instruction functions, which do not race - * with possible modification of the flags from the trap or interrupt context, - * because traps and interrupts are executed only on instruction boundary. - */ -static __inline void -set_pcb_flags(struct pcb *pcb, const u_int flags) -{ - - __asm __volatile("orl %1,%0" - : "=m" (pcb->pcb_flags) : "ir" (flags), "m" (pcb->pcb_flags) - : "cc"); -} - -static __inline void -clear_pcb_flags(struct pcb *pcb, const u_int flags) -{ - - __asm __volatile("andl %1,%0" - : "=m" (pcb->pcb_flags) : "ir" (~flags), "m" (pcb->pcb_flags) - : "cc"); -} - +void clear_pcb_flags(struct pcb *pcb, const u_int flags); void makectx(struct trapframe *, struct pcb *); +void set_pcb_flags(struct pcb *pcb, const u_int flags); +void set_pcb_flags_raw(struct pcb *pcb, const u_int flags); int savectx(struct pcb *) __returns_twice; void resumectx(struct pcb *); Index: sys/sys/param.h =================================================================== --- sys/sys/param.h +++ sys/sys/param.h @@ -58,7 +58,7 @@ * in the range 5 to 9.
*/ #undef __FreeBSD_version -#define __FreeBSD_version 1200040 /* Master, propagated to newvers */ +#define __FreeBSD_version 1200041 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, @@ -83,6 +83,7 @@ #define P_OSREL_MAP_FSTRICT 1100036 #define P_OSREL_SHUTDOWN_ENOTCONN 1100077 #define P_OSREL_MAP_GUARD 1200035 +#define P_OSREL_WRFSBASE 1200041 #define P_OSREL_MAJOR(x) ((x) / 100000) #endif