Index: head/sys/amd64/amd64/copyout.c
===================================================================
--- head/sys/amd64/amd64/copyout.c
+++ head/sys/amd64/amd64/copyout.c
@@ -0,0 +1,178 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2018 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+#include <x86/ifunc.h>
+
+int fubyte_nosmap(volatile const void *base);
+int fubyte_smap(volatile const void *base);
+DEFINE_IFUNC(, int, fubyte, (volatile const void *), static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    fubyte_smap : fubyte_nosmap);
+}
+
+int fuword16_nosmap(volatile const void *base);
+int fuword16_smap(volatile const void *base);
+DEFINE_IFUNC(, int, fuword16, (volatile const void *), static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    fuword16_smap : fuword16_nosmap);
+}
+
+int fueword_nosmap(volatile const void *base, long *val);
+int fueword_smap(volatile const void *base, long *val);
+DEFINE_IFUNC(, int, fueword, (volatile const void *, long *), static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    fueword_smap : fueword_nosmap);
+}
+DEFINE_IFUNC(, int, fueword64, (volatile const void *, int64_t *), static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    fueword_smap : fueword_nosmap);
+}
+
+int fueword32_nosmap(volatile const void *base, int32_t *val);
+int fueword32_smap(volatile const void *base, int32_t *val);
+DEFINE_IFUNC(, int, fueword32, (volatile const void *, int32_t *), static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    fueword32_smap : fueword32_nosmap);
+}
+
+int subyte_nosmap(volatile void *base, int byte);
+int subyte_smap(volatile void *base, int byte);
+DEFINE_IFUNC(, int, subyte, (volatile void *, int), static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    subyte_smap : subyte_nosmap);
+}
+
+int suword16_nosmap(volatile void *base, int word);
+int suword16_smap(volatile void *base, int word);
+DEFINE_IFUNC(, int, suword16, (volatile void *, int), static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    suword16_smap : suword16_nosmap);
+}
+
+int suword32_nosmap(volatile void *base, int32_t word);
+int suword32_smap(volatile void *base, int32_t word);
+DEFINE_IFUNC(, int, suword32, (volatile void *, int32_t), static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    suword32_smap : suword32_nosmap);
+}
+
+int suword_nosmap(volatile void *base, long word);
+int suword_smap(volatile void *base, long word);
+DEFINE_IFUNC(, int, suword, (volatile void *, long), static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    suword_smap : suword_nosmap);
+}
+DEFINE_IFUNC(, int, suword64, (volatile void *, int64_t), static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    suword_smap : suword_nosmap);
+}
+
+int casueword32_nosmap(volatile uint32_t *base, uint32_t oldval,
+    uint32_t *oldvalp, uint32_t newval);
+int casueword32_smap(volatile uint32_t *base, uint32_t oldval,
+    uint32_t *oldvalp, uint32_t newval);
+DEFINE_IFUNC(, int, casueword32, (volatile uint32_t *, uint32_t, uint32_t *,
+    uint32_t), static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    casueword32_smap : casueword32_nosmap);
+}
+
+int casueword_nosmap(volatile u_long *p, u_long oldval, u_long *oldvalp,
+    u_long newval);
+int casueword_smap(volatile u_long *p, u_long oldval, u_long *oldvalp,
+    u_long newval);
+DEFINE_IFUNC(, int, casueword, (volatile u_long *, u_long, u_long *, u_long),
+    static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    casueword_smap : casueword_nosmap);
+}
+
+int copyinstr_nosmap(const void *udaddr, void *kaddr, size_t len,
+    size_t *lencopied);
+int copyinstr_smap(const void *udaddr, void *kaddr, size_t len,
+    size_t *lencopied);
+DEFINE_IFUNC(, int, copyinstr, (const void *, void *, size_t, size_t *),
+    static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    copyinstr_smap : copyinstr_nosmap);
+}
+
+int copyin_nosmap(const void *udaddr, void *kaddr, size_t len);
+int copyin_smap(const void *udaddr, void *kaddr, size_t len);
+DEFINE_IFUNC(, int, copyin, (const void *, void *, size_t), static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    copyin_smap : copyin_nosmap);
+}
+
+int copyout_nosmap(const void *kaddr, void *udaddr, size_t len);
+int copyout_smap(const void *kaddr, void *udaddr, size_t len);
+DEFINE_IFUNC(, int, copyout, (const void *, void *, size_t), static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    copyout_smap : copyout_nosmap);
+}
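
The ifunc dispatch above selects the SMAP or non-SMAP primitive once, when the kernel linker resolves the symbol, instead of testing cpu_stdext_feature on every call. Below is a minimal userland sketch of the same GNU ifunc mechanism; the names are illustrative, and it assumes an ELF toolchain (GCC or Clang with a binutils-style linker) that supports __attribute__((ifunc)).

#include <stdio.h>

/* Two interchangeable implementations; a real resolver would choose by
 * CPU feature, the way the kernel keys off cpu_stdext_feature. */
static int add_a(int a, int b) { return (a + b); }
static int add_b(int a, int b) { return (a + b); }

/* Resolver: runs once at load time and returns the binding to use. */
static int (*resolve_add(void))(int, int)
{
	/* Stand-in feature test; the kernel checks CPUID_STDEXT_SMAP. */
	return (1 ? add_a : add_b);
}

/* 'add' is permanently bound to whatever resolve_add() returned. */
int add(int, int) __attribute__((ifunc("resolve_add")));

int
main(void)
{
	printf("%d\n", add(2, 3));	/* no per-call feature branch */
	return (0);
}
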
Index: head/sys/amd64/amd64/exception.S
===================================================================
--- head/sys/amd64/amd64/exception.S
+++ head/sys/amd64/amd64/exception.S
@@ -43,8 +43,8 @@
 
 #include "assym.inc"
 
-#include <machine/asmacros.h>
 #include <machine/psl.h>
+#include <machine/asmacros.h>
 #include <machine/trap.h>
 #include <machine/specialreg.h>
 
@@ -196,7 +196,9 @@
 	movq	%r14,TF_R14(%rsp)
 	movq	%r15,TF_R15(%rsp)
 	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
-	cld
+	pushfq
+	andq	$~(PSL_D | PSL_AC),(%rsp)
+	popfq
 	FAKE_MCOUNT(TF_RIP(%rsp))
 #ifdef KDTRACE_HOOKS
 	/*
@@ -277,7 +279,9 @@
 	movq	%r15,TF_R15(%rsp)
 	SAVE_SEGS
 	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
-	cld
+	pushfq
+	andq	$~(PSL_D | PSL_AC),(%rsp)
+	popfq
 	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
 	jz	1f	/* already running with kernel GS.base */
 	swapgs
@@ -571,7 +575,9 @@
 	movq	%r15,TF_R15(%rsp)
 	SAVE_SEGS
 	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
-	cld
+	pushfq
+	andq	$~(PSL_D | PSL_AC),(%rsp)
+	popfq
 	testb	$SEL_RPL_MASK,TF_CS(%rsp)
 	jnz	dbg_fromuserspace
 	/*
@@ -704,7 +710,9 @@
 	movq	%r15,TF_R15(%rsp)
 	SAVE_SEGS
 	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
-	cld
+	pushfq
+	andq	$~(PSL_D | PSL_AC),(%rsp)
+	popfq
 	xorl	%ebx,%ebx
 	testb	$SEL_RPL_MASK,TF_CS(%rsp)
 	jnz	nmi_fromuserspace
@@ -793,7 +801,9 @@
 	subq	%rcx,%rdx
 	movq	%rdx,%rdi	/* destination stack pointer */
 	shrq	$3,%rcx		/* trap frame size in long words */
-	cld
+	pushfq
+	andq	$~(PSL_D | PSL_AC),(%rsp)
+	popfq
 	rep
 	movsq			/* copy trapframe */
 	movq	%rdx,%rsp	/* we are on the regular kstack */
@@ -902,7 +912,9 @@
 	movq	%r15,TF_R15(%rsp)
 	SAVE_SEGS
 	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
-	cld
+	pushfq
+	andq	$~(PSL_D | PSL_AC),(%rsp)
+	popfq
 	xorl	%ebx,%ebx
 	testb	$SEL_RPL_MASK,TF_CS(%rsp)
 	jnz	mchk_fromuserspace
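
Replacing cld with the pushfq/andq/popfq sequence clears both PSL_D (the direction flag) and PSL_AC in a single RFLAGS update on every kernel entry, so a trap or interrupt that arrives while a copy routine holds PSL.AC set cannot keep running kernel code with SMAP suppressed. A small, self-contained illustration of the flag arithmetic, using the architectural bit positions (DF is bit 10, AC is bit 18):

#include <stdint.h>
#include <stdio.h>

#define	PSL_D	0x00000400	/* direction flag, RFLAGS bit 10 */
#define	PSL_AC	0x00040000	/* alignment check, RFLAGS bit 18 */

int
main(void)
{
	uint64_t rflags = 0x40602;	/* example: IF, DF and AC all set */

	/* Same effect as: pushfq; andq $~(PSL_D | PSL_AC),(%rsp); popfq */
	rflags &= ~(uint64_t)(PSL_D | PSL_AC);
	printf("%#jx\n", (uintmax_t)rflags);	/* prints 0x202: only IF left */
	return (0);
}
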
Index: head/sys/amd64/amd64/initcpu.c
===================================================================
--- head/sys/amd64/amd64/initcpu.c
+++ head/sys/amd64/amd64/initcpu.c
@@ -239,8 +239,12 @@
 	 * to the kernel tables.  The boot loader enables the U bit in
 	 * its tables.
 	 */
-	if (!IS_BSP() && (cpu_stdext_feature & CPUID_STDEXT_SMEP))
-		cr4 |= CR4_SMEP;
+	if (!IS_BSP()) {
+		if (cpu_stdext_feature & CPUID_STDEXT_SMEP)
+			cr4 |= CR4_SMEP;
+		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
+			cr4 |= CR4_SMAP;
+	}
 	load_cr4(cr4);
 	if (IS_BSP() && (amd_feature & AMDID_NX) != 0) {
 		msr = rdmsr(MSR_EFER) | EFER_NXE;
Index: head/sys/amd64/amd64/machdep.c
===================================================================
--- head/sys/amd64/amd64/machdep.c
+++ head/sys/amd64/amd64/machdep.c
@@ -1548,7 +1548,7 @@
 	msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
 	    ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
 	wrmsr(MSR_STAR, msr);
-	wrmsr(MSR_SF_MASK, PSL_NT | PSL_T | PSL_I | PSL_C | PSL_D);
+	wrmsr(MSR_SF_MASK, PSL_NT | PSL_T | PSL_I | PSL_C | PSL_D | PSL_AC);
 }
 
 u_int64_t
Index: head/sys/amd64/amd64/pmap.c
===================================================================
--- head/sys/amd64/amd64/pmap.c
+++ head/sys/amd64/amd64/pmap.c
@@ -1092,6 +1092,7 @@
 {
 	vm_offset_t va;
 	pt_entry_t *pte;
+	uint64_t cr4;
 	int i;
 
 	KERNend = *firstaddr;
@@ -1118,11 +1119,21 @@
 
 	virtual_end = VM_MAX_KERNEL_ADDRESS;
 
-	/* XXX do %cr0 as well */
-	load_cr4(rcr4() | CR4_PGE);
+	/*
+	 * Enable PG_G global pages, then switch to the kernel page
+	 * table from the bootstrap page table.  After the switch, it
+	 * is possible to enable SMEP and SMAP since PG_U bits are
+	 * correct now.
+	 */
+	cr4 = rcr4();
+	cr4 |= CR4_PGE;
+	load_cr4(cr4);
 	load_cr3(KPML4phys);
 	if (cpu_stdext_feature & CPUID_STDEXT_SMEP)
-		load_cr4(rcr4() | CR4_SMEP);
+		cr4 |= CR4_SMEP;
+	if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
+		cr4 |= CR4_SMAP;
+	load_cr4(cr4);
 
 	/*
 	 * Initialize the kernel pmap (which is statically allocated).
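
The bootstrap ordering matters here: PG_U bits are only correct in the kernel page table, so the code switches %cr3 first and only then turns on CR4_SMEP/CR4_SMAP (and the syscall path masks PSL_AC via MSR_SF_MASK so syscalls enter with AC clear). A hedged C sketch of the cr4 sequencing, with stand-in stubs for the privileged accessors, which exist only in kernel context:

#include <stdint.h>
#include <stdio.h>

#define	CR4_PGE		0x00000080	/* page global enable, bit 7 */
#define	CR4_SMEP	0x00100000	/* bit 20 */
#define	CR4_SMAP	0x00200000	/* bit 21 */

static uint64_t fake_cr4 = 0x620;	/* stand-in for the real register */

static uint64_t rcr4(void) { return (fake_cr4); }
static void load_cr4(uint64_t v) { fake_cr4 = v; }
static void load_cr3(uint64_t v) { (void)v; }

static void
bootstrap_cr4(int have_smep, int have_smap, uint64_t kpml4)
{
	uint64_t cr4;

	/* PG_G first, then the page-table switch... */
	cr4 = rcr4();
	cr4 |= CR4_PGE;
	load_cr4(cr4);
	load_cr3(kpml4);
	/* ...and only then SMEP/SMAP, once PG_U bits are trustworthy. */
	if (have_smep)
		cr4 |= CR4_SMEP;
	if (have_smap)
		cr4 |= CR4_SMAP;
	load_cr4(cr4);
}

int
main(void)
{
	bootstrap_cr4(1, 1, 0x1000);
	printf("%#jx\n", (uintmax_t)fake_cr4);
	return (0);
}
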
Index: head/sys/amd64/amd64/support.S
===================================================================
--- head/sys/amd64/amd64/support.S
+++ head/sys/amd64/amd64/support.S
@@ -226,7 +226,7 @@
  * copyout(from_kernel, to_user, len)
  *         %rdi,        %rsi,    %rdx
  */
-ENTRY(copyout)
+ENTRY(copyout_nosmap)
 	PUSH_FRAME_POINTER
 	movq	PCPU(CURPCB),%rax
 	movq	$copyout_fault,PCB_ONFAULT(%rax)
@@ -268,6 +268,55 @@
 	rep
 	movsb
 
+	jmp	done_copyout
+END(copyout_nosmap)
+
+ENTRY(copyout_smap)
+	PUSH_FRAME_POINTER
+	movq	PCPU(CURPCB),%rax
+	/* Trap entry clears PSL.AC */
+	movq	$copyout_fault,PCB_ONFAULT(%rax)
+	testq	%rdx,%rdx			/* anything to do? */
+	jz	done_copyout
+
+	/*
+	 * Check explicitly for non-user addresses.  If 486 write protection
+	 * is being used, this check is essential because we are in kernel
+	 * mode so the h/w does not provide any protection against writing
+	 * kernel addresses.
+	 */
+
+	/*
+	 * First, prevent address wrapping.
+	 */
+	movq	%rsi,%rax
+	addq	%rdx,%rax
+	jc	copyout_fault
+/*
+ * XXX STOP USING VM_MAXUSER_ADDRESS.
+ * It is an end address, not a max, so every time it is used correctly it
+ * looks like there is an off by one error, and of course it caused an off
+ * by one error in several places.
+ */
+	movq	$VM_MAXUSER_ADDRESS,%rcx
+	cmpq	%rcx,%rax
+	ja	copyout_fault
+
+	xchgq	%rdi,%rsi
+	/* bcopy(%rsi, %rdi, %rdx) */
+	movq	%rdx,%rcx
+
+	shrq	$3,%rcx
+	cld
+	stac
+	rep
+	movsq
+	movb	%dl,%cl
+	andb	$7,%cl
+	rep
+	movsb
+	clac
+
 done_copyout:
 	xorl	%eax,%eax
 	movq	PCPU(CURPCB),%rdx
@@ -288,7 +337,7 @@
  * copyin(from_user, to_kernel, len)
  *        %rdi,      %rsi,      %rdx
  */
-ENTRY(copyin)
+ENTRY(copyin_nosmap)
 	PUSH_FRAME_POINTER
 	movq	PCPU(CURPCB),%rax
 	movq	$copyin_fault,PCB_ONFAULT(%rax)
@@ -309,13 +358,47 @@
 	movq	%rdx,%rcx
 	movb	%cl,%al
 	shrq	$3,%rcx				/* copy longword-wise */
+	cld
 	rep
 	movsq
 	movb	%al,%cl
 	andb	$7,%cl				/* copy remaining bytes */
+	rep
+	movsb
+
+	jmp	done_copyin
+END(copyin_nosmap)
+
+ENTRY(copyin_smap)
+	PUSH_FRAME_POINTER
+	movq	PCPU(CURPCB),%rax
+	movq	$copyin_fault,PCB_ONFAULT(%rax)
+	testq	%rdx,%rdx			/* anything to do? */
+	jz	done_copyin
+
+	/*
+	 * make sure address is valid
+	 */
+	movq	%rdi,%rax
+	addq	%rdx,%rax
+	jc	copyin_fault
+	movq	$VM_MAXUSER_ADDRESS,%rcx
+	cmpq	%rcx,%rax
+	ja	copyin_fault
+
+	xchgq	%rdi,%rsi
+	movq	%rdx,%rcx
+	movb	%cl,%al
+	shrq	$3,%rcx				/* copy longword-wise */
+	stac
+	rep
+	movsq
+	movb	%al,%cl
+	andb	$7,%cl				/* copy remaining bytes */
 	je	done_copyin
 	rep
 	movsb
+	clac
 
 done_copyin:
 	xorl	%eax,%eax
@@ -323,6 +406,7 @@
 	movq	%rax,PCB_ONFAULT(%rdx)
 	POP_FRAME_POINTER
 	ret
+END(copyin_smap)
 
 	ALIGN_TEXT
 copyin_fault:
@@ -331,14 +415,13 @@
 	movq	%rax,PCB_ONFAULT(%rcx)
 	movq	$EFAULT,%rax
 	POP_FRAME_POINTER
 	ret
-END(copyin)
 
 /*
  * casueword32.  Compare and set user integer.  Returns -1 on fault,
  * 0 if access was successful.  Old value is written to *oldp.
  * dst = %rdi, old = %esi, oldp = %rdx, new = %ecx
  */
-ENTRY(casueword32)
+ENTRY(casueword32_nosmap)
 	PUSH_FRAME_POINTER
 	movq	PCPU(CURPCB),%r8
 	movq	$fusufault,PCB_ONFAULT(%r8)
@@ -370,14 +453,50 @@
 	movl	%esi,(%rdx)			/* oldp = %rdx */
 	POP_FRAME_POINTER
 	ret
-END(casueword32)
+END(casueword32_nosmap)
 
+ENTRY(casueword32_smap)
+	PUSH_FRAME_POINTER
+	movq	PCPU(CURPCB),%r8
+	movq	$fusufault,PCB_ONFAULT(%r8)
+
+	movq	$VM_MAXUSER_ADDRESS-4,%rax
+	cmpq	%rax,%rdi			/* verify address is valid */
+	ja	fusufault
+
+	movl	%esi,%eax			/* old */
+	stac
+#ifdef SMP
+	lock
+#endif
+	cmpxchgl %ecx,(%rdi)			/* new = %ecx */
+	clac
+
+	/*
+	 * The old value is in %eax.  If the store succeeded it will be the
+	 * value we expected (old) from before the store, otherwise it will
+	 * be the current value.  Save %eax into %esi to prepare the return
+	 * value.
+	 */
+	movl	%eax,%esi
+	xorl	%eax,%eax
+	movq	%rax,PCB_ONFAULT(%r8)
+
+	/*
+	 * Access the oldp after the pcb_onfault is cleared, to correctly
+	 * catch corrupted pointer.
+	 */
+	movl	%esi,(%rdx)			/* oldp = %rdx */
+	POP_FRAME_POINTER
+	ret
+END(casueword32_smap)
+
 /*
  * casueword.  Compare and set user long.  Returns -1 on fault,
  * 0 if access was successful.  Old value is written to *oldp.
  * dst = %rdi, old = %rsi, oldp = %rdx, new = %rcx
  */
-ENTRY(casueword)
+ENTRY(casueword_nosmap)
 	PUSH_FRAME_POINTER
 	movq	PCPU(CURPCB),%r8
 	movq	$fusufault,PCB_ONFAULT(%r8)
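
Note how casueword32_smap brackets only the cmpxchg itself with stac/clac and defers the *oldvalp store until pcb_onfault is cleared, so a corrupted oldp pointer faults like an ordinary kernel bug instead of being swallowed. The userland model below shows the contract callers rely on, written with C11 atomics; the names and the fault-free return value are illustrative, not the kernel API:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Model of the casueword32 contract: the observed old value is always
 * written to *oldvalp, whether or not the exchange happened; the caller
 * detects success by comparing *oldvalp with its expected value.
 * (The kernel routine additionally returns -1 on fault.)
 */
static int
cas32_model(_Atomic uint32_t *base, uint32_t oldval, uint32_t *oldvalp,
    uint32_t newval)
{
	uint32_t expected = oldval;

	atomic_compare_exchange_strong(base, &expected, newval);
	*oldvalp = expected;	/* current value on failure, oldval on success */
	return (0);
}

int
main(void)
{
	_Atomic uint32_t word = 5;
	uint32_t seen;

	cas32_model(&word, 5, &seen, 9);
	printf("seen %u, word now %u\n", seen, (unsigned)atomic_load(&word));
	return (0);
}
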
@@ -403,16 +522,45 @@
 	movq	%rsi,(%rdx)
 	POP_FRAME_POINTER
 	ret
-END(casueword)
+END(casueword_nosmap)
 
+ENTRY(casueword_smap)
+	PUSH_FRAME_POINTER
+	movq	PCPU(CURPCB),%r8
+	movq	$fusufault,PCB_ONFAULT(%r8)
+
+	movq	$VM_MAXUSER_ADDRESS-4,%rax
+	cmpq	%rax,%rdi			/* verify address is valid */
+	ja	fusufault
+
+	movq	%rsi,%rax			/* old */
+	stac
+#ifdef SMP
+	lock
+#endif
+	cmpxchgq %rcx,(%rdi)			/* new = %rcx */
+	clac
+
+	/*
+	 * The old value is in %rax.  If the store succeeded it will be the
+	 * value we expected (old) from before the store, otherwise it will
+	 * be the current value.
+	 */
+	movq	%rax,%rsi
+	xorl	%eax,%eax
+	movq	%rax,PCB_ONFAULT(%r8)
+	movq	%rsi,(%rdx)
+	POP_FRAME_POINTER
+	ret
+END(casueword_smap)
+
 /*
  * Fetch (load) a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit
  * byte from user memory.
  * addr = %rdi, valp = %rsi
  */
-ALTENTRY(fueword64)
-ENTRY(fueword)
+ENTRY(fueword_nosmap)
 	PUSH_FRAME_POINTER
 	movq	PCPU(CURPCB),%rcx
 	movq	$fusufault,PCB_ONFAULT(%rcx)
@@ -427,14 +575,32 @@
 	movq	%r11,(%rsi)
 	POP_FRAME_POINTER
 	ret
-END(fueword64)
-END(fueword)
+END(fueword64_nosmap)
 
-ENTRY(fueword32)
+ENTRY(fueword_smap)
 	PUSH_FRAME_POINTER
 	movq	PCPU(CURPCB),%rcx
 	movq	$fusufault,PCB_ONFAULT(%rcx)
 
+	movq	$VM_MAXUSER_ADDRESS-8,%rax
+	cmpq	%rax,%rdi			/* verify address is valid */
+	ja	fusufault
+
+	xorl	%eax,%eax
+	stac
+	movq	(%rdi),%r11
+	clac
+	movq	%rax,PCB_ONFAULT(%rcx)
+	movq	%r11,(%rsi)
+	POP_FRAME_POINTER
+	ret
+END(fueword64_smap)
+
+ENTRY(fueword32_nosmap)
+	PUSH_FRAME_POINTER
+	movq	PCPU(CURPCB),%rcx
+	movq	$fusufault,PCB_ONFAULT(%rcx)
+
 	movq	$VM_MAXUSER_ADDRESS-4,%rax
 	cmpq	%rax,%rdi			/* verify address is valid */
 	ja	fusufault
@@ -445,13 +611,32 @@
 	movl	%r11d,(%rsi)
 	POP_FRAME_POINTER
 	ret
-END(fueword32)
+END(fueword32_nosmap)
 
-ENTRY(fuword16)
+ENTRY(fueword32_smap)
 	PUSH_FRAME_POINTER
 	movq	PCPU(CURPCB),%rcx
 	movq	$fusufault,PCB_ONFAULT(%rcx)
 
+	movq	$VM_MAXUSER_ADDRESS-4,%rax
+	cmpq	%rax,%rdi			/* verify address is valid */
+	ja	fusufault
+
+	xorl	%eax,%eax
+	stac
+	movl	(%rdi),%r11d
+	clac
+	movq	%rax,PCB_ONFAULT(%rcx)
+	movl	%r11d,(%rsi)
+	POP_FRAME_POINTER
+	ret
+END(fueword32_smap)
+
+ENTRY(fuword16_nosmap)
+	PUSH_FRAME_POINTER
+	movq	PCPU(CURPCB),%rcx
+	movq	$fusufault,PCB_ONFAULT(%rcx)
+
 	movq	$VM_MAXUSER_ADDRESS-2,%rax
 	cmpq	%rax,%rdi
 	ja	fusufault
@@ -460,13 +645,30 @@
 	movq	$0,PCB_ONFAULT(%rcx)
 	POP_FRAME_POINTER
 	ret
-END(fuword16)
+END(fuword16_nosmap)
 
-ENTRY(fubyte)
+ENTRY(fuword16_smap)
 	PUSH_FRAME_POINTER
 	movq	PCPU(CURPCB),%rcx
 	movq	$fusufault,PCB_ONFAULT(%rcx)
 
+	movq	$VM_MAXUSER_ADDRESS-2,%rax
+	cmpq	%rax,%rdi
+	ja	fusufault
+
+	stac
+	movzwl	(%rdi),%eax
+	clac
+	movq	$0,PCB_ONFAULT(%rcx)
+	POP_FRAME_POINTER
+	ret
+END(fuword16_smap)
+
+ENTRY(fubyte_nosmap)
+	PUSH_FRAME_POINTER
+	movq	PCPU(CURPCB),%rcx
+	movq	$fusufault,PCB_ONFAULT(%rcx)
+
 	movq	$VM_MAXUSER_ADDRESS-1,%rax
 	cmpq	%rax,%rdi
 	ja	fusufault
@@ -475,9 +677,27 @@
 	movq	$0,PCB_ONFAULT(%rcx)
 	POP_FRAME_POINTER
 	ret
-END(fubyte)
+END(fubyte_nosmap)
 
+ENTRY(fubyte_smap)
+	PUSH_FRAME_POINTER
+	movq	PCPU(CURPCB),%rcx
+	movq	$fusufault,PCB_ONFAULT(%rcx)
+
+	movq	$VM_MAXUSER_ADDRESS-1,%rax
+	cmpq	%rax,%rdi
+	ja	fusufault
+
+	stac
+	movzbl	(%rdi),%eax
+	clac
+	movq	$0,PCB_ONFAULT(%rcx)
+	POP_FRAME_POINTER
+	ret
+END(fubyte_smap)
+
 	ALIGN_TEXT
+	/* Fault entry clears PSL.AC */
 fusufault:
 	movq	PCPU(CURPCB),%rcx
 	xorl	%eax,%eax
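
All of the fu*/su* primitives share the fusufault protocol: arm pcb_onfault, touch user memory between stac and clac, then disarm. The sketch below models only that control flow in portable C, with setjmp standing in for pcb_onfault; this is an analogy, not the real mechanism, since in the kernel the page-fault handler transfers control to the fusufault label directly:

#include <setjmp.h>
#include <stdio.h>

static jmp_buf onfault;			/* stand-in for pcb_onfault */

static int
fubyte_model(const unsigned char *base)
{
	int val;

	if (setjmp(onfault) != 0)
		return (-1);		/* "fusufault": the access faulted */
	/* The SMAP variant would wrap this access in stac/clac. */
	val = *base;
	/* Disarm before returning, as the assembly clears PCB_ONFAULT. */
	return (val);
}

int
main(void)
{
	unsigned char byte = 0x5a;

	printf("%d\n", fubyte_model(&byte));	/* prints 90 */
	return (0);
}
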
@@ -491,8 +711,7 @@
  * user memory.
  * addr = %rdi, value = %rsi
  */
-ALTENTRY(suword64)
-ENTRY(suword)
+ENTRY(suword_nosmap)
 	PUSH_FRAME_POINTER
 	movq	PCPU(CURPCB),%rcx
 	movq	$fusufault,PCB_ONFAULT(%rcx)
@@ -507,14 +726,32 @@
 	movq	%rax,PCB_ONFAULT(%rcx)
 	POP_FRAME_POINTER
 	ret
-END(suword64)
-END(suword)
+END(suword_nosmap)
 
-ENTRY(suword32)
+ENTRY(suword_smap)
 	PUSH_FRAME_POINTER
 	movq	PCPU(CURPCB),%rcx
 	movq	$fusufault,PCB_ONFAULT(%rcx)
 
+	movq	$VM_MAXUSER_ADDRESS-8,%rax
+	cmpq	%rax,%rdi			/* verify address validity */
+	ja	fusufault
+
+	stac
+	movq	%rsi,(%rdi)
+	clac
+	xorl	%eax,%eax
+	movq	PCPU(CURPCB),%rcx
+	movq	%rax,PCB_ONFAULT(%rcx)
+	POP_FRAME_POINTER
+	ret
+END(suword_smap)
+
+ENTRY(suword32_nosmap)
+	PUSH_FRAME_POINTER
+	movq	PCPU(CURPCB),%rcx
+	movq	$fusufault,PCB_ONFAULT(%rcx)
+
 	movq	$VM_MAXUSER_ADDRESS-4,%rax
 	cmpq	%rax,%rdi			/* verify address validity */
 	ja	fusufault
@@ -525,13 +762,32 @@
 	movq	%rax,PCB_ONFAULT(%rcx)
 	POP_FRAME_POINTER
 	ret
-END(suword32)
+END(suword32_nosmap)
 
-ENTRY(suword16)
+ENTRY(suword32_smap)
 	PUSH_FRAME_POINTER
 	movq	PCPU(CURPCB),%rcx
 	movq	$fusufault,PCB_ONFAULT(%rcx)
 
+	movq	$VM_MAXUSER_ADDRESS-4,%rax
+	cmpq	%rax,%rdi			/* verify address validity */
+	ja	fusufault
+
+	stac
+	movl	%esi,(%rdi)
+	clac
+	xorl	%eax,%eax
+	movq	PCPU(CURPCB),%rcx
+	movq	%rax,PCB_ONFAULT(%rcx)
+	POP_FRAME_POINTER
+	ret
+END(suword32_smap)
+
+ENTRY(suword16_nosmap)
+	PUSH_FRAME_POINTER
+	movq	PCPU(CURPCB),%rcx
+	movq	$fusufault,PCB_ONFAULT(%rcx)
+
 	movq	$VM_MAXUSER_ADDRESS-2,%rax
 	cmpq	%rax,%rdi			/* verify address validity */
 	ja	fusufault
@@ -542,13 +798,32 @@
 	movq	%rax,PCB_ONFAULT(%rcx)
 	POP_FRAME_POINTER
 	ret
-END(suword16)
+END(suword16_nosmap)
 
-ENTRY(subyte)
+ENTRY(suword16_smap)
 	PUSH_FRAME_POINTER
 	movq	PCPU(CURPCB),%rcx
 	movq	$fusufault,PCB_ONFAULT(%rcx)
 
+	movq	$VM_MAXUSER_ADDRESS-2,%rax
+	cmpq	%rax,%rdi			/* verify address validity */
+	ja	fusufault
+
+	stac
+	movw	%si,(%rdi)
+	clac
+	xorl	%eax,%eax
+	movq	PCPU(CURPCB),%rcx		/* restore trashed register */
+	movq	%rax,PCB_ONFAULT(%rcx)
+	POP_FRAME_POINTER
+	ret
+END(suword16_smap)
+
+ENTRY(subyte_nosmap)
+	PUSH_FRAME_POINTER
+	movq	PCPU(CURPCB),%rcx
+	movq	$fusufault,PCB_ONFAULT(%rcx)
+
 	movq	$VM_MAXUSER_ADDRESS-1,%rax
 	cmpq	%rax,%rdi			/* verify address validity */
 	ja	fusufault
@@ -560,8 +835,28 @@
 	movq	%rax,PCB_ONFAULT(%rcx)
 	POP_FRAME_POINTER
 	ret
-END(subyte)
+END(subyte_nosmap)
 
+ENTRY(subyte_smap)
+	PUSH_FRAME_POINTER
+	movq	PCPU(CURPCB),%rcx
+	movq	$fusufault,PCB_ONFAULT(%rcx)
+
+	movq	$VM_MAXUSER_ADDRESS-1,%rax
+	cmpq	%rax,%rdi			/* verify address validity */
+	ja	fusufault
+
+	movl	%esi,%eax
+	stac
+	movb	%al,(%rdi)
+	clac
+	xorl	%eax,%eax
+	movq	PCPU(CURPCB),%rcx		/* restore trashed register */
+	movq	%rax,PCB_ONFAULT(%rcx)
+	POP_FRAME_POINTER
+	ret
+END(subyte_smap)
+
 /*
  * copyinstr(from, to, maxlen, int *lencopied)
  *           %rdi,  %rsi, %rdx, %rcx
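
Both copy directions use the same two-step range check: detect wraparound of base + len first, then compare the end address against VM_MAXUSER_ADDRESS (an exclusive end, per the XXX comment in copyout_smap above). A C rendering of that check follows; it assumes GCC or Clang for __builtin_add_overflow, and the limit constant is illustrative rather than the exact value from amd64's vmparam.h:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative user VA limit; the real one lives in vmparam.h. */
#define	VM_MAXUSER_ADDRESS	0x0000800000000000ULL

/*
 * Fault if base + len wraps (the addq ...; jc pair) or if the end lands
 * above the user VA limit (the cmpq ...; ja pair).
 */
static bool
useraddr_ok(uint64_t base, uint64_t len)
{
	uint64_t end;

	if (__builtin_add_overflow(base, len, &end))
		return (false);			/* addq ...; jc fault */
	return (end <= VM_MAXUSER_ADDRESS);	/* cmpq ...; ja fault */
}

int
main(void)
{
	printf("%d %d\n", useraddr_ok(0x1000, 0x100),
	    useraddr_ok(UINT64_MAX, 2));	/* prints 1 0 */
	return (0);
}
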
@@ -571,7 +866,7 @@
  * EFAULT on protection violations.  If lencopied is non-zero,
  * return the actual length in *lencopied.
  */
-ENTRY(copyinstr)
+ENTRY(copyinstr_nosmap)
 	PUSH_FRAME_POINTER
 	movq	%rdx,%r8			/* %r8 = maxlen */
 	movq	%rcx,%r9			/* %r9 = *len */
@@ -592,29 +887,67 @@
 	movq	%rax,%r8
 1:
 	incq	%rdx
+	cld
 2:
 	decq	%rdx
-	jz	3f
+	jz	copyinstr_toolong
 	lodsb
 	stosb
 	orb	%al,%al
 	jnz	2b
 
+	jmp	copyinstr_succ
+END(copyinstr_nosmap)
+
+ENTRY(copyinstr_smap)
+	PUSH_FRAME_POINTER
+	movq	%rdx,%r8			/* %r8 = maxlen */
+	movq	%rcx,%r9			/* %r9 = *len */
+	xchgq	%rdi,%rsi			/* %rdi = from, %rsi = to */
+	movq	PCPU(CURPCB),%rcx
+	movq	$cpystrflt,PCB_ONFAULT(%rcx)
+
+	movq	$VM_MAXUSER_ADDRESS,%rax
+
+	/* make sure 'from' is within bounds */
+	subq	%rsi,%rax
+	jbe	cpystrflt
+
+	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
+	cmpq	%rdx,%rax
+	jae	1f
+	movq	%rax,%rdx
+	movq	%rax,%r8
+1:
+	incq	%rdx
+
+2:
+	decq	%rdx
+	jz	copyinstr_succ
+
+	stac
+	lodsb
+	stosb
+	clac
+	orb	%al,%al
+	jnz	2b
+
+copyinstr_succ:
 	/* Success -- 0 byte reached */
 	decq	%rdx
 	xorl	%eax,%eax
 	jmp	cpystrflt_x
-3:
+copyinstr_toolong:
 	/* rdx is zero - return ENAMETOOLONG or EFAULT */
 	movq	$VM_MAXUSER_ADDRESS,%rax
 	cmpq	%rax,%rsi
 	jae	cpystrflt
-4:
 	movq	$ENAMETOOLONG,%rax
 	jmp	cpystrflt_x
 
+	/* Fault entry clears PSL.AC */
 cpystrflt:
 	movq	$EFAULT,%rax
@@ -630,7 +963,7 @@
 1:
 	POP_FRAME_POINTER
 	ret
-END(copyinstr)
+END(copyinstr_smap)
 
 /*
  * copystr(from, to, maxlen, int *lencopied)
Index: head/sys/amd64/amd64/trap.c
===================================================================
--- head/sys/amd64/amd64/trap.c
+++ head/sys/amd64/amd64/trap.c
@@ -673,6 +673,24 @@
 	trap(frame);
 }
 
+static bool
+trap_is_smap(struct trapframe *frame)
+{
+
+	/*
+	 * A page fault on a userspace address is classified as
+	 * SMAP-induced if:
+	 * - SMAP is supported;
+	 * - kernel mode accessed present data page;
+	 * - rflags.AC was cleared.
+	 * Kernel must never access user space with rflags.AC cleared
+	 * if SMAP is enabled.
+	 */
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 &&
+	    (frame->tf_err & (PGEX_P | PGEX_U | PGEX_I | PGEX_RSV)) ==
+	    PGEX_P && (frame->tf_rflags & PSL_AC) == 0);
+}
+
 static int
 trap_pfault(struct trapframe *frame, int usermode)
 {
@@ -750,9 +768,13 @@
 		 * handling routine.  Since accessing the address
 		 * without the handler is a bug, do not try to handle
 		 * it normally, and panic immediately.
+		 *
+		 * If SMAP is enabled, filter SMAP faults also,
+		 * because illegal access might occur to the mapped
+		 * user address, causing infinite loop.
 		 */
 		if (!usermode && (td->td_intr_nesting_level != 0 ||
-		    curpcb->pcb_onfault == NULL)) {
+		    trap_is_smap(frame) || curpcb->pcb_onfault == NULL)) {
 			trap_fatal(frame, eva);
 			return (-1);
 		}
Index: head/sys/amd64/ia32/ia32_exception.S
===================================================================
--- head/sys/amd64/ia32/ia32_exception.S
+++ head/sys/amd64/ia32/ia32_exception.S
@@ -70,7 +70,9 @@
 	movq	%r14,TF_R14(%rsp)
 	movq	%r15,TF_R15(%rsp)
 	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
-	cld
+	pushfq
+	andq	$~(PSL_D | PSL_AC),(%rsp)
+	popfq
 	FAKE_MCOUNT(TF_RIP(%rsp))
 	movq	%rsp, %rdi
 	call	ia32_syscall
Index: head/sys/amd64/include/asmacros.h
===================================================================
--- head/sys/amd64/include/asmacros.h
+++ head/sys/amd64/include/asmacros.h
@@ -255,7 +255,9 @@
 	movq	%r15,TF_R15(%rsp)
 	SAVE_SEGS
 	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
-	cld
+	pushfq
+	andq	$~(PSL_D|PSL_AC),(%rsp)
+	popfq
 	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* come from kernel ? */
 	jz	1f /* yes, leave PCB_FULL_IRET alone */
 	movq	PCPU(CURPCB),%r8
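
trap_is_smap() above relies on the invariant established by the entry-path changes: with SMAP enabled, the kernel only touches user pages while PSL.AC is set, so a supervisor fault on a present user data page with AC clear must be an SMAP violation and is made fatal rather than endlessly retried. A standalone rendering of the predicate, using the architectural page-fault error-code bits:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* x86 page-fault error code bits (architectural). */
#define	PGEX_P		0x01	/* page was present */
#define	PGEX_U		0x04	/* access came from user mode */
#define	PGEX_RSV	0x08	/* reserved PTE bit was set */
#define	PGEX_I		0x10	/* instruction fetch */

#define	PSL_AC		0x00040000	/* RFLAGS.AC, bit 18 */

/* Same logic as trap_is_smap(), lifted out of the trapframe. */
static bool
is_smap_fault(bool smap_supported, uint32_t tf_err, uint64_t tf_rflags)
{
	return (smap_supported &&
	    (tf_err & (PGEX_P | PGEX_U | PGEX_I | PGEX_RSV)) == PGEX_P &&
	    (tf_rflags & PSL_AC) == 0);
}

int
main(void)
{
	/* Kernel data access, present page, AC clear: an SMAP fault. */
	printf("%d\n", is_smap_fault(true, PGEX_P, 0x2));
	return (0);
}
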
Index: head/sys/conf/files.amd64
===================================================================
--- head/sys/conf/files.amd64
+++ head/sys/conf/files.amd64
@@ -128,6 +128,7 @@
 #amd64/amd64/apic_vector.S		standard
 amd64/amd64/bios.c			standard
 amd64/amd64/bpf_jit_machdep.c		optional	bpf_jitter
+amd64/amd64/copyout.c			standard
 amd64/amd64/cpu_switch.S		standard
 amd64/amd64/db_disasm.c			optional	ddb
 amd64/amd64/db_interface.c		optional	ddb
Index: head/sys/dev/hyperv/vmbus/amd64/vmbus_vector.S
===================================================================
--- head/sys/dev/hyperv/vmbus/amd64/vmbus_vector.S
+++ head/sys/dev/hyperv/vmbus/amd64/vmbus_vector.S
@@ -28,6 +28,7 @@
 
 #include "assym.inc"
 
+#include <machine/psl.h>
 #include <machine/asmacros.h>
 #include <machine/specialreg.h>
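
The feature bit consulted by every resolver above, cpu_stdext_feature, is the kernel's copy of CPUID leaf 7 (subleaf 0) EBX, where SMEP is bit 7 and SMAP is bit 20; the psl.h includes added to the assembly files exist only so the reworked PUSH_FRAME macro can expand PSL_D and PSL_AC. A userland check of the same CPUID bits, assuming a compiler that ships <cpuid.h> with __get_cpuid_count (recent GCC or Clang):

#include <cpuid.h>
#include <stdio.h>

#define	CPUID_STDEXT_SMEP	0x00000080	/* leaf 7 EBX bit 7 */
#define	CPUID_STDEXT_SMAP	0x00100000	/* leaf 7 EBX bit 20 */

int
main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID.(EAX=7,ECX=0): structured extended feature flags. */
	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
		return (1);
	printf("SMEP %s, SMAP %s\n",
	    (ebx & CPUID_STDEXT_SMEP) ? "yes" : "no",
	    (ebx & CPUID_STDEXT_SMAP) ? "yes" : "no");
	return (0);
}
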