diff --git a/share/man/man9/casuword.9 b/share/man/man9/casuword.9
index bd8849ac7a18..a50b0290f0e8 100644
--- a/share/man/man9/casuword.9
+++ b/share/man/man9/casuword.9
@@ -1,113 +1,115 @@
-.\" Copyright (c) 2014 The FreeBSD Foundation
+.\" Copyright (c) 2014, 2019 The FreeBSD Foundation
 .\" All rights reserved.
 .\"
 .\" Part of this documentation was written by
 .\" Konstantin Belousov under sponsorship
 .\" from the FreeBSD Foundation.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\" notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\" notice, this list of conditions and the following disclaimer in the
 .\" documentation and/or other materials provided with the distribution.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\" $FreeBSD$
 .\"
-.Dd October 21, 2014
+.Dd April 19, 2019
 .Dt CASU 9
 .Os
 .Sh NAME
 .Nm casueword ,
 .Nm casueword32 ,
 .Nm casuword ,
 .Nm casuword32
 .Nd fetch, compare and store data from user-space
 .Sh SYNOPSIS
 .In sys/types.h
 .In sys/systm.h
 .Ft int
 .Fo casueword
 .Fa "volatile u_long *base"
 .Fa "u_long oldval"
 .Fa "u_long *oldvalp"
 .Fa "u_long newval"
 .Fc
 .Ft int
 .Fo casueword32
 .Fa "volatile uint32_t *base"
 .Fa "uint32_t oldval"
 .Fa "uint32_t *oldvalp"
 .Fa "uint32_t newval"
 .Fc
 .Ft u_long
 .Fo casuword
 .Fa "volatile u_long *base"
 .Fa "u_long oldval"
 .Fa "u_long newval"
 .Fc
 .Ft uint32_t
 .Fo casuword32
 .Fa "volatile uint32_t *base"
 .Fa "uint32_t oldval"
 .Fa "uint32_t newval"
 .Fc
 .Sh DESCRIPTION
 The
 .Nm
 functions perform an atomic compare-and-swap operation on a value in
 the usermode memory of the current process.
 .Pp
 The
 .Nm
 routines read the value from user memory at address
 .Pa base ,
 and compare it with
 .Pa oldval .
 If the values are equal,
 .Pa newval
 is written to
 .Pa *base .
 In the case of
 .Fn casueword32
 and
 .Fn casueword ,
 the old value is stored into the (kernel-mode) variable pointed to by
 .Pa oldvalp .
 The userspace value must be naturally aligned.
 .Pp
 Callers of the
 .Fn casuword
 and
 .Fn casuword32
 functions cannot distinguish between a -1 read from userspace and
 function failure.
 .Sh RETURN VALUES
 The
 .Fn casuword
 and
 .Fn casuword32
 functions return the data fetched or -1 on failure.
 The
 .Fn casueword
 and
 .Fn casueword32
-functions return 0 on success and -1 on failure.
+functions return 0 on success, -1 on failure to access memory,
+and 1 when the comparison or the store failed.
+The store can fail on load-linked/store-conditional architectures.
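With the new return convention, a caller has to distinguish a fault (-1) from a failed comparison or store (1), and on 1 decide whether to retry based on the value read back. A minimal sketch of the expected calling pattern follows; it is not part of the patch, and try_set_flag is a hypothetical helper:

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/errno.h>

/*
 * Hypothetical illustration of the casueword32(9) return convention:
 * atomically OR a flag into a user-space word expected to hold "expect".
 */
static int
try_set_flag(volatile uint32_t *uaddr, uint32_t expect, uint32_t flag)
{
	uint32_t old;
	int rv;

	for (;;) {
		rv = casueword32(uaddr, expect, &old, expect | flag);
		if (rv == -1)
			return (EFAULT);	/* user address inaccessible */
		if (rv == 0)
			return (0);		/* swap succeeded */
		/*
		 * rv == 1: the comparison failed, or (on load-linked/
		 * store-conditional architectures) the store was interrupted
		 * even though the comparison succeeded.
		 */
		if (old != expect)
			return (EBUSY);		/* the word really changed */
		/* Spurious LL/SC store failure: retry. */
	}
}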
.Sh SEE ALSO .Xr atomic 9 , .Xr fetch 9 , .Xr store 9 diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S index 99609f5bc01a..545967f7ae86 100644 --- a/sys/amd64/amd64/support.S +++ b/sys/amd64/amd64/support.S @@ -1,1865 +1,1873 @@ /*- * Copyright (c) 2018-2019 The FreeBSD Foundation * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1993 The Regents of the University of California. * All rights reserved. * * Portions of this software were developed by * Konstantin Belousov under sponsorship from * the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_ddb.h" #include #include #include #include "assym.inc" .text /* Address: %rdi */ ENTRY(pagezero_std) PUSH_FRAME_POINTER movl $PAGE_SIZE/8,%ecx xorl %eax,%eax rep stosq POP_FRAME_POINTER ret END(pagezero_std) ENTRY(pagezero_erms) PUSH_FRAME_POINTER movl $PAGE_SIZE,%ecx xorl %eax,%eax rep stosb POP_FRAME_POINTER ret END(pagezero_erms) /* * pagecopy(%rdi=from, %rsi=to) */ ENTRY(pagecopy) PUSH_FRAME_POINTER movl $PAGE_SIZE/8,%ecx movq %rdi,%r9 movq %rsi,%rdi movq %r9,%rsi rep movsq POP_FRAME_POINTER ret END(pagecopy) /* Address: %rdi */ ENTRY(sse2_pagezero) PUSH_FRAME_POINTER movq $-PAGE_SIZE,%rdx subq %rdx,%rdi xorl %eax,%eax jmp 1f /* * The loop takes 29 bytes. Ensure that it doesn't cross a 32-byte * cache line. 
*/ .p2align 5,0x90 1: movnti %rax,(%rdi,%rdx) movnti %rax,8(%rdi,%rdx) movnti %rax,16(%rdi,%rdx) movnti %rax,24(%rdi,%rdx) addq $32,%rdx jne 1b sfence POP_FRAME_POINTER ret END(sse2_pagezero) /* * memcmpy(b1, b2, len) * rdi,rsi,len */ ENTRY(memcmp) PUSH_FRAME_POINTER cmpq $16,%rdx jae 5f 1: testq %rdx,%rdx je 3f xorl %ecx,%ecx 2: movzbl (%rdi,%rcx,1),%eax movzbl (%rsi,%rcx,1),%r8d cmpb %r8b,%al jne 4f addq $1,%rcx cmpq %rcx,%rdx jz 3f movzbl (%rdi,%rcx,1),%eax movzbl (%rsi,%rcx,1),%r8d cmpb %r8b,%al jne 4f addq $1,%rcx cmpq %rcx,%rdx jz 3f movzbl (%rdi,%rcx,1),%eax movzbl (%rsi,%rcx,1),%r8d cmpb %r8b,%al jne 4f addq $1,%rcx cmpq %rcx,%rdx jz 3f movzbl (%rdi,%rcx,1),%eax movzbl (%rsi,%rcx,1),%r8d cmpb %r8b,%al jne 4f addq $1,%rcx cmpq %rcx,%rdx jne 2b 3: xorl %eax,%eax POP_FRAME_POINTER ret 4: subl %r8d,%eax POP_FRAME_POINTER ret 5: cmpq $32,%rdx jae 7f 6: /* * 8 bytes */ movq (%rdi),%r8 movq (%rsi),%r9 cmpq %r8,%r9 jne 1b leaq 8(%rdi),%rdi leaq 8(%rsi),%rsi subq $8,%rdx cmpq $8,%rdx jae 6b jl 1b jmp 3b 7: /* * 32 bytes */ movq (%rsi),%r8 movq 8(%rsi),%r9 subq (%rdi),%r8 subq 8(%rdi),%r9 or %r8,%r9 jnz 1b movq 16(%rsi),%r8 movq 24(%rsi),%r9 subq 16(%rdi),%r8 subq 24(%rdi),%r9 or %r8,%r9 jnz 1b leaq 32(%rdi),%rdi leaq 32(%rsi),%rsi subq $32,%rdx cmpq $32,%rdx jae 7b jnz 1b jmp 3b END(memcmp) /* * memmove(dst, src, cnt) * rdi, rsi, rdx */ /* * Register state at entry is supposed to be as follows: * rdi - destination * rsi - source * rdx - count * * The macro possibly clobbers the above and: rcx, r8, r9, r10 * It does not clobber rax nor r11. */ .macro MEMMOVE erms overlap begin end \begin /* * For sizes 0..32 all data is read before it is written, so there * is no correctness issue with direction of copying. */ cmpq $32,%rcx jbe 101632f .if \overlap == 1 movq %rdi,%r8 subq %rsi,%r8 cmpq %rcx,%r8 /* overlapping && src < dst? */ jb 2f .endif cmpq $256,%rcx ja 1256f 103200: movq (%rsi),%rdx movq %rdx,(%rdi) movq 8(%rsi),%rdx movq %rdx,8(%rdi) movq 16(%rsi),%rdx movq %rdx,16(%rdi) movq 24(%rsi),%rdx movq %rdx,24(%rdi) leaq 32(%rsi),%rsi leaq 32(%rdi),%rdi subq $32,%rcx cmpq $32,%rcx jae 103200b cmpb $0,%cl jne 101632f \end ret ALIGN_TEXT 101632: cmpb $16,%cl jl 100816f movq (%rsi),%rdx movq 8(%rsi),%r8 movq -16(%rsi,%rcx),%r9 movq -8(%rsi,%rcx),%r10 movq %rdx,(%rdi) movq %r8,8(%rdi) movq %r9,-16(%rdi,%rcx) movq %r10,-8(%rdi,%rcx) \end ret ALIGN_TEXT 100816: cmpb $8,%cl jl 100408f movq (%rsi),%rdx movq -8(%rsi,%rcx),%r8 movq %rdx,(%rdi) movq %r8,-8(%rdi,%rcx,) \end ret ALIGN_TEXT 100408: cmpb $4,%cl jl 100204f movl (%rsi),%edx movl -4(%rsi,%rcx),%r8d movl %edx,(%rdi) movl %r8d,-4(%rdi,%rcx) \end ret ALIGN_TEXT 100204: cmpb $2,%cl jl 100001f movzwl (%rsi),%edx movzwl -2(%rsi,%rcx),%r8d movw %dx,(%rdi) movw %r8w,-2(%rdi,%rcx) \end ret ALIGN_TEXT 100001: cmpb $1,%cl jl 100000f movb (%rsi),%dl movb %dl,(%rdi) 100000: \end ret ALIGN_TEXT 1256: testb $15,%dil jnz 100f .if \erms == 1 rep movsb .else shrq $3,%rcx /* copy by 64-bit words */ rep movsq movq %rdx,%rcx andl $7,%ecx /* any bytes left? */ jne 100408b .endif \end ret 100: movq (%rsi),%r8 movq 8(%rsi),%r9 movq %rdi,%r10 movq %rdi,%rcx andq $15,%rcx leaq -16(%rdx,%rcx),%rdx neg %rcx leaq 16(%rdi,%rcx),%rdi leaq 16(%rsi,%rcx),%rsi movq %rdx,%rcx .if \erms == 1 rep movsb movq %r8,(%r10) movq %r9,8(%r10) .else shrq $3,%rcx /* copy by 64-bit words */ rep movsq movq %r8,(%r10) movq %r9,8(%r10) movq %rdx,%rcx andl $7,%ecx /* any bytes left? */ jne 100408b .endif \end ret .if \overlap == 1 /* * Copy backwards. 
*/ ALIGN_TEXT 2: cmpq $256,%rcx ja 2256f leaq -8(%rdi,%rcx),%rdi leaq -8(%rsi,%rcx),%rsi cmpq $32,%rcx jb 2016f 2032: movq (%rsi),%rdx movq %rdx,(%rdi) movq -8(%rsi),%rdx movq %rdx,-8(%rdi) movq -16(%rsi),%rdx movq %rdx,-16(%rdi) movq -24(%rsi),%rdx movq %rdx,-24(%rdi) leaq -32(%rsi),%rsi leaq -32(%rdi),%rdi subq $32,%rcx cmpq $32,%rcx jae 2032b cmpb $0,%cl jne 2016f \end ret ALIGN_TEXT 2016: cmpb $16,%cl jl 2008f movq (%rsi),%rdx movq %rdx,(%rdi) movq -8(%rsi),%rdx movq %rdx,-8(%rdi) subb $16,%cl jz 2000f leaq -16(%rsi),%rsi leaq -16(%rdi),%rdi 2008: cmpb $8,%cl jl 2004f movq (%rsi),%rdx movq %rdx,(%rdi) subb $8,%cl jz 2000f leaq -8(%rsi),%rsi leaq -8(%rdi),%rdi 2004: cmpb $4,%cl jl 2002f movl 4(%rsi),%edx movl %edx,4(%rdi) subb $4,%cl jz 2000f leaq -4(%rsi),%rsi leaq -4(%rdi),%rdi 2002: cmpb $2,%cl jl 2001f movw 6(%rsi),%dx movw %dx,6(%rdi) subb $2,%cl jz 2000f leaq -2(%rsi),%rsi leaq -2(%rdi),%rdi 2001: cmpb $1,%cl jl 2000f movb 7(%rsi),%dl movb %dl,7(%rdi) 2000: \end ret ALIGN_TEXT 2256: std .if \erms == 1 leaq -1(%rdi,%rcx),%rdi leaq -1(%rsi,%rcx),%rsi rep movsb cld .else leaq -8(%rdi,%rcx),%rdi leaq -8(%rsi,%rcx),%rsi shrq $3,%rcx rep movsq cld movq %rdx,%rcx andb $7,%cl jne 2004b .endif \end ret .endif .endm .macro MEMMOVE_BEGIN PUSH_FRAME_POINTER movq %rdi,%rax movq %rdx,%rcx .endm .macro MEMMOVE_END POP_FRAME_POINTER .endm ENTRY(memmove_std) MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END END(memmove_std) ENTRY(memmove_erms) MEMMOVE erms=1 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END END(memmove_erms) /* * memcpy(dst, src, len) * rdi, rsi, rdx * * Note: memcpy does not support overlapping copies */ ENTRY(memcpy_std) MEMMOVE erms=0 overlap=0 begin=MEMMOVE_BEGIN end=MEMMOVE_END END(memcpy_std) ENTRY(memcpy_erms) MEMMOVE erms=1 overlap=0 begin=MEMMOVE_BEGIN end=MEMMOVE_END END(memcpy_erms) /* * memset(dst, c, len) * rdi, rsi, rdx */ .macro MEMSET erms PUSH_FRAME_POINTER movq %rdi,%rax movq %rdx,%rcx movzbq %sil,%r8 movabs $0x0101010101010101,%r10 imulq %r8,%r10 cmpq $32,%rcx jbe 101632f cmpq $256,%rcx ja 1256f 103200: movq %r10,(%rdi) movq %r10,8(%rdi) movq %r10,16(%rdi) movq %r10,24(%rdi) leaq 32(%rdi),%rdi subq $32,%rcx cmpq $32,%rcx ja 103200b cmpb $16,%cl ja 201632f movq %r10,-16(%rdi,%rcx) movq %r10,-8(%rdi,%rcx) POP_FRAME_POINTER ret ALIGN_TEXT 101632: cmpb $16,%cl jl 100816f 201632: movq %r10,(%rdi) movq %r10,8(%rdi) movq %r10,-16(%rdi,%rcx) movq %r10,-8(%rdi,%rcx) POP_FRAME_POINTER ret ALIGN_TEXT 100816: cmpb $8,%cl jl 100408f movq %r10,(%rdi) movq %r10,-8(%rdi,%rcx) POP_FRAME_POINTER ret ALIGN_TEXT 100408: cmpb $4,%cl jl 100204f movl %r10d,(%rdi) movl %r10d,-4(%rdi,%rcx) POP_FRAME_POINTER ret ALIGN_TEXT 100204: cmpb $2,%cl jl 100001f movw %r10w,(%rdi) movw %r10w,-2(%rdi,%rcx) POP_FRAME_POINTER ret ALIGN_TEXT 100001: cmpb $0,%cl je 100000f movb %r10b,(%rdi) 100000: POP_FRAME_POINTER ret ALIGN_TEXT 1256: movq %rdi,%r9 movq %r10,%rax testl $15,%edi jnz 3f 1: .if \erms == 1 rep stosb movq %r9,%rax .else movq %rcx,%rdx shrq $3,%rcx rep stosq movq %r9,%rax andl $7,%edx jnz 2f POP_FRAME_POINTER ret 2: movq %r10,-8(%rdi,%rdx) .endif POP_FRAME_POINTER ret ALIGN_TEXT 3: movq %r10,(%rdi) movq %r10,8(%rdi) movq %rdi,%r8 andq $15,%r8 leaq -16(%rcx,%r8),%rcx neg %r8 leaq 16(%rdi,%r8),%rdi jmp 1b .endm ENTRY(memset_std) MEMSET erms=0 END(memset_std) ENTRY(memset_erms) MEMSET erms=1 END(memset_erms) /* fillw(pat, base, cnt) */ /* %rdi,%rsi, %rdx */ ENTRY(fillw) PUSH_FRAME_POINTER movq %rdi,%rax movq %rsi,%rdi movq %rdx,%rcx rep stosw POP_FRAME_POINTER ret END(fillw) 
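The MEMSET macro above builds its 64-bit fill pattern by multiplying the zero-extended fill byte by 0x0101010101010101 (the movzbq/movabs/imulq sequence). For illustration only, a C equivalent of that byte-replication trick, with a hypothetical repeat_byte helper:

#include <stdint.h>

/*
 * Illustration (not part of the patch): multiplying an 8-bit value by
 * 0x0101010101010101 replicates it into all eight bytes of a 64-bit
 * word, which is what MEMSET's movzbq/imulq pair computes.
 */
static inline uint64_t
repeat_byte(uint8_t c)
{
	return ((uint64_t)c * 0x0101010101010101ULL);
}

/* Example: repeat_byte(0xab) yields 0xababababababababULL. */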
/*****************************************************************************/ /* copyout and fubyte family */ /*****************************************************************************/ /* * Access user memory from inside the kernel. These routines should be * the only places that do this. * * These routines set curpcb->pcb_onfault for the time they execute. When a * protection violation occurs inside the functions, the trap handler * returns to *curpcb->pcb_onfault instead of the function. */ .macro SMAP_DISABLE smap .if \smap stac .endif .endm .macro SMAP_ENABLE smap .if \smap clac .endif .endm .macro COPYINOUT_BEGIN .endm .macro COPYINOUT_END movq %rax,PCB_ONFAULT(%r11) POP_FRAME_POINTER .endm .macro COPYINOUT_SMAP_END SMAP_ENABLE smap=1 COPYINOUT_END .endm /* * copyout(from_kernel, to_user, len) * %rdi, %rsi, %rdx */ .macro COPYOUT smap erms PUSH_FRAME_POINTER movq PCPU(CURPCB),%r11 movq $copy_fault,PCB_ONFAULT(%r11) /* * Check explicitly for non-user addresses. If 486 write protection * is being used, this check is essential because we are in kernel * mode so the h/w does not provide any protection against writing * kernel addresses. */ /* * First, prevent address wrapping. */ movq %rsi,%rax addq %rdx,%rax jc copy_fault /* * XXX STOP USING VM_MAXUSER_ADDRESS. * It is an end address, not a max, so every time it is used correctly it * looks like there is an off by one error, and of course it caused an off * by one error in several places. */ movq $VM_MAXUSER_ADDRESS,%rcx cmpq %rcx,%rax ja copy_fault /* * Set return value to zero. Remaining failure mode goes through * copy_fault. */ xorl %eax,%eax /* * Set up arguments for MEMMOVE. */ movq %rdi,%r8 movq %rsi,%rdi movq %r8,%rsi movq %rdx,%rcx SMAP_DISABLE \smap .if \smap == 1 MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_SMAP_END .else MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_END .endif /* NOTREACHED */ .endm ENTRY(copyout_nosmap_std) COPYOUT smap=0 erms=0 END(copyout_nosmap_std) ENTRY(copyout_smap_std) COPYOUT smap=1 erms=0 END(copyout_smap_std) ENTRY(copyout_nosmap_erms) COPYOUT smap=0 erms=1 END(copyout_nosmap_erms) ENTRY(copyout_smap_erms) COPYOUT smap=1 erms=1 END(copyout_smap_erms) /* * copyin(from_user, to_kernel, len) * %rdi, %rsi, %rdx */ .macro COPYIN smap erms PUSH_FRAME_POINTER movq PCPU(CURPCB),%r11 movq $copy_fault,PCB_ONFAULT(%r11) /* * make sure address is valid */ movq %rdi,%rax addq %rdx,%rax jc copy_fault movq $VM_MAXUSER_ADDRESS,%rcx cmpq %rcx,%rax ja copy_fault xorl %eax,%eax movq %rdi,%r8 movq %rsi,%rdi movq %r8,%rsi movq %rdx,%rcx SMAP_DISABLE \smap .if \smap == 1 MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_SMAP_END .else MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_END .endif /* NOTREACHED */ .endm ENTRY(copyin_nosmap_std) COPYIN smap=0 erms=0 END(copyin_nosmap_std) ENTRY(copyin_smap_std) COPYIN smap=1 erms=0 END(copyin_smap_std) ENTRY(copyin_nosmap_erms) COPYIN smap=0 erms=1 END(copyin_nosmap_erms) ENTRY(copyin_smap_erms) COPYIN smap=1 erms=1 END(copyin_smap_erms) ALIGN_TEXT /* Trap entry clears PSL.AC */ copy_fault: movq $0,PCB_ONFAULT(%r11) movl $EFAULT,%eax POP_FRAME_POINTER ret /* * casueword32. Compare and set user integer. Returns -1 on fault, * 0 if access was successful. Old value is written to *oldp. 
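* Returns 1 if the swap did not happen because the comparison failed;
 * in that case the current value read from user memory is still written
 * to *oldp, and the setne below captures the cmpxchg result.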
* dst = %rdi, old = %esi, oldp = %rdx, new = %ecx */ ENTRY(casueword32_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $fusufault,PCB_ONFAULT(%r8) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault movl %esi,%eax /* old */ #ifdef SMP lock #endif cmpxchgl %ecx,(%rdi) /* new = %ecx */ + setne %cl /* * The old value is in %eax. If the store succeeded it will be the * value we expected (old) from before the store, otherwise it will * be the current value. Save %eax into %esi to prepare the return * value. */ movl %eax,%esi xorl %eax,%eax movq %rax,PCB_ONFAULT(%r8) /* * Access the oldp after the pcb_onfault is cleared, to correctly * catch corrupted pointer. */ movl %esi,(%rdx) /* oldp = %rdx */ POP_FRAME_POINTER + movzbl %cl, %eax ret END(casueword32_nosmap) ENTRY(casueword32_smap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $fusufault,PCB_ONFAULT(%r8) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault movl %esi,%eax /* old */ stac #ifdef SMP lock #endif cmpxchgl %ecx,(%rdi) /* new = %ecx */ clac + setne %cl /* * The old value is in %eax. If the store succeeded it will be the * value we expected (old) from before the store, otherwise it will * be the current value. Save %eax into %esi to prepare the return * value. */ movl %eax,%esi xorl %eax,%eax movq %rax,PCB_ONFAULT(%r8) /* * Access the oldp after the pcb_onfault is cleared, to correctly * catch corrupted pointer. */ movl %esi,(%rdx) /* oldp = %rdx */ POP_FRAME_POINTER + movzbl %cl, %eax ret END(casueword32_smap) /* * casueword. Compare and set user long. Returns -1 on fault, * 0 if access was successful. Old value is written to *oldp. * dst = %rdi, old = %rsi, oldp = %rdx, new = %rcx */ ENTRY(casueword_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $fusufault,PCB_ONFAULT(%r8) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault movq %rsi,%rax /* old */ #ifdef SMP lock #endif cmpxchgq %rcx,(%rdi) /* new = %rcx */ + setne %cl /* * The old value is in %rax. If the store succeeded it will be the * value we expected (old) from before the store, otherwise it will * be the current value. */ movq %rax,%rsi xorl %eax,%eax movq %rax,PCB_ONFAULT(%r8) movq %rsi,(%rdx) POP_FRAME_POINTER + movzbl %cl, %eax ret END(casueword_nosmap) ENTRY(casueword_smap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $fusufault,PCB_ONFAULT(%r8) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault movq %rsi,%rax /* old */ stac #ifdef SMP lock #endif cmpxchgq %rcx,(%rdi) /* new = %rcx */ clac + setne %cl /* * The old value is in %rax. If the store succeeded it will be the * value we expected (old) from before the store, otherwise it will * be the current value. */ movq %rax,%rsi xorl %eax,%eax movq %rax,PCB_ONFAULT(%r8) movq %rsi,(%rdx) POP_FRAME_POINTER + movzbl %cl, %eax ret END(casueword_smap) /* * Fetch (load) a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit * byte from user memory. 
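* The fueword and fueword32 variants return the datum through valp and
 * use the return value only for status (0 or -1), so a legitimate -1 in
 * user memory is distinguishable from a fault; fuword16 and fubyte
 * zero-extend the datum, so for them -1 can only indicate a fault.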
* addr = %rdi, valp = %rsi */ ENTRY(fueword_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-8,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault xorl %eax,%eax movq (%rdi),%r11 movq %rax,PCB_ONFAULT(%rcx) movq %r11,(%rsi) POP_FRAME_POINTER ret END(fueword_nosmap) ENTRY(fueword_smap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-8,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault xorl %eax,%eax stac movq (%rdi),%r11 clac movq %rax,PCB_ONFAULT(%rcx) movq %r11,(%rsi) POP_FRAME_POINTER ret END(fueword_smap) ENTRY(fueword32_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault xorl %eax,%eax movl (%rdi),%r11d movq %rax,PCB_ONFAULT(%rcx) movl %r11d,(%rsi) POP_FRAME_POINTER ret END(fueword32_nosmap) ENTRY(fueword32_smap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault xorl %eax,%eax stac movl (%rdi),%r11d clac movq %rax,PCB_ONFAULT(%rcx) movl %r11d,(%rsi) POP_FRAME_POINTER ret END(fueword32_smap) ENTRY(fuword16_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-2,%rax cmpq %rax,%rdi ja fusufault movzwl (%rdi),%eax movq $0,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(fuword16_nosmap) ENTRY(fuword16_smap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-2,%rax cmpq %rax,%rdi ja fusufault stac movzwl (%rdi),%eax clac movq $0,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(fuword16_smap) ENTRY(fubyte_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-1,%rax cmpq %rax,%rdi ja fusufault movzbl (%rdi),%eax movq $0,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(fubyte_nosmap) ENTRY(fubyte_smap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-1,%rax cmpq %rax,%rdi ja fusufault stac movzbl (%rdi),%eax clac movq $0,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(fubyte_smap) /* * Store a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit byte to * user memory. 
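* All of the su* routines below return 0 on success and -1 on a fault.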
* addr = %rdi, value = %rsi */ ENTRY(suword_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-8,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movq %rsi,(%rdi) xorl %eax,%eax movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword_nosmap) ENTRY(suword_smap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-8,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault stac movq %rsi,(%rdi) clac xorl %eax,%eax movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword_smap) ENTRY(suword32_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movl %esi,(%rdi) xorl %eax,%eax movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword32_nosmap) ENTRY(suword32_smap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault stac movl %esi,(%rdi) clac xorl %eax,%eax movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword32_smap) ENTRY(suword16_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-2,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movw %si,(%rdi) xorl %eax,%eax movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword16_nosmap) ENTRY(suword16_smap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-2,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault stac movw %si,(%rdi) clac xorl %eax,%eax movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword16_smap) ENTRY(subyte_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-1,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movl %esi,%eax movb %al,(%rdi) xorl %eax,%eax movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(subyte_nosmap) ENTRY(subyte_smap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-1,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movl %esi,%eax stac movb %al,(%rdi) clac xorl %eax,%eax movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(subyte_smap) ALIGN_TEXT /* Fault entry clears PSL.AC */ fusufault: movq PCPU(CURPCB),%rcx xorl %eax,%eax movq %rax,PCB_ONFAULT(%rcx) decq %rax POP_FRAME_POINTER ret /* * copyinstr(from, to, maxlen, int *lencopied) * %rdi, %rsi, %rdx, %rcx * * copy a string from 'from' to 'to', stop when a 0 character is reached. * return ENAMETOOLONG if string is longer than maxlen, and * EFAULT on protection violations. If lencopied is non-zero, * return the actual length in *lencopied. 
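* The length stored in *lencopied includes the terminating 0 character.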
*/ .macro COPYINSTR smap PUSH_FRAME_POINTER movq %rdx,%r8 /* %r8 = maxlen */ movq PCPU(CURPCB),%r9 movq $cpystrflt,PCB_ONFAULT(%r9) movq $VM_MAXUSER_ADDRESS,%rax /* make sure 'from' is within bounds */ subq %rdi,%rax jbe cpystrflt SMAP_DISABLE \smap /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */ cmpq %rdx,%rax jb 8f 1: incq %rdx 2: decq %rdx .if \smap == 0 jz copyinstr_toolong .else jz copyinstr_toolong_smap .endif movb (%rdi),%al movb %al,(%rsi) incq %rsi incq %rdi testb %al,%al jnz 2b SMAP_ENABLE \smap /* Success -- 0 byte reached */ decq %rdx xorl %eax,%eax /* set *lencopied and return %eax */ movq %rax,PCB_ONFAULT(%r9) testq %rcx,%rcx jz 3f subq %rdx,%r8 movq %r8,(%rcx) 3: POP_FRAME_POINTER ret ALIGN_TEXT 8: movq %rax,%rdx movq %rax,%r8 jmp 1b .endm ENTRY(copyinstr_nosmap) COPYINSTR smap=0 END(copyinstr_nosmap) ENTRY(copyinstr_smap) COPYINSTR smap=1 END(copyinstr_smap) cpystrflt: /* Fault entry clears PSL.AC */ movl $EFAULT,%eax cpystrflt_x: /* set *lencopied and return %eax */ movq $0,PCB_ONFAULT(%r9) testq %rcx,%rcx jz 1f subq %rdx,%r8 movq %r8,(%rcx) 1: POP_FRAME_POINTER ret copyinstr_toolong_smap: clac copyinstr_toolong: /* rdx is zero - return ENAMETOOLONG or EFAULT */ movq $VM_MAXUSER_ADDRESS,%rax cmpq %rax,%rdi jae cpystrflt movl $ENAMETOOLONG,%eax jmp cpystrflt_x /* * copystr(from, to, maxlen, int *lencopied) * %rdi, %rsi, %rdx, %rcx */ ENTRY(copystr) PUSH_FRAME_POINTER movq %rdx,%r8 /* %r8 = maxlen */ incq %rdx 1: decq %rdx jz 4f movb (%rdi),%al movb %al,(%rsi) incq %rsi incq %rdi testb %al,%al jnz 1b /* Success -- 0 byte reached */ decq %rdx xorl %eax,%eax 2: testq %rcx,%rcx jz 3f /* set *lencopied and return %rax */ subq %rdx,%r8 movq %r8,(%rcx) 3: POP_FRAME_POINTER ret 4: /* rdx is zero -- return ENAMETOOLONG */ movl $ENAMETOOLONG,%eax jmp 2b END(copystr) /* * Handling of special amd64 registers and descriptor tables etc */ /* void lgdt(struct region_descriptor *rdp); */ ENTRY(lgdt) /* reload the descriptor table */ lgdt (%rdi) /* flush the prefetch q */ jmp 1f nop 1: movl $KDSEL,%eax movl %eax,%ds movl %eax,%es movl %eax,%fs /* Beware, use wrmsr to set 64 bit base */ movl %eax,%gs movl %eax,%ss /* reload code selector by turning return into intersegmental return */ popq %rax pushq $KCSEL pushq %rax MEXITCOUNT lretq END(lgdt) /*****************************************************************************/ /* setjump, longjump */ /*****************************************************************************/ ENTRY(setjmp) movq %rbx,0(%rdi) /* save rbx */ movq %rsp,8(%rdi) /* save rsp */ movq %rbp,16(%rdi) /* save rbp */ movq %r12,24(%rdi) /* save r12 */ movq %r13,32(%rdi) /* save r13 */ movq %r14,40(%rdi) /* save r14 */ movq %r15,48(%rdi) /* save r15 */ movq 0(%rsp),%rdx /* get rta */ movq %rdx,56(%rdi) /* save rip */ xorl %eax,%eax /* return(0); */ ret END(setjmp) ENTRY(longjmp) movq 0(%rdi),%rbx /* restore rbx */ movq 8(%rdi),%rsp /* restore rsp */ movq 16(%rdi),%rbp /* restore rbp */ movq 24(%rdi),%r12 /* restore r12 */ movq 32(%rdi),%r13 /* restore r13 */ movq 40(%rdi),%r14 /* restore r14 */ movq 48(%rdi),%r15 /* restore r15 */ movq 56(%rdi),%rdx /* get rta */ movq %rdx,0(%rsp) /* put in return frame */ xorl %eax,%eax /* return(1); */ incl %eax ret END(longjmp) /* * Support for reading MSRs in the safe manner. (Instead of panic on #gp, * return an error.) */ ENTRY(rdmsr_safe) /* int rdmsr_safe(u_int msr, uint64_t *data) */ PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $msr_onfault,PCB_ONFAULT(%r8) movl %edi,%ecx rdmsr /* Read MSR pointed by %ecx. 
Returns hi byte in edx, lo in %eax */ salq $32,%rdx /* sign-shift %rdx left */ movl %eax,%eax /* zero-extend %eax -> %rax */ orq %rdx,%rax movq %rax,(%rsi) xorq %rax,%rax movq %rax,PCB_ONFAULT(%r8) POP_FRAME_POINTER ret /* * Support for writing MSRs in the safe manner. (Instead of panic on #gp, * return an error.) */ ENTRY(wrmsr_safe) /* int wrmsr_safe(u_int msr, uint64_t data) */ PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $msr_onfault,PCB_ONFAULT(%r8) movl %edi,%ecx movl %esi,%eax sarq $32,%rsi movl %esi,%edx wrmsr /* Write MSR pointed by %ecx. Accepts hi byte in edx, lo in %eax. */ xorq %rax,%rax movq %rax,PCB_ONFAULT(%r8) POP_FRAME_POINTER ret /* * MSR operations fault handler */ ALIGN_TEXT msr_onfault: movq $0,PCB_ONFAULT(%r8) movl $EFAULT,%eax POP_FRAME_POINTER ret /* * void pmap_pti_pcid_invalidate(uint64_t ucr3, uint64_t kcr3); * Invalidates address space addressed by ucr3, then returns to kcr3. * Done in assembler to ensure no other memory accesses happen while * on ucr3. */ ALIGN_TEXT ENTRY(pmap_pti_pcid_invalidate) pushfq cli movq %rdi,%cr3 /* to user page table */ movq %rsi,%cr3 /* back to kernel */ popfq retq /* * void pmap_pti_pcid_invlpg(uint64_t ucr3, uint64_t kcr3, vm_offset_t va); * Invalidates virtual address va in address space ucr3, then returns to kcr3. */ ALIGN_TEXT ENTRY(pmap_pti_pcid_invlpg) pushfq cli movq %rdi,%cr3 /* to user page table */ invlpg (%rdx) movq %rsi,%cr3 /* back to kernel */ popfq retq /* * void pmap_pti_pcid_invlrng(uint64_t ucr3, uint64_t kcr3, vm_offset_t sva, * vm_offset_t eva); * Invalidates virtual addresses between sva and eva in address space ucr3, * then returns to kcr3. */ ALIGN_TEXT ENTRY(pmap_pti_pcid_invlrng) pushfq cli movq %rdi,%cr3 /* to user page table */ 1: invlpg (%rdx) addq $PAGE_SIZE,%rdx cmpq %rdx,%rcx ja 1b movq %rsi,%cr3 /* back to kernel */ popfq retq .altmacro .macro ibrs_seq_label l handle_ibrs_\l: .endm .macro ibrs_call_label l call handle_ibrs_\l .endm .macro ibrs_seq count ll=1 .rept \count ibrs_call_label %(ll) nop ibrs_seq_label %(ll) addq $8,%rsp ll=ll+1 .endr .endm /* all callers already saved %rax, %rdx, and %rcx */ ENTRY(handle_ibrs_entry) cmpb $0,hw_ibrs_active(%rip) je 1f movl $MSR_IA32_SPEC_CTRL,%ecx rdmsr orl $(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax orl $(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32,%edx wrmsr movb $1,PCPU(IBPB_SET) testl $CPUID_STDEXT_SMEP,cpu_stdext_feature(%rip) jne 1f ibrs_seq 32 1: ret END(handle_ibrs_entry) ENTRY(handle_ibrs_exit) cmpb $0,PCPU(IBPB_SET) je 1f movl $MSR_IA32_SPEC_CTRL,%ecx rdmsr andl $~(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax andl $~((IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32),%edx wrmsr movb $0,PCPU(IBPB_SET) 1: ret END(handle_ibrs_exit) /* registers-neutral version, but needs stack */ ENTRY(handle_ibrs_exit_rs) cmpb $0,PCPU(IBPB_SET) je 1f pushq %rax pushq %rdx pushq %rcx movl $MSR_IA32_SPEC_CTRL,%ecx rdmsr andl $~(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax andl $~((IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32),%edx wrmsr popq %rcx popq %rdx popq %rax movb $0,PCPU(IBPB_SET) 1: ret END(handle_ibrs_exit_rs) .noaltmacro /* * Flush L1D cache. Load enough of the data from the kernel text * to flush existing L1D content. * * N.B. The function does not follow ABI calling conventions, it corrupts %rbx. * The vmm.ko caller expects that only %rax, %rdx, %rbx, %rcx, %r9, and %rflags * registers are clobbered. The NMI handler caller only needs %r13 preserved. 
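* The flush makes two passes over L1D_FLUSH_SIZE (64KB) of kernel text:
 * pass 1 touches one byte per page to preload the TLB, pass 2 touches
 * one byte per 64-byte cache line so the loads displace the existing
 * L1D contents.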
*/ ENTRY(flush_l1d_sw) #define L1D_FLUSH_SIZE (64 * 1024) movq $KERNBASE, %r9 movq $-L1D_FLUSH_SIZE, %rcx /* * pass 1: Preload TLB. * Kernel text is mapped using superpages. TLB preload is * done for the benefit of older CPUs which split 2M page * into 4k TLB entries. */ 1: movb L1D_FLUSH_SIZE(%r9, %rcx), %al addq $PAGE_SIZE, %rcx jne 1b xorl %eax, %eax cpuid movq $-L1D_FLUSH_SIZE, %rcx /* pass 2: Read each cache line. */ 2: movb L1D_FLUSH_SIZE(%r9, %rcx), %al addq $64, %rcx jne 2b lfence ret #undef L1D_FLUSH_SIZE END(flush_l1d_sw) ENTRY(flush_l1d_sw_abi) pushq %rbx call flush_l1d_sw popq %rbx ret END(flush_l1d_sw_abi) ENTRY(mds_handler_void) retq END(mds_handler_void) ENTRY(mds_handler_verw) subq $8, %rsp movw %ds, (%rsp) verw (%rsp) addq $8, %rsp retq END(mds_handler_verw) ENTRY(mds_handler_ivb) pushq %rax pushq %rdx pushq %rcx movq %cr0, %rax testb $CR0_TS, %al je 1f clts 1: movq PCPU(MDS_BUF), %rdx movdqa %xmm0, PCPU(MDS_TMP) pxor %xmm0, %xmm0 lfence orpd (%rdx), %xmm0 orpd (%rdx), %xmm0 mfence movl $40, %ecx addq $16, %rdx 2: movntdq %xmm0, (%rdx) addq $16, %rdx decl %ecx jnz 2b mfence movdqa PCPU(MDS_TMP),%xmm0 testb $CR0_TS, %al je 3f movq %rax, %cr0 3: popq %rcx popq %rdx popq %rax retq END(mds_handler_ivb) ENTRY(mds_handler_bdw) pushq %rax pushq %rbx pushq %rcx pushq %rdi pushq %rsi movq %cr0, %rax testb $CR0_TS, %al je 1f clts 1: movq PCPU(MDS_BUF), %rbx movdqa %xmm0, PCPU(MDS_TMP) pxor %xmm0, %xmm0 movq %rbx, %rdi movq %rbx, %rsi movl $40, %ecx 2: movntdq %xmm0, (%rbx) addq $16, %rbx decl %ecx jnz 2b mfence movl $1536, %ecx rep; movsb lfence movdqa PCPU(MDS_TMP),%xmm0 testb $CR0_TS, %al je 3f movq %rax, %cr0 3: popq %rsi popq %rdi popq %rcx popq %rbx popq %rax retq END(mds_handler_bdw) ENTRY(mds_handler_skl_sse) pushq %rax pushq %rdx pushq %rcx pushq %rdi movq %cr0, %rax testb $CR0_TS, %al je 1f clts 1: movq PCPU(MDS_BUF), %rdi movq PCPU(MDS_BUF64), %rdx movdqa %xmm0, PCPU(MDS_TMP) pxor %xmm0, %xmm0 lfence orpd (%rdx), %xmm0 orpd (%rdx), %xmm0 xorl %eax, %eax 2: clflushopt 5376(%rdi, %rax, 8) addl $8, %eax cmpl $8 * 12, %eax jb 2b sfence movl $6144, %ecx xorl %eax, %eax rep; stosb mfence movdqa PCPU(MDS_TMP), %xmm0 testb $CR0_TS, %al je 3f movq %rax, %cr0 3: popq %rdi popq %rcx popq %rdx popq %rax retq END(mds_handler_skl_sse) ENTRY(mds_handler_skl_avx) pushq %rax pushq %rdx pushq %rcx pushq %rdi movq %cr0, %rax testb $CR0_TS, %al je 1f clts 1: movq PCPU(MDS_BUF), %rdi movq PCPU(MDS_BUF64), %rdx vmovdqa %ymm0, PCPU(MDS_TMP) vpxor %ymm0, %ymm0, %ymm0 lfence vorpd (%rdx), %ymm0, %ymm0 vorpd (%rdx), %ymm0, %ymm0 xorl %eax, %eax 2: clflushopt 5376(%rdi, %rax, 8) addl $8, %eax cmpl $8 * 12, %eax jb 2b sfence movl $6144, %ecx xorl %eax, %eax rep; stosb mfence vmovdqa PCPU(MDS_TMP), %ymm0 testb $CR0_TS, %al je 3f movq %rax, %cr0 3: popq %rdi popq %rcx popq %rdx popq %rax retq END(mds_handler_skl_avx) ENTRY(mds_handler_skl_avx512) pushq %rax pushq %rdx pushq %rcx pushq %rdi movq %cr0, %rax testb $CR0_TS, %al je 1f clts 1: movq PCPU(MDS_BUF), %rdi movq PCPU(MDS_BUF64), %rdx vmovdqa64 %zmm0, PCPU(MDS_TMP) vpxord %zmm0, %zmm0, %zmm0 lfence vorpd (%rdx), %zmm0, %zmm0 vorpd (%rdx), %zmm0, %zmm0 xorl %eax, %eax 2: clflushopt 5376(%rdi, %rax, 8) addl $8, %eax cmpl $8 * 12, %eax jb 2b sfence movl $6144, %ecx xorl %eax, %eax rep; stosb mfence vmovdqa64 PCPU(MDS_TMP), %zmm0 testb $CR0_TS, %al je 3f movq %rax, %cr0 3: popq %rdi popq %rcx popq %rdx popq %rax retq END(mds_handler_skl_avx512) ENTRY(mds_handler_silvermont) pushq %rax pushq %rdx pushq %rcx movq %cr0, %rax testb $CR0_TS, %al je 1f clts 
1: movq PCPU(MDS_BUF), %rdx movdqa %xmm0, PCPU(MDS_TMP) pxor %xmm0, %xmm0 movl $16, %ecx 2: movntdq %xmm0, (%rdx) addq $16, %rdx decl %ecx jnz 2b mfence movdqa PCPU(MDS_TMP),%xmm0 testb $CR0_TS, %al je 3f movq %rax, %cr0 3: popq %rcx popq %rdx popq %rax retq END(mds_handler_silvermont) diff --git a/sys/arm/arm/fusu.S b/sys/arm/arm/fusu.S index 590440100a76..81690b9ba2ec 100644 --- a/sys/arm/arm/fusu.S +++ b/sys/arm/arm/fusu.S @@ -1,327 +1,328 @@ /* $NetBSD: fusu.S,v 1.10 2003/12/01 13:34:44 rearnsha Exp $ */ /*- * Copyright (c) 1996-1998 Mark Brinicombe. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Mark Brinicombe * 4. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include #include #include "assym.inc" __FBSDID("$FreeBSD$"); .syntax unified #if __ARM_ARCH >= 6 #define GET_PCB(tmp) \ mrc p15, 0, tmp, c13, c0, 4; \ add tmp, tmp, #(TD_PCB) #else .Lcurpcb: .word _C_LABEL(__pcpu) + PC_CURPCB #define GET_PCB(tmp) \ ldr tmp, .Lcurpcb #endif /* * casueword32(volatile uint32_t *base, uint32_t oldval, uint32_t *oldvalp, * uint32_t newval); */ ENTRY(casueword) EENTRY_NP(casueword32) stmfd sp!, {r4, r5, r6} ldr r4, =(VM_MAXUSER_ADDRESS-3) cmp r0, r4 mvncs r0, #0 - bcs 2f + bcs 1f GET_PCB(r6) ldr r6, [r6] #ifdef DIAGNOSTIC teq r6, #0x00000000 ldmfdeq sp!, {r4, r5, r6} beq .Lfusupcbfault #endif adr r4, .Lcasuwordfault str r4, [r6, #PCB_ONFAULT] #if __ARM_ARCH >= 6 -1: + mov r5, #1 ldrex r4, [r0] cmp r4, r1 strexeq r5, r3, [r0] - cmpeq r5, #1 - beq 1b #else ldrt r4, [r0] cmp r4, r1 strteq r3, [r0] #endif str r4, [r2] mov r0, #0 str r0, [r6, #PCB_ONFAULT] -2: +#if __ARM_ARCH >= 6 + mov r0, r5 +#endif +1: ldmfd sp!, {r4, r5, r6} RET EEND(casueword32) END(casueword) /* * Handle faults from casuword. Clean up and return -1. */ .Lcasuwordfault: mov r0, #0x00000000 str r0, [r6, #PCB_ONFAULT] mvn r0, #0 ldmfd sp!, {r4, r5, r6} RET /* * fueword(caddr_t uaddr, long *val); * Fetch an int from the user's address space. 
*/ ENTRY(fueword) EENTRY_NP(fueword32) ldr r3, =(VM_MAXUSER_ADDRESS-3) cmp r0, r3 mvncs r0, #0 RETc(cs) GET_PCB(r2) ldr r2, [r2] #ifdef DIAGNOSTIC teq r2, #0x00000000 beq .Lfusupcbfault #endif adr r3, .Lfusufault str r3, [r2, #PCB_ONFAULT] ldrt r3, [r0] str r3, [r1] mov r0, #0x00000000 str r0, [r2, #PCB_ONFAULT] RET EEND(fueword32) END(fueword) /* * fusword(caddr_t uaddr); * Fetch a short from the user's address space. */ ENTRY(fusword) ldr r3, =(VM_MAXUSER_ADDRESS-1) cmp r0, r3 mvncs r0, #0 RETc(cs) GET_PCB(r2) ldr r2, [r2] #ifdef DIAGNOSTIC teq r2, #0x00000000 beq .Lfusupcbfault #endif adr r1, .Lfusufault str r1, [r2, #PCB_ONFAULT] ldrbt r3, [r0], #1 ldrbt ip, [r0] #ifdef __ARMEB__ orr r0, ip, r3, asl #8 #else orr r0, r3, ip, asl #8 #endif mov r1, #0x00000000 str r1, [r2, #PCB_ONFAULT] RET END(fusword) /* * fubyte(caddr_t uaddr); * Fetch a byte from the user's address space. */ ENTRY(fubyte) ldr r3, =VM_MAXUSER_ADDRESS cmp r0, r3 mvncs r0, #0 RETc(cs) GET_PCB(r2) ldr r2, [r2] #ifdef DIAGNOSTIC teq r2, #0x00000000 beq .Lfusupcbfault #endif adr r1, .Lfusufault str r1, [r2, #PCB_ONFAULT] ldrbt r3, [r0] mov r1, #0x00000000 str r1, [r2, #PCB_ONFAULT] mov r0, r3 RET END(fubyte) /* * Handle faults from [fs]u*(). Clean up and return -1. */ .Lfusufault: mov r0, #0x00000000 str r0, [r2, #PCB_ONFAULT] mvn r0, #0x00000000 RET #ifdef DIAGNOSTIC /* * Handle earlier faults from [fs]u*(), due to no pcb */ .Lfusupcbfault: mov r1, r0 adr r0, fusupcbfaulttext b _C_LABEL(panic) fusupcbfaulttext: .asciz "Yikes - no valid PCB during fusuxxx() addr=%08x\n" .align 2 #endif /* * suword(caddr_t uaddr, int x); * Store an int in the user's address space. */ ENTRY(suword) EENTRY_NP(suword32) ldr r3, =(VM_MAXUSER_ADDRESS-3) cmp r0, r3 mvncs r0, #0 RETc(cs) GET_PCB(r2) ldr r2, [r2] #ifdef DIAGNOSTIC teq r2, #0x00000000 beq .Lfusupcbfault #endif adr r3, .Lfusufault str r3, [r2, #PCB_ONFAULT] strt r1, [r0] mov r0, #0x00000000 str r0, [r2, #PCB_ONFAULT] RET EEND(suword32) END(suword) /* * susword(caddr_t uaddr, short x); * Store a short in the user's address space. */ ENTRY(susword) ldr r3, =(VM_MAXUSER_ADDRESS-1) cmp r0, r3 mvncs r0, #0 RETc(cs) GET_PCB(r2) ldr r2, [r2] #ifdef DIAGNOSTIC teq r2, #0x00000000 beq .Lfusupcbfault #endif adr r3, .Lfusufault str r3, [r2, #PCB_ONFAULT] #ifdef __ARMEB__ mov ip, r1, lsr #8 strbt ip, [r0], #1 #else strbt r1, [r0], #1 mov r1, r1, lsr #8 #endif strbt r1, [r0] mov r0, #0x00000000 str r0, [r2, #PCB_ONFAULT] RET END(susword) /* * subyte(caddr_t uaddr, char x); * Store a byte in the user's address space. */ ENTRY(subyte) ldr r3, =VM_MAXUSER_ADDRESS cmp r0, r3 mvncs r0, #0 RETc(cs) GET_PCB(r2) ldr r2, [r2] #ifdef DIAGNOSTIC teq r2, #0x00000000 beq .Lfusupcbfault #endif adr r3, .Lfusufault str r3, [r2, #PCB_ONFAULT] strbt r1, [r0] mov r0, #0x00000000 str r0, [r2, #PCB_ONFAULT] RET END(subyte) diff --git a/sys/arm64/arm64/support.S b/sys/arm64/arm64/support.S index f936cab4e2cd..c5aba58c95f1 100644 --- a/sys/arm64/arm64/support.S +++ b/sys/arm64/arm64/support.S @@ -1,290 +1,290 @@ /*- * Copyright (c) 2014 Andrew Turner * Copyright (c) 2014-2015 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Andrew Turner * under sponsorship from the FreeBSD Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include "assym.inc" /* * One of the fu* or su* functions failed, return -1. */ ENTRY(fsu_fault) SET_FAULT_HANDLER(xzr, x1) /* Reset the handler function */ EXIT_USER_ACCESS_CHECK(w0, x1) fsu_fault_nopcb: mov x0, #-1 ret END(fsu_fault) /* * int casueword32(volatile uint32_t *, uint32_t, uint32_t *, uint32_t) */ ENTRY(casueword32) ldr x4, =(VM_MAXUSER_ADDRESS-3) cmp x0, x4 b.cs fsu_fault_nopcb adr x6, fsu_fault /* Load the fault handler */ + mov w5, #1 SET_FAULT_HANDLER(x6, x4) /* And set it */ ENTER_USER_ACCESS(w6, x4) 1: ldxr w4, [x0] /* Load-exclusive the data */ cmp w4, w1 /* Compare */ b.ne 2f /* Not equal, exit */ stxr w5, w3, [x0] /* Store the new data */ - cbnz w5, 1b /* Retry on failure */ 2: EXIT_USER_ACCESS(w6) - SET_FAULT_HANDLER(xzr, x5) /* Reset the fault handler */ + SET_FAULT_HANDLER(xzr, x6) /* Reset the fault handler */ str w4, [x2] /* Store the read data */ - mov x0, #0 /* Success */ + mov w0, w5 /* Result same as store status */ ret /* Return */ END(casueword32) /* * int casueword(volatile u_long *, u_long, u_long *, u_long) */ ENTRY(casueword) ldr x4, =(VM_MAXUSER_ADDRESS-7) cmp x0, x4 b.cs fsu_fault_nopcb adr x6, fsu_fault /* Load the fault handler */ + mov w5, #1 SET_FAULT_HANDLER(x6, x4) /* And set it */ ENTER_USER_ACCESS(w6, x4) 1: ldxr x4, [x0] /* Load-exclusive the data */ cmp x4, x1 /* Compare */ b.ne 2f /* Not equal, exit */ stxr w5, x3, [x0] /* Store the new data */ - cbnz w5, 1b /* Retry on failure */ 2: EXIT_USER_ACCESS(w6) - SET_FAULT_HANDLER(xzr, x5) /* Reset the fault handler */ + SET_FAULT_HANDLER(xzr, x6) /* Reset the fault handler */ str x4, [x2] /* Store the read data */ - mov x0, #0 /* Success */ + mov w0, w5 /* Result same as store status */ ret /* Return */ END(casueword) /* * int fubyte(volatile const void *) */ ENTRY(fubyte) ldr x1, =VM_MAXUSER_ADDRESS cmp x0, x1 b.cs fsu_fault_nopcb adr x6, fsu_fault /* Load the fault handler */ SET_FAULT_HANDLER(x6, x1) /* And set it */ ldtrb w0, [x0] /* Try loading the data */ SET_FAULT_HANDLER(xzr, x1) /* Reset the fault handler */ ret /* Return */ END(fubyte) /* * int fuword(volatile const void *) */ ENTRY(fuword16) ldr x1, =(VM_MAXUSER_ADDRESS-1) cmp x0, x1 b.cs fsu_fault_nopcb adr x6, fsu_fault /* Load the fault handler */ SET_FAULT_HANDLER(x6, x1) /* And set it */ ldtrh w0, [x0] /* Try loading the data */ SET_FAULT_HANDLER(xzr, x1) /* Reset the fault handler */ ret /* Return */ END(fuword16) /* * int32_t fueword32(volatile const void *, int32_t *) */ ENTRY(fueword32) ldr x2, 
=(VM_MAXUSER_ADDRESS-3) cmp x0, x2 b.cs fsu_fault_nopcb adr x6, fsu_fault /* Load the fault handler */ SET_FAULT_HANDLER(x6, x2) /* And set it */ ldtr w0, [x0] /* Try loading the data */ SET_FAULT_HANDLER(xzr, x2) /* Reset the fault handler */ str w0, [x1] /* Save the data in kernel space */ mov w0, #0 /* Success */ ret /* Return */ END(fueword32) /* * long fueword(volatile const void *, int64_t *) * int64_t fueword64(volatile const void *, int64_t *) */ ENTRY(fueword) EENTRY(fueword64) ldr x2, =(VM_MAXUSER_ADDRESS-7) cmp x0, x2 b.cs fsu_fault_nopcb adr x6, fsu_fault /* Load the fault handler */ SET_FAULT_HANDLER(x6, x2) /* And set it */ ldtr x0, [x0] /* Try loading the data */ SET_FAULT_HANDLER(xzr, x2) /* Reset the fault handler */ str x0, [x1] /* Save the data in kernel space */ mov x0, #0 /* Success */ ret /* Return */ EEND(fueword64) END(fueword) /* * int subyte(volatile void *, int) */ ENTRY(subyte) ldr x2, =VM_MAXUSER_ADDRESS cmp x0, x2 b.cs fsu_fault_nopcb adr x6, fsu_fault /* Load the fault handler */ SET_FAULT_HANDLER(x6, x2) /* And set it */ sttrb w1, [x0] /* Try storing the data */ SET_FAULT_HANDLER(xzr, x2) /* Reset the fault handler */ mov x0, #0 /* Success */ ret /* Return */ END(subyte) /* * int suword16(volatile void *, int) */ ENTRY(suword16) ldr x2, =(VM_MAXUSER_ADDRESS-1) cmp x0, x2 b.cs fsu_fault_nopcb adr x6, fsu_fault /* Load the fault handler */ SET_FAULT_HANDLER(x6, x2) /* And set it */ sttrh w1, [x0] /* Try storing the data */ SET_FAULT_HANDLER(xzr, x2) /* Reset the fault handler */ mov x0, #0 /* Success */ ret /* Return */ END(suword16) /* * int suword32(volatile void *, int) */ ENTRY(suword32) ldr x2, =(VM_MAXUSER_ADDRESS-3) cmp x0, x2 b.cs fsu_fault_nopcb adr x6, fsu_fault /* Load the fault handler */ SET_FAULT_HANDLER(x6, x2) /* And set it */ sttr w1, [x0] /* Try storing the data */ SET_FAULT_HANDLER(xzr, x2) /* Reset the fault handler */ mov x0, #0 /* Success */ ret /* Return */ END(suword32) /* * int suword(volatile void *, long) */ ENTRY(suword) EENTRY(suword64) ldr x2, =(VM_MAXUSER_ADDRESS-7) cmp x0, x2 b.cs fsu_fault_nopcb adr x6, fsu_fault /* Load the fault handler */ SET_FAULT_HANDLER(x6, x2) /* And set it */ sttr x1, [x0] /* Try storing the data */ SET_FAULT_HANDLER(xzr, x2) /* Reset the fault handler */ mov x0, #0 /* Success */ ret /* Return */ EEND(suword64) END(suword) ENTRY(setjmp) /* Store the stack pointer */ mov x8, sp str x8, [x0], #8 /* Store the general purpose registers and lr */ stp x19, x20, [x0], #16 stp x21, x22, [x0], #16 stp x23, x24, [x0], #16 stp x25, x26, [x0], #16 stp x27, x28, [x0], #16 stp x29, lr, [x0], #16 /* Return value */ mov x0, #0 ret END(setjmp) ENTRY(longjmp) /* Restore the stack pointer */ ldr x8, [x0], #8 mov sp, x8 /* Restore the general purpose registers and lr */ ldp x19, x20, [x0], #16 ldp x21, x22, [x0], #16 ldp x23, x24, [x0], #16 ldp x25, x26, [x0], #16 ldp x27, x28, [x0], #16 ldp x29, lr, [x0], #16 /* Load the return value */ mov x0, x1 ret END(longjmp) /* * pagezero, simple implementation */ ENTRY(pagezero_simple) add x1, x0, #PAGE_SIZE 1: stp xzr, xzr, [x0], #0x10 stp xzr, xzr, [x0], #0x10 stp xzr, xzr, [x0], #0x10 stp xzr, xzr, [x0], #0x10 cmp x0, x1 b.ne 1b ret END(pagezero_simple) /* * pagezero, cache assisted */ ENTRY(pagezero_cache) add x1, x0, #PAGE_SIZE ldr x2, =dczva_line_size ldr x2, [x2] 1: dc zva, x0 add x0, x0, x2 cmp x0, x1 b.ne 1b ret END(pagezero_cache) diff --git a/sys/i386/i386/copyout.c b/sys/i386/i386/copyout.c index 84615cd8670b..428acd616cde 100644 --- a/sys/i386/i386/copyout.c +++ 
b/sys/i386/i386/copyout.c @@ -1,475 +1,477 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2018 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include int copyin_fast(const void *udaddr, void *kaddr, size_t len, u_int); static int (*copyin_fast_tramp)(const void *, void *, size_t, u_int); int copyout_fast(const void *kaddr, void *udaddr, size_t len, u_int); static int (*copyout_fast_tramp)(const void *, void *, size_t, u_int); int fubyte_fast(volatile const void *base, u_int kcr3); static int (*fubyte_fast_tramp)(volatile const void *, u_int); int fuword16_fast(volatile const void *base, u_int kcr3); static int (*fuword16_fast_tramp)(volatile const void *, u_int); int fueword_fast(volatile const void *base, long *val, u_int kcr3); static int (*fueword_fast_tramp)(volatile const void *, long *, u_int); int subyte_fast(volatile void *base, int val, u_int kcr3); static int (*subyte_fast_tramp)(volatile void *, int, u_int); int suword16_fast(volatile void *base, int val, u_int kcr3); static int (*suword16_fast_tramp)(volatile void *, int, u_int); int suword_fast(volatile void *base, long val, u_int kcr3); static int (*suword_fast_tramp)(volatile void *, long, u_int); static int fast_copyout = 1; SYSCTL_INT(_machdep, OID_AUTO, fast_copyout, CTLFLAG_RWTUN, &fast_copyout, 0, ""); void copyout_init_tramp(void) { copyin_fast_tramp = (int (*)(const void *, void *, size_t, u_int))( (uintptr_t)copyin_fast + setidt_disp); copyout_fast_tramp = (int (*)(const void *, void *, size_t, u_int))( (uintptr_t)copyout_fast + setidt_disp); fubyte_fast_tramp = (int (*)(volatile const void *, u_int))( (uintptr_t)fubyte_fast + setidt_disp); fuword16_fast_tramp = (int (*)(volatile const void *, u_int))( (uintptr_t)fuword16_fast + setidt_disp); fueword_fast_tramp = (int (*)(volatile const void *, long *, u_int))( (uintptr_t)fueword_fast + setidt_disp); subyte_fast_tramp = (int (*)(volatile void *, int, u_int))( (uintptr_t)subyte_fast + setidt_disp); suword16_fast_tramp = (int (*)(volatile void *, int, u_int))( 
(uintptr_t)suword16_fast + setidt_disp); suword_fast_tramp = (int (*)(volatile void *, long, u_int))( (uintptr_t)suword_fast + setidt_disp); } int cp_slow0(vm_offset_t uva, size_t len, bool write, void (*f)(vm_offset_t, void *), void *arg) { struct pcpu *pc; vm_page_t m[2]; vm_offset_t kaddr; int error, i, plen; bool sleepable; plen = howmany(uva - trunc_page(uva) + len, PAGE_SIZE); MPASS(plen <= nitems(m)); error = 0; i = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, uva, len, (write ? VM_PROT_WRITE : VM_PROT_READ) | VM_PROT_QUICK_NOFAULT, m, nitems(m)); if (i != plen) return (EFAULT); sched_pin(); pc = get_pcpu(); if (!THREAD_CAN_SLEEP() || curthread->td_vslock_sz > 0 || (curthread->td_pflags & TDP_NOFAULTING) != 0) { sleepable = false; mtx_lock(&pc->pc_copyout_mlock); kaddr = pc->pc_copyout_maddr; } else { sleepable = true; sx_xlock(&pc->pc_copyout_slock); kaddr = pc->pc_copyout_saddr; } pmap_cp_slow0_map(kaddr, plen, m); kaddr += uva - trunc_page(uva); f(kaddr, arg); sched_unpin(); if (sleepable) sx_xunlock(&pc->pc_copyout_slock); else mtx_unlock(&pc->pc_copyout_mlock); vm_page_unhold_pages(m, plen); return (error); } struct copyinstr_arg0 { vm_offset_t kc; size_t len; size_t alen; bool end; }; static void copyinstr_slow0(vm_offset_t kva, void *arg) { struct copyinstr_arg0 *ca; char c; ca = arg; MPASS(ca->alen == 0 && ca->len > 0 && !ca->end); while (ca->alen < ca->len && !ca->end) { c = *(char *)(kva + ca->alen); *(char *)ca->kc = c; ca->alen++; ca->kc++; if (c == '\0') ca->end = true; } } int copyinstr(const void *udaddr, void *kaddr, size_t maxlen, size_t *lencopied) { struct copyinstr_arg0 ca; vm_offset_t uc; size_t plen; int error; error = 0; ca.end = false; for (plen = 0, uc = (vm_offset_t)udaddr, ca.kc = (vm_offset_t)kaddr; plen < maxlen && !ca.end; uc += ca.alen, plen += ca.alen) { ca.len = round_page(uc) - uc; if (ca.len == 0) ca.len = PAGE_SIZE; if (plen + ca.len > maxlen) ca.len = maxlen - plen; ca.alen = 0; if (cp_slow0(uc, ca.len, false, copyinstr_slow0, &ca) != 0) { error = EFAULT; break; } } if (!ca.end && plen == maxlen && error == 0) error = ENAMETOOLONG; if (lencopied != NULL) *lencopied = plen; return (error); } struct copyin_arg0 { vm_offset_t kc; size_t len; }; static void copyin_slow0(vm_offset_t kva, void *arg) { struct copyin_arg0 *ca; ca = arg; bcopy((void *)kva, (void *)ca->kc, ca->len); } int copyin(const void *udaddr, void *kaddr, size_t len) { struct copyin_arg0 ca; vm_offset_t uc; size_t plen; if ((uintptr_t)udaddr + len < (uintptr_t)udaddr || (uintptr_t)udaddr + len > VM_MAXUSER_ADDRESS) return (EFAULT); if (len == 0 || (fast_copyout && len <= TRAMP_COPYOUT_SZ && copyin_fast_tramp(udaddr, kaddr, len, pmap_get_kcr3()) == 0)) return (0); for (plen = 0, uc = (vm_offset_t)udaddr, ca.kc = (vm_offset_t)kaddr; plen < len; uc += ca.len, ca.kc += ca.len, plen += ca.len) { ca.len = round_page(uc) - uc; if (ca.len == 0) ca.len = PAGE_SIZE; if (plen + ca.len > len) ca.len = len - plen; if (cp_slow0(uc, ca.len, false, copyin_slow0, &ca) != 0) return (EFAULT); } return (0); } static void copyout_slow0(vm_offset_t kva, void *arg) { struct copyin_arg0 *ca; ca = arg; bcopy((void *)ca->kc, (void *)kva, ca->len); } int copyout(const void *kaddr, void *udaddr, size_t len) { struct copyin_arg0 ca; vm_offset_t uc; size_t plen; if ((uintptr_t)udaddr + len < (uintptr_t)udaddr || (uintptr_t)udaddr + len > VM_MAXUSER_ADDRESS) return (EFAULT); if (len == 0 || (fast_copyout && len <= TRAMP_COPYOUT_SZ && copyout_fast_tramp(kaddr, udaddr, len, pmap_get_kcr3()) == 0)) return 
(0); for (plen = 0, uc = (vm_offset_t)udaddr, ca.kc = (vm_offset_t)kaddr; plen < len; uc += ca.len, ca.kc += ca.len, plen += ca.len) { ca.len = round_page(uc) - uc; if (ca.len == 0) ca.len = PAGE_SIZE; if (plen + ca.len > len) ca.len = len - plen; if (cp_slow0(uc, ca.len, true, copyout_slow0, &ca) != 0) return (EFAULT); } return (0); } /* * Fetch (load) a 32-bit word, a 16-bit word, or an 8-bit byte from user * memory. */ static void fubyte_slow0(vm_offset_t kva, void *arg) { *(int *)arg = *(u_char *)kva; } int fubyte(volatile const void *base) { int res; if ((uintptr_t)base + sizeof(uint8_t) < (uintptr_t)base || (uintptr_t)base + sizeof(uint8_t) > VM_MAXUSER_ADDRESS) return (-1); if (fast_copyout) { res = fubyte_fast_tramp(base, pmap_get_kcr3()); if (res != -1) return (res); } if (cp_slow0((vm_offset_t)base, sizeof(char), false, fubyte_slow0, &res) != 0) return (-1); return (res); } static void fuword16_slow0(vm_offset_t kva, void *arg) { *(int *)arg = *(uint16_t *)kva; } int fuword16(volatile const void *base) { int res; if ((uintptr_t)base + sizeof(uint16_t) < (uintptr_t)base || (uintptr_t)base + sizeof(uint16_t) > VM_MAXUSER_ADDRESS) return (-1); if (fast_copyout) { res = fuword16_fast_tramp(base, pmap_get_kcr3()); if (res != -1) return (res); } if (cp_slow0((vm_offset_t)base, sizeof(uint16_t), false, fuword16_slow0, &res) != 0) return (-1); return (res); } static void fueword_slow0(vm_offset_t kva, void *arg) { *(uint32_t *)arg = *(uint32_t *)kva; } int fueword(volatile const void *base, long *val) { uint32_t res; if ((uintptr_t)base + sizeof(*val) < (uintptr_t)base || (uintptr_t)base + sizeof(*val) > VM_MAXUSER_ADDRESS) return (-1); if (fast_copyout) { if (fueword_fast_tramp(base, val, pmap_get_kcr3()) == 0) return (0); } if (cp_slow0((vm_offset_t)base, sizeof(long), false, fueword_slow0, &res) != 0) return (-1); *val = res; return (0); } int fueword32(volatile const void *base, int32_t *val) { return (fueword(base, (long *)val)); } /* * Store a 32-bit word, a 16-bit word, or an 8-bit byte to user memory. */ static void subyte_slow0(vm_offset_t kva, void *arg) { *(u_char *)kva = *(int *)arg; } int subyte(volatile void *base, int byte) { if ((uintptr_t)base + sizeof(uint8_t) < (uintptr_t)base || (uintptr_t)base + sizeof(uint8_t) > VM_MAXUSER_ADDRESS) return (-1); if (fast_copyout && subyte_fast_tramp(base, byte, pmap_get_kcr3()) == 0) return (0); return (cp_slow0((vm_offset_t)base, sizeof(u_char), true, subyte_slow0, &byte) != 0 ? -1 : 0); } static void suword16_slow0(vm_offset_t kva, void *arg) { *(int *)kva = *(uint16_t *)arg; } int suword16(volatile void *base, int word) { if ((uintptr_t)base + sizeof(uint16_t) < (uintptr_t)base || (uintptr_t)base + sizeof(uint16_t) > VM_MAXUSER_ADDRESS) return (-1); if (fast_copyout && suword16_fast_tramp(base, word, pmap_get_kcr3()) == 0) return (0); return (cp_slow0((vm_offset_t)base, sizeof(int16_t), true, suword16_slow0, &word) != 0 ? -1 : 0); } static void suword_slow0(vm_offset_t kva, void *arg) { *(int *)kva = *(uint32_t *)arg; } int suword(volatile void *base, long word) { if ((uintptr_t)base + sizeof(word) < (uintptr_t)base || (uintptr_t)base + sizeof(word) > VM_MAXUSER_ADDRESS) return (-1); if (fast_copyout && suword_fast_tramp(base, word, pmap_get_kcr3()) == 0) return (0); return (cp_slow0((vm_offset_t)base, sizeof(long), true, suword_slow0, &word) != 0 ? 
-1 : 0); } int suword32(volatile void *base, int32_t word) { return (suword(base, word)); } struct casueword_arg0 { uint32_t oldval; uint32_t newval; + int res; }; static void casueword_slow0(vm_offset_t kva, void *arg) { struct casueword_arg0 *ca; ca = arg; - atomic_fcmpset_int((u_int *)kva, &ca->oldval, ca->newval); + ca->res = 1 - atomic_fcmpset_int((u_int *)kva, &ca->oldval, + ca->newval); } int casueword32(volatile uint32_t *base, uint32_t oldval, uint32_t *oldvalp, uint32_t newval) { struct casueword_arg0 ca; int res; ca.oldval = oldval; ca.newval = newval; res = cp_slow0((vm_offset_t)base, sizeof(int32_t), true, casueword_slow0, &ca); if (res == 0) { *oldvalp = ca.oldval; - return (0); + return (ca.res); } return (-1); } int casueword(volatile u_long *base, u_long oldval, u_long *oldvalp, u_long newval) { struct casueword_arg0 ca; int res; ca.oldval = oldval; ca.newval = newval; res = cp_slow0((vm_offset_t)base, sizeof(int32_t), true, casueword_slow0, &ca); if (res == 0) { *oldvalp = ca.oldval; - return (0); + return (ca.res); } return (-1); } diff --git a/sys/kern/kern_umtx.c b/sys/kern/kern_umtx.c index f09369b35fdf..7f3f0f0e0426 100644 --- a/sys/kern/kern_umtx.c +++ b/sys/kern/kern_umtx.c @@ -1,4579 +1,4713 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2015, 2016 The FreeBSD Foundation * Copyright (c) 2004, David Xu * Copyright (c) 2002, Jeffrey Roberson * All rights reserved. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_umtx_profiling.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_FREEBSD32 #include #endif #define _UMUTEX_TRY 1 #define _UMUTEX_WAIT 2 #ifdef UMTX_PROFILING #define UPROF_PERC_BIGGER(w, f, sw, sf) \ (((w) > (sw)) || ((w) == (sw) && (f) > (sf))) #endif /* Priority inheritance mutex info. 
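 *
 * The structure below records the owning thread, a reference count,
 * the linkage into the owner's list of contested PI mutexes, the hash
 * chain linkage, the priority-sorted queue of blocked waiters, and
 * the umtx_key identifying the userland lock object.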
*/ struct umtx_pi { /* Owner thread */ struct thread *pi_owner; /* Reference count */ int pi_refcount; /* List entry to link umtx holding by thread */ TAILQ_ENTRY(umtx_pi) pi_link; /* List entry in hash */ TAILQ_ENTRY(umtx_pi) pi_hashlink; /* List for waiters */ TAILQ_HEAD(,umtx_q) pi_blocked; /* Identify a userland lock object */ struct umtx_key pi_key; }; /* A userland synchronous object user. */ struct umtx_q { /* Linked list for the hash. */ TAILQ_ENTRY(umtx_q) uq_link; /* Umtx key. */ struct umtx_key uq_key; /* Umtx flags. */ int uq_flags; #define UQF_UMTXQ 0x0001 /* The thread waits on. */ struct thread *uq_thread; /* * Blocked on PI mutex. read can use chain lock * or umtx_lock, write must have both chain lock and * umtx_lock being hold. */ struct umtx_pi *uq_pi_blocked; /* On blocked list */ TAILQ_ENTRY(umtx_q) uq_lockq; /* Thread contending with us */ TAILQ_HEAD(,umtx_pi) uq_pi_contested; /* Inherited priority from PP mutex */ u_char uq_inherited_pri; /* Spare queue ready to be reused */ struct umtxq_queue *uq_spare_queue; /* The queue we on */ struct umtxq_queue *uq_cur_queue; }; TAILQ_HEAD(umtxq_head, umtx_q); /* Per-key wait-queue */ struct umtxq_queue { struct umtxq_head head; struct umtx_key key; LIST_ENTRY(umtxq_queue) link; int length; }; LIST_HEAD(umtxq_list, umtxq_queue); /* Userland lock object's wait-queue chain */ struct umtxq_chain { /* Lock for this chain. */ struct mtx uc_lock; /* List of sleep queues. */ struct umtxq_list uc_queue[2]; #define UMTX_SHARED_QUEUE 0 #define UMTX_EXCLUSIVE_QUEUE 1 LIST_HEAD(, umtxq_queue) uc_spare_queue; /* Busy flag */ char uc_busy; /* Chain lock waiters */ int uc_waiters; /* All PI in the list */ TAILQ_HEAD(,umtx_pi) uc_pi_list; #ifdef UMTX_PROFILING u_int length; u_int max_length; #endif }; #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED) /* * Don't propagate time-sharing priority, there is a security reason, * a user can simply introduce PI-mutex, let thread A lock the mutex, * and let another thread B block on the mutex, because B is * sleeping, its priority will be boosted, this causes A's priority to * be boosted via priority propagating too and will never be lowered even * if it is using 100%CPU, this is unfair to other processes. */ #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\ (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\ PRI_MAX_TIMESHARE : (td)->td_user_pri) #define GOLDEN_RATIO_PRIME 2654404609U #ifndef UMTX_CHAINS #define UMTX_CHAINS 512 #endif #define UMTX_SHIFTS (__WORD_BIT - 9) #define GET_SHARE(flags) \ (((flags) & USYNC_PROCESS_SHARED) == 0 ? 
THREAD_SHARE : PROCESS_SHARE) #define BUSY_SPINS 200 struct abs_timeout { int clockid; bool is_abs_real; /* TIMER_ABSTIME && CLOCK_REALTIME* */ struct timespec cur; struct timespec end; }; #ifdef COMPAT_FREEBSD32 struct umutex32 { volatile __lwpid_t m_owner; /* Owner of the mutex */ __uint32_t m_flags; /* Flags of the mutex */ __uint32_t m_ceilings[2]; /* Priority protect ceiling */ __uint32_t m_rb_lnk; /* Robust linkage */ __uint32_t m_pad; __uint32_t m_spare[2]; }; _Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32"); _Static_assert(__offsetof(struct umutex, m_spare[0]) == __offsetof(struct umutex32, m_spare[0]), "m_spare32"); #endif int umtx_shm_vnobj_persistent = 0; SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN, &umtx_shm_vnobj_persistent, 0, "False forces destruction of umtx attached to file, on last close"); static int umtx_max_rb = 1000; SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN, &umtx_max_rb, 0, ""); static uma_zone_t umtx_pi_zone; static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS]; static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory"); static int umtx_pi_allocated; static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug"); SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD, &umtx_pi_allocated, 0, "Allocated umtx_pi"); static int umtx_verbose_rb = 1; SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN, &umtx_verbose_rb, 0, ""); #ifdef UMTX_PROFILING static long max_length; SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length"); static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats"); #endif static void abs_timeout_update(struct abs_timeout *timo); static void umtx_shm_init(void); static void umtxq_sysinit(void *); static void umtxq_hash(struct umtx_key *key); static struct umtxq_chain *umtxq_getchain(struct umtx_key *key); static void umtxq_lock(struct umtx_key *key); static void umtxq_unlock(struct umtx_key *key); static void umtxq_busy(struct umtx_key *key); static void umtxq_unbusy(struct umtx_key *key); static void umtxq_insert_queue(struct umtx_q *uq, int q); static void umtxq_remove_queue(struct umtx_q *uq, int q); static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *); static int umtxq_count(struct umtx_key *key); static struct umtx_pi *umtx_pi_alloc(int); static void umtx_pi_free(struct umtx_pi *pi); static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb); static void umtx_thread_cleanup(struct thread *td); static void umtx_exec_hook(void *arg __unused, struct proc *p __unused, struct image_params *imgp __unused); SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL); #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE) #define umtxq_insert(uq) umtxq_insert_queue((uq), UMTX_SHARED_QUEUE) #define umtxq_remove(uq) umtxq_remove_queue((uq), UMTX_SHARED_QUEUE) static struct mtx umtx_lock; #ifdef UMTX_PROFILING static void umtx_init_profiling(void) { struct sysctl_oid *chain_oid; char chain_name[10]; int i; for (i = 0; i < UMTX_CHAINS; ++i) { snprintf(chain_name, sizeof(chain_name), "%d", i); chain_oid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO, chain_name, CTLFLAG_RD, NULL, "umtx hash stats"); SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL); SYSCTL_ADD_INT(NULL, 
SYSCTL_CHILDREN(chain_oid), OID_AUTO, "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL); } } static int sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS) { char buf[512]; struct sbuf sb; struct umtxq_chain *uc; u_int fract, i, j, tot, whole; u_int sf0, sf1, sf2, sf3, sf4; u_int si0, si1, si2, si3, si4; u_int sw0, sw1, sw2, sw3, sw4; sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN); for (i = 0; i < 2; i++) { tot = 0; for (j = 0; j < UMTX_CHAINS; ++j) { uc = &umtxq_chains[i][j]; mtx_lock(&uc->uc_lock); tot += uc->max_length; mtx_unlock(&uc->uc_lock); } if (tot == 0) sbuf_printf(&sb, "%u) Empty ", i); else { sf0 = sf1 = sf2 = sf3 = sf4 = 0; si0 = si1 = si2 = si3 = si4 = 0; sw0 = sw1 = sw2 = sw3 = sw4 = 0; for (j = 0; j < UMTX_CHAINS; j++) { uc = &umtxq_chains[i][j]; mtx_lock(&uc->uc_lock); whole = uc->max_length * 100; mtx_unlock(&uc->uc_lock); fract = (whole % tot) * 100; if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) { sf0 = fract; si0 = j; sw0 = whole; } else if (UPROF_PERC_BIGGER(whole, fract, sw1, sf1)) { sf1 = fract; si1 = j; sw1 = whole; } else if (UPROF_PERC_BIGGER(whole, fract, sw2, sf2)) { sf2 = fract; si2 = j; sw2 = whole; } else if (UPROF_PERC_BIGGER(whole, fract, sw3, sf3)) { sf3 = fract; si3 = j; sw3 = whole; } else if (UPROF_PERC_BIGGER(whole, fract, sw4, sf4)) { sf4 = fract; si4 = j; sw4 = whole; } } sbuf_printf(&sb, "queue %u:\n", i); sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot, sf0 / tot, si0); sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot, sf1 / tot, si1); sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot, sf2 / tot, si2); sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot, sf3 / tot, si3); sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot, sf4 / tot, si4); } } sbuf_trim(&sb); sbuf_finish(&sb); sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); sbuf_delete(&sb); return (0); } static int sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS) { struct umtxq_chain *uc; u_int i, j; int clear, error; clear = 0; error = sysctl_handle_int(oidp, &clear, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (clear != 0) { for (i = 0; i < 2; ++i) { for (j = 0; j < UMTX_CHAINS; ++j) { uc = &umtxq_chains[i][j]; mtx_lock(&uc->uc_lock); uc->length = 0; uc->max_length = 0; mtx_unlock(&uc->uc_lock); } } } return (0); } SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics"); SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length"); #endif static void umtxq_sysinit(void *arg __unused) { int i, j; umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); for (i = 0; i < 2; ++i) { for (j = 0; j < UMTX_CHAINS; ++j) { mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL, MTX_DEF | MTX_DUPOK); LIST_INIT(&umtxq_chains[i][j].uc_queue[0]); LIST_INIT(&umtxq_chains[i][j].uc_queue[1]); LIST_INIT(&umtxq_chains[i][j].uc_spare_queue); TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list); umtxq_chains[i][j].uc_busy = 0; umtxq_chains[i][j].uc_waiters = 0; #ifdef UMTX_PROFILING umtxq_chains[i][j].length = 0; umtxq_chains[i][j].max_length = 0; #endif } } #ifdef UMTX_PROFILING umtx_init_profiling(); #endif mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF); EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL, EVENTHANDLER_PRI_ANY); umtx_shm_init(); } struct umtx_q * 
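/*
 * umtxq_hash() below is plain multiplicative (Fibonacci) hashing: the
 * address material of the key is scaled by GOLDEN_RATIO_PRIME, the
 * high-order bits are selected by the UMTX_SHIFTS right shift, and
 * the result is reduced modulo UMTX_CHAINS to choose a chain.
 */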
umtxq_alloc(void) { struct umtx_q *uq; uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO); uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO); TAILQ_INIT(&uq->uq_spare_queue->head); TAILQ_INIT(&uq->uq_pi_contested); uq->uq_inherited_pri = PRI_MAX; return (uq); } void umtxq_free(struct umtx_q *uq) { MPASS(uq->uq_spare_queue != NULL); free(uq->uq_spare_queue, M_UMTX); free(uq, M_UMTX); } static inline void umtxq_hash(struct umtx_key *key) { unsigned n; n = (uintptr_t)key->info.both.a + key->info.both.b; key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS; } static inline struct umtxq_chain * umtxq_getchain(struct umtx_key *key) { if (key->type <= TYPE_SEM) return (&umtxq_chains[1][key->hash]); return (&umtxq_chains[0][key->hash]); } /* * Lock a chain. */ static inline void umtxq_lock(struct umtx_key *key) { struct umtxq_chain *uc; uc = umtxq_getchain(key); mtx_lock(&uc->uc_lock); } /* * Unlock a chain. */ static inline void umtxq_unlock(struct umtx_key *key) { struct umtxq_chain *uc; uc = umtxq_getchain(key); mtx_unlock(&uc->uc_lock); } /* * Set chain to busy state when following operation * may be blocked (kernel mutex can not be used). */ static inline void umtxq_busy(struct umtx_key *key) { struct umtxq_chain *uc; uc = umtxq_getchain(key); mtx_assert(&uc->uc_lock, MA_OWNED); if (uc->uc_busy) { #ifdef SMP if (smp_cpus > 1) { int count = BUSY_SPINS; if (count > 0) { umtxq_unlock(key); while (uc->uc_busy && --count > 0) cpu_spinwait(); umtxq_lock(key); } } #endif while (uc->uc_busy) { uc->uc_waiters++; msleep(uc, &uc->uc_lock, 0, "umtxqb", 0); uc->uc_waiters--; } } uc->uc_busy = 1; } /* * Unbusy a chain. */ static inline void umtxq_unbusy(struct umtx_key *key) { struct umtxq_chain *uc; uc = umtxq_getchain(key); mtx_assert(&uc->uc_lock, MA_OWNED); KASSERT(uc->uc_busy != 0, ("not busy")); uc->uc_busy = 0; if (uc->uc_waiters) wakeup_one(uc); } static inline void umtxq_unbusy_unlocked(struct umtx_key *key) { umtxq_lock(key); umtxq_unbusy(key); umtxq_unlock(key); } static struct umtxq_queue * umtxq_queue_lookup(struct umtx_key *key, int q) { struct umtxq_queue *uh; struct umtxq_chain *uc; uc = umtxq_getchain(key); UMTXQ_LOCKED_ASSERT(uc); LIST_FOREACH(uh, &uc->uc_queue[q], link) { if (umtx_key_match(&uh->key, key)) return (uh); } return (NULL); } static inline void umtxq_insert_queue(struct umtx_q *uq, int q) { struct umtxq_queue *uh; struct umtxq_chain *uc; uc = umtxq_getchain(&uq->uq_key); UMTXQ_LOCKED_ASSERT(uc); KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue")); uh = umtxq_queue_lookup(&uq->uq_key, q); if (uh != NULL) { LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link); } else { uh = uq->uq_spare_queue; uh->key = uq->uq_key; LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link); #ifdef UMTX_PROFILING uc->length++; if (uc->length > uc->max_length) { uc->max_length = uc->length; if (uc->max_length > max_length) max_length = uc->max_length; } #endif } uq->uq_spare_queue = NULL; TAILQ_INSERT_TAIL(&uh->head, uq, uq_link); uh->length++; uq->uq_flags |= UQF_UMTXQ; uq->uq_cur_queue = uh; return; } static inline void umtxq_remove_queue(struct umtx_q *uq, int q) { struct umtxq_chain *uc; struct umtxq_queue *uh; uc = umtxq_getchain(&uq->uq_key); UMTXQ_LOCKED_ASSERT(uc); if (uq->uq_flags & UQF_UMTXQ) { uh = uq->uq_cur_queue; TAILQ_REMOVE(&uh->head, uq, uq_link); uh->length--; uq->uq_flags &= ~UQF_UMTXQ; if (TAILQ_EMPTY(&uh->head)) { KASSERT(uh->length == 0, ("inconsistent umtxq_queue length")); #ifdef UMTX_PROFILING 
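/*
 * A note on umtxq_busy() above: it is a two-stage wait.  On SMP it
 * first spins for up to BUSY_SPINS iterations with the chain lock
 * dropped, on the assumption that busy periods are short, and only
 * then sleeps on the chain ("umtxqb") until umtxq_unbusy() clears
 * uc_busy and wakes one waiter.
 */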
uc->length--; #endif LIST_REMOVE(uh, link); } else { uh = LIST_FIRST(&uc->uc_spare_queue); KASSERT(uh != NULL, ("uc_spare_queue is empty")); LIST_REMOVE(uh, link); } uq->uq_spare_queue = uh; uq->uq_cur_queue = NULL; } } /* * Check if there are multiple waiters */ static int umtxq_count(struct umtx_key *key) { struct umtxq_queue *uh; UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); if (uh != NULL) return (uh->length); return (0); } /* * Check if there are multiple PI waiters and return the first * waiter. */ static int umtxq_count_pi(struct umtx_key *key, struct umtx_q **first) { struct umtxq_queue *uh; *first = NULL; UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); if (uh != NULL) { *first = TAILQ_FIRST(&uh->head); return (uh->length); } return (0); } +/* + * Check for possible stops and suspensions while executing a umtx + * locking operation. + * + * The sleep argument controls whether the function can handle a stop + * request itself, or whether it should return ERESTART, with the + * request then processed at the kernel/user boundary in ast. + * + * Typically, when retrying due to casueword(9) failure (rv == 1), we + * should handle the stop requests there, with the exception of cases + * when the thread has busied the umtx key, or when functions return + * immediately if umtxq_check_susp() returned non-zero. On the other + * hand, when retrying the whole lock operation, we had better not + * stop there but delegate the handling to ast. + * + * If the request is for thread termination P_SINGLE_EXIT, we cannot + * handle it at all, and simply return EINTR. + */ static int -umtxq_check_susp(struct thread *td) +umtxq_check_susp(struct thread *td, bool sleep) { struct proc *p; int error; /* * The check for TDF_NEEDSUSPCHK is racy, but it is enough to * eventually break the lockstep loop. */ if ((td->td_flags & TDF_NEEDSUSPCHK) == 0) return (0); error = 0; p = td->td_proc; PROC_LOCK(p); if (P_SHOULDSTOP(p) || ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) { if (p->p_flag & P_SINGLE_EXIT) error = EINTR; else - error = ERESTART; + error = sleep ? thread_suspend_check(0) : ERESTART; } PROC_UNLOCK(p); return (error); } /* * Wake up threads waiting on a userland object. */ static int umtxq_signal_queue(struct umtx_key *key, int n_wake, int q) { struct umtxq_queue *uh; struct umtx_q *uq; int ret; ret = 0; UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); uh = umtxq_queue_lookup(key, q); if (uh != NULL) { while ((uq = TAILQ_FIRST(&uh->head)) != NULL) { umtxq_remove_queue(uq, q); wakeup(uq); if (++ret >= n_wake) return (ret); } } return (ret); } /* * Wake up the specified thread. */ static inline void umtxq_signal_thread(struct umtx_q *uq) { UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key)); umtxq_remove(uq); wakeup(uq); } static inline int tstohz(const struct timespec *tsp) { struct timeval tv; TIMESPEC_TO_TIMEVAL(&tv, tsp); return tvtohz(&tv); } static void abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute, const struct timespec *timeout) { timo->clockid = clockid; if (!absolute) { timo->is_abs_real = false; abs_timeout_update(timo); timespecadd(&timo->cur, timeout, &timo->end); } else { timo->end = *timeout; timo->is_abs_real = clockid == CLOCK_REALTIME || clockid == CLOCK_REALTIME_FAST || clockid == CLOCK_REALTIME_PRECISE; /* * If is_abs_real, umtxq_sleep will read the clock * after setting td_rtcgen; otherwise, read it here.
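 * Publishing td_rtcgen before reading the clock closes the race with
 * a step of the wall clock: a waiter with an absolute CLOCK_REALTIME*
 * deadline can then be woken when the clock jumps, and the deadline
 * is re-evaluated against the new time.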
*/ if (!timo->is_abs_real) { abs_timeout_update(timo); } } } static void abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime) { abs_timeout_init(timo, umtxtime->_clockid, (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout); } static inline void abs_timeout_update(struct abs_timeout *timo) { kern_clock_gettime(curthread, timo->clockid, &timo->cur); } static int abs_timeout_gethz(struct abs_timeout *timo) { struct timespec tts; if (timespeccmp(&timo->end, &timo->cur, <=)) return (-1); timespecsub(&timo->end, &timo->cur, &tts); return (tstohz(&tts)); } static uint32_t umtx_unlock_val(uint32_t flags, bool rb) { if (rb) return (UMUTEX_RB_OWNERDEAD); else if ((flags & UMUTEX_NONCONSISTENT) != 0) return (UMUTEX_RB_NOTRECOV); else return (UMUTEX_UNOWNED); } /* * Put thread into sleep state, before sleeping, check if * thread was removed from umtx queue. */ static inline int umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime) { struct umtxq_chain *uc; int error, timo; if (abstime != NULL && abstime->is_abs_real) { curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation); abs_timeout_update(abstime); } uc = umtxq_getchain(&uq->uq_key); UMTXQ_LOCKED_ASSERT(uc); for (;;) { if (!(uq->uq_flags & UQF_UMTXQ)) { error = 0; break; } if (abstime != NULL) { timo = abs_timeout_gethz(abstime); if (timo < 0) { error = ETIMEDOUT; break; } } else timo = 0; error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo); if (error == EINTR || error == ERESTART) { umtxq_lock(&uq->uq_key); break; } if (abstime != NULL) { if (abstime->is_abs_real) curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation); abs_timeout_update(abstime); } umtxq_lock(&uq->uq_key); } curthread->td_rtcgen = 0; return (error); } /* * Convert userspace address into unique logical address. */ int umtx_key_get(const void *addr, int type, int share, struct umtx_key *key) { struct thread *td = curthread; vm_map_t map; vm_map_entry_t entry; vm_pindex_t pindex; vm_prot_t prot; boolean_t wired; key->type = type; if (share == THREAD_SHARE) { key->shared = 0; key->info.private.vs = td->td_proc->p_vmspace; key->info.private.addr = (uintptr_t)addr; } else { MPASS(share == PROCESS_SHARE || share == AUTO_SHARE); map = &td->td_proc->p_vmspace->vm_map; if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE, &entry, &key->info.shared.object, &pindex, &prot, &wired) != KERN_SUCCESS) { return (EFAULT); } if ((share == PROCESS_SHARE) || (share == AUTO_SHARE && VM_INHERIT_SHARE == entry->inheritance)) { key->shared = 1; key->info.shared.offset = (vm_offset_t)addr - entry->start + entry->offset; vm_object_reference(key->info.shared.object); } else { key->shared = 0; key->info.private.vs = td->td_proc->p_vmspace; key->info.private.addr = (uintptr_t)addr; } vm_map_lookup_done(map, entry); } umtxq_hash(key); return (0); } /* * Release key. */ void umtx_key_release(struct umtx_key *key) { if (key->shared) vm_object_deallocate(key->info.shared.object); } /* * Fetch and compare value, sleep on the address if value is not changed. */ static int do_wait(struct thread *td, void *addr, u_long id, struct _umtx_time *timeout, int compat32, int is_private) { struct abs_timeout timo; struct umtx_q *uq; u_long tmp; uint32_t tmp32; int error = 0; uq = td->td_umtxq; if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, is_private ? 
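/*
 * Note the ordering in do_wait(): the waiter is inserted on the queue
 * before the userspace word is read, so a wakeup racing with the read
 * still finds the waiter queued; the thread then sleeps only if the
 * fetched value still equals the expected id.
 */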
THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0) return (error); if (timeout != NULL) abs_timeout_init2(&timo, timeout); umtxq_lock(&uq->uq_key); umtxq_insert(uq); umtxq_unlock(&uq->uq_key); if (compat32 == 0) { error = fueword(addr, &tmp); if (error != 0) error = EFAULT; } else { error = fueword32(addr, &tmp32); if (error == 0) tmp = tmp32; else error = EFAULT; } umtxq_lock(&uq->uq_key); if (error == 0) { if (tmp == id) error = umtxq_sleep(uq, "uwait", timeout == NULL ? NULL : &timo); if ((uq->uq_flags & UQF_UMTXQ) == 0) error = 0; else umtxq_remove(uq); } else if ((uq->uq_flags & UQF_UMTXQ) != 0) { umtxq_remove(uq); } umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); if (error == ERESTART) error = EINTR; return (error); } /* * Wake up threads sleeping on the specified address. */ int kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private) { struct umtx_key key; int ret; if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0) return (ret); umtxq_lock(&key); umtxq_signal(&key, n_wake); umtxq_unlock(&key); umtx_key_release(&key); return (0); } /* * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. */ static int do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, struct _umtx_time *timeout, int mode) { struct abs_timeout timo; struct umtx_q *uq; uint32_t owner, old, id; int error, rv; id = td->td_tid; uq = td->td_umtxq; error = 0; if (timeout != NULL) abs_timeout_init2(&timo, timeout); /* * Care must be exercised when dealing with umtx structure. It * can fault on any access. */ for (;;) { rv = fueword32(&m->m_owner, &owner); if (rv == -1) return (EFAULT); if (mode == _UMUTEX_WAIT) { if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV) return (0); } else { /* * Robust mutex terminated. Kernel duty is to * return EOWNERDEAD to the userspace. The * umutex.m_flags UMUTEX_NONCONSISTENT is set * by the common userspace code. */ if (owner == UMUTEX_RB_OWNERDEAD) { rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD, &owner, id | UMUTEX_CONTESTED); if (rv == -1) return (EFAULT); - if (owner == UMUTEX_RB_OWNERDEAD) + if (rv == 0) { + MPASS(owner == UMUTEX_RB_OWNERDEAD); return (EOWNERDEAD); /* success */ - rv = umtxq_check_susp(td); + } + MPASS(rv == 1); + rv = umtxq_check_susp(td, false); if (rv != 0) return (rv); continue; } if (owner == UMUTEX_RB_NOTRECOV) return (ENOTRECOVERABLE); /* * Try the uncontested case. This should be * done in userland. */ rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id); /* The address was invalid. */ if (rv == -1) return (EFAULT); /* The acquire succeeded. */ - if (owner == UMUTEX_UNOWNED) + if (rv == 0) { + MPASS(owner == UMUTEX_UNOWNED); return (0); + } /* * If no one owns it but it is contested try * to acquire it. */ + MPASS(rv == 1); if (owner == UMUTEX_CONTESTED) { rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED); /* The address was invalid. */ if (rv == -1) return (EFAULT); - - if (owner == UMUTEX_CONTESTED) + if (rv == 0) { + MPASS(owner == UMUTEX_CONTESTED); return (0); - - rv = umtxq_check_susp(td); - if (rv != 0) - return (rv); + } + if (rv == 1) { + rv = umtxq_check_susp(td, false); + if (rv != 0) + return (rv); + } /* * If this failed the lock has * changed, restart. 
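 * A result of 1 from casueword32() here covers both a genuine compare
 * failure, where another thread changed m_owner, and a spurious store
 * failure; the suspension check above keeps a stopping process from
 * spinning in this retry loop.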
*/ continue; } + + /* rv == 1 but not contested, likely store failure */ + rv = umtxq_check_susp(td, false); + if (rv != 0) + return (rv); } if (mode == _UMUTEX_TRY) return (EBUSY); /* * If we caught a signal, we have retried and now * exit immediately. */ if (error != 0) return (error); if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), &uq->uq_key)) != 0) return (error); umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_insert(uq); umtxq_unlock(&uq->uq_key); /* * Set the contested bit so that a release in user space * knows to use the system call for unlock. If this fails * either some one else has acquired the lock or it has been * released. */ rv = casueword32(&m->m_owner, owner, &old, owner | UMUTEX_CONTESTED); - /* The address was invalid. */ - if (rv == -1) { + /* The address was invalid or casueword failed to store. */ + if (rv == -1 || rv == 1) { umtxq_lock(&uq->uq_key); umtxq_remove(uq); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); - return (EFAULT); + if (rv == -1) + return (EFAULT); + if (rv == 1) { + rv = umtxq_check_susp(td, false); + if (rv != 0) + return (rv); + } + continue; } /* * We set the contested bit, sleep. Otherwise the lock changed * and we need to retry or we lost a race to the thread * unlocking the umtx. */ umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); - if (old == owner) - error = umtxq_sleep(uq, "umtxn", timeout == NULL ? - NULL : &timo); + MPASS(old == owner); + error = umtxq_sleep(uq, "umtxn", timeout == NULL ? + NULL : &timo); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); if (error == 0) - error = umtxq_check_susp(td); + error = umtxq_check_susp(td, false); } return (0); } /* * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex. */ static int do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb) { struct umtx_key key; uint32_t owner, old, id, newlock; int error, count; id = td->td_tid; + +again: /* * Make sure we own this mtx. */ error = fueword32(&m->m_owner, &owner); if (error == -1) return (EFAULT); if ((owner & ~UMUTEX_CONTESTED) != id) return (EPERM); newlock = umtx_unlock_val(flags, rb); if ((owner & UMUTEX_CONTESTED) == 0) { error = casueword32(&m->m_owner, owner, &old, newlock); if (error == -1) return (EFAULT); - if (old == owner) - return (0); - owner = old; + if (error == 1) { + error = umtxq_check_susp(td, false); + if (error != 0) + return (error); + goto again; + } + MPASS(old == owner); + return (0); } /* We should only ever be in here for contested locks */ if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), &key)) != 0) return (error); umtxq_lock(&key); umtxq_busy(&key); count = umtxq_count(&key); umtxq_unlock(&key); /* * When unlocking the umtx, it must be marked as unowned if * there is zero or one thread only waiting for it. * Otherwise, it must be marked as contested. */ if (count > 1) newlock |= UMUTEX_CONTESTED; error = casueword32(&m->m_owner, owner, &old, newlock); umtxq_lock(&key); umtxq_signal(&key, 1); umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); if (error == -1) return (EFAULT); - if (old != owner) - return (EINVAL); + if (error == 1) { + if (old != owner) + return (EINVAL); + error = umtxq_check_susp(td, false); + if (error != 0) + return (error); + goto again; + } return (0); } /* * Check if the mutex is available and wake up a waiter, * only for simple mutex. 
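 * A waiter is woken only when the mutex is unowned or in one of the
 * robust termination states; with at most one waiter queued, the
 * kernel also tries to reset m_owner to UMUTEX_UNOWNED, restarting
 * the whole check if that compare-and-set fails.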
*/ static int do_wake_umutex(struct thread *td, struct umutex *m) { struct umtx_key key; uint32_t owner; uint32_t flags; int error; int count; +again: error = fueword32(&m->m_owner, &owner); if (error == -1) return (EFAULT); if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD && owner != UMUTEX_RB_NOTRECOV) return (0); error = fueword32(&m->m_flags, &flags); if (error == -1) return (EFAULT); /* We should only ever be in here for contested locks */ if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), &key)) != 0) return (error); umtxq_lock(&key); umtxq_busy(&key); count = umtxq_count(&key); umtxq_unlock(&key); if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD && owner != UMUTEX_RB_NOTRECOV) { error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, UMUTEX_UNOWNED); - if (error == -1) + if (error == -1) { error = EFAULT; + } else if (error == 1) { + umtxq_lock(&key); + umtxq_unbusy(&key); + umtxq_unlock(&key); + umtx_key_release(&key); + error = umtxq_check_susp(td, false); + if (error != 0) + return (error); + goto again; + } } umtxq_lock(&key); - if (error == 0 && count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 || - owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV)) + if (error == 0 && count != 0) { + MPASS((owner & ~UMUTEX_CONTESTED) == 0 || + owner == UMUTEX_RB_OWNERDEAD || + owner == UMUTEX_RB_NOTRECOV); umtxq_signal(&key, 1); + } umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); return (error); } /* * Check if the mutex has waiters and tries to fix contention bit. */ static int do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags) { struct umtx_key key; uint32_t owner, old; int type; int error; int count; switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST)) { case 0: case UMUTEX_ROBUST: type = TYPE_NORMAL_UMUTEX; break; case UMUTEX_PRIO_INHERIT: type = TYPE_PI_UMUTEX; break; case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST): type = TYPE_PI_ROBUST_UMUTEX; break; case UMUTEX_PRIO_PROTECT: type = TYPE_PP_UMUTEX; break; case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST): type = TYPE_PP_ROBUST_UMUTEX; break; default: return (EINVAL); } if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0) return (error); owner = 0; umtxq_lock(&key); umtxq_busy(&key); count = umtxq_count(&key); umtxq_unlock(&key); + + error = fueword32(&m->m_owner, &owner); + if (error == -1) + error = EFAULT; + /* - * Only repair contention bit if there is a waiter, this means the mutex - * is still being referenced by userland code, otherwise don't update - * any memory. + * Only repair contention bit if there is a waiter, this means + * the mutex is still being referenced by userland code, + * otherwise don't update any memory. 
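 *
 * The reworked loop below folds the former count > 1 and count == 1
 * cases together: the contested bit is asserted while there are
 * multiple waiters, or a single waiter with the word still owned, and
 * a result of 1 from casueword32() feeds the re-read owner value into
 * the next iteration.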
*/ - if (count > 1) { - error = fueword32(&m->m_owner, &owner); - if (error == -1) + while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 && + (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) { + error = casueword32(&m->m_owner, owner, &old, + owner | UMUTEX_CONTESTED); + if (error == -1) { error = EFAULT; - while (error == 0 && (owner & UMUTEX_CONTESTED) == 0) { - error = casueword32(&m->m_owner, owner, &old, - owner | UMUTEX_CONTESTED); - if (error == -1) { - error = EFAULT; - break; - } - if (old == owner) - break; - owner = old; - error = umtxq_check_susp(td); - if (error != 0) - break; + break; } - } else if (count == 1) { - error = fueword32(&m->m_owner, &owner); - if (error == -1) - error = EFAULT; - while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 && - (owner & UMUTEX_CONTESTED) == 0) { - error = casueword32(&m->m_owner, owner, &old, - owner | UMUTEX_CONTESTED); - if (error == -1) { - error = EFAULT; - break; - } - if (old == owner) - break; - owner = old; - error = umtxq_check_susp(td); - if (error != 0) - break; + if (error == 0) { + MPASS(old == owner); + break; } + owner = old; + error = umtxq_check_susp(td, false); } + umtxq_lock(&key); if (error == EFAULT) { umtxq_signal(&key, INT_MAX); } else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 || owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV)) umtxq_signal(&key, 1); umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); return (error); } static inline struct umtx_pi * umtx_pi_alloc(int flags) { struct umtx_pi *pi; pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags); TAILQ_INIT(&pi->pi_blocked); atomic_add_int(&umtx_pi_allocated, 1); return (pi); } static inline void umtx_pi_free(struct umtx_pi *pi) { uma_zfree(umtx_pi_zone, pi); atomic_add_int(&umtx_pi_allocated, -1); } /* * Adjust the thread's position on a pi_state after its priority has been * changed. */ static int umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td) { struct umtx_q *uq, *uq1, *uq2; struct thread *td1; mtx_assert(&umtx_lock, MA_OWNED); if (pi == NULL) return (0); uq = td->td_umtxq; /* * Check if the thread needs to be moved on the blocked chain. * It needs to be moved if either its priority is lower than * the previous thread or higher than the next thread. */ uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq); uq2 = TAILQ_NEXT(uq, uq_lockq); if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) || (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) { /* * Remove thread from blocked chain and determine where * it should be moved to. */ TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { td1 = uq1->uq_thread; MPASS(td1->td_proc->p_magic == P_MAGIC); if (UPRI(td1) > UPRI(td)) break; } if (uq1 == NULL) TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); else TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); } return (1); } static struct umtx_pi * umtx_pi_next(struct umtx_pi *pi) { struct umtx_q *uq_owner; if (pi->pi_owner == NULL) return (NULL); uq_owner = pi->pi_owner->td_umtxq; if (uq_owner == NULL) return (NULL); return (uq_owner->uq_pi_blocked); } /* * Floyd's Cycle-Finding Algorithm. 
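 * umtx_pi_check_loop() walks the chain formed by following a PI mutex
 * to its owner and then to the mutex that owner is itself blocked on;
 * the slow cursor advances one step per iteration and the fast cursor
 * two, so on a cyclic chain the two must eventually meet.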
*/ static bool umtx_pi_check_loop(struct umtx_pi *pi) { struct umtx_pi *pi1; /* fast iterator */ mtx_assert(&umtx_lock, MA_OWNED); if (pi == NULL) return (false); pi1 = pi; for (;;) { pi = umtx_pi_next(pi); if (pi == NULL) break; pi1 = umtx_pi_next(pi1); if (pi1 == NULL) break; pi1 = umtx_pi_next(pi1); if (pi1 == NULL) break; if (pi == pi1) return (true); } return (false); } /* * Propagate priority when a thread is blocked on POSIX * PI mutex. */ static void umtx_propagate_priority(struct thread *td) { struct umtx_q *uq; struct umtx_pi *pi; int pri; mtx_assert(&umtx_lock, MA_OWNED); pri = UPRI(td); uq = td->td_umtxq; pi = uq->uq_pi_blocked; if (pi == NULL) return; if (umtx_pi_check_loop(pi)) return; for (;;) { td = pi->pi_owner; if (td == NULL || td == curthread) return; MPASS(td->td_proc != NULL); MPASS(td->td_proc->p_magic == P_MAGIC); thread_lock(td); if (td->td_lend_user_pri > pri) sched_lend_user_prio(td, pri); else { thread_unlock(td); break; } thread_unlock(td); /* * Pick up the lock that td is blocked on. */ uq = td->td_umtxq; pi = uq->uq_pi_blocked; if (pi == NULL) break; /* Resort td on the list if needed. */ umtx_pi_adjust_thread(pi, td); } } /* * Unpropagate priority for a PI mutex when a thread blocked on * it is interrupted by signal or resumed by others. */ static void umtx_repropagate_priority(struct umtx_pi *pi) { struct umtx_q *uq, *uq_owner; struct umtx_pi *pi2; int pri; mtx_assert(&umtx_lock, MA_OWNED); if (umtx_pi_check_loop(pi)) return; while (pi != NULL && pi->pi_owner != NULL) { pri = PRI_MAX; uq_owner = pi->pi_owner->td_umtxq; TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) { uq = TAILQ_FIRST(&pi2->pi_blocked); if (uq != NULL) { if (pri > UPRI(uq->uq_thread)) pri = UPRI(uq->uq_thread); } } if (pri > uq_owner->uq_inherited_pri) pri = uq_owner->uq_inherited_pri; thread_lock(pi->pi_owner); sched_lend_user_prio(pi->pi_owner, pri); thread_unlock(pi->pi_owner); if ((pi = uq_owner->uq_pi_blocked) != NULL) umtx_pi_adjust_thread(pi, uq_owner->uq_thread); } } /* * Insert a PI mutex into owned list. */ static void umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner) { struct umtx_q *uq_owner; uq_owner = owner->td_umtxq; mtx_assert(&umtx_lock, MA_OWNED); MPASS(pi->pi_owner == NULL); pi->pi_owner = owner; TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link); } /* * Disown a PI mutex, and remove it from the owned list. */ static void umtx_pi_disown(struct umtx_pi *pi) { mtx_assert(&umtx_lock, MA_OWNED); TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link); pi->pi_owner = NULL; } /* * Claim ownership of a PI mutex. */ static int umtx_pi_claim(struct umtx_pi *pi, struct thread *owner) { struct umtx_q *uq; int pri; mtx_lock(&umtx_lock); if (pi->pi_owner == owner) { mtx_unlock(&umtx_lock); return (0); } if (pi->pi_owner != NULL) { /* * userland may have already messed the mutex, sigh. */ mtx_unlock(&umtx_lock); return (EPERM); } umtx_pi_setowner(pi, owner); uq = TAILQ_FIRST(&pi->pi_blocked); if (uq != NULL) { pri = UPRI(uq->uq_thread); thread_lock(owner); if (pri < UPRI(owner)) sched_lend_user_prio(owner, pri); thread_unlock(owner); } mtx_unlock(&umtx_lock); return (0); } /* * Adjust a thread's order position in its blocked PI mutex, * this may result new priority propagating process. */ void umtx_pi_adjust(struct thread *td, u_char oldpri) { struct umtx_q *uq; struct umtx_pi *pi; uq = td->td_umtxq; mtx_lock(&umtx_lock); /* * Pick up the lock that td is blocked on. 
*/ pi = uq->uq_pi_blocked; if (pi != NULL) { umtx_pi_adjust_thread(pi, td); umtx_repropagate_priority(pi); } mtx_unlock(&umtx_lock); } /* * Sleep on a PI mutex. */ static int umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner, const char *wmesg, struct abs_timeout *timo, bool shared) { struct thread *td, *td1; struct umtx_q *uq1; int error, pri; #ifdef INVARIANTS struct umtxq_chain *uc; uc = umtxq_getchain(&pi->pi_key); #endif error = 0; td = uq->uq_thread; KASSERT(td == curthread, ("inconsistent uq_thread")); UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key)); KASSERT(uc->uc_busy != 0, ("umtx chain is not busy")); umtxq_insert(uq); mtx_lock(&umtx_lock); if (pi->pi_owner == NULL) { mtx_unlock(&umtx_lock); td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid); mtx_lock(&umtx_lock); if (td1 != NULL) { if (pi->pi_owner == NULL) umtx_pi_setowner(pi, td1); PROC_UNLOCK(td1->td_proc); } } TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { pri = UPRI(uq1->uq_thread); if (pri > UPRI(td)) break; } if (uq1 != NULL) TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); else TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); uq->uq_pi_blocked = pi; thread_lock(td); td->td_flags |= TDF_UPIBLOCKED; thread_unlock(td); umtx_propagate_priority(td); mtx_unlock(&umtx_lock); umtxq_unbusy(&uq->uq_key); error = umtxq_sleep(uq, wmesg, timo); umtxq_remove(uq); mtx_lock(&umtx_lock); uq->uq_pi_blocked = NULL; thread_lock(td); td->td_flags &= ~TDF_UPIBLOCKED; thread_unlock(td); TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); umtx_repropagate_priority(pi); mtx_unlock(&umtx_lock); umtxq_unlock(&uq->uq_key); return (error); } /* * Add reference count for a PI mutex. */ static void umtx_pi_ref(struct umtx_pi *pi) { UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key)); pi->pi_refcount++; } /* * Decrease reference count for a PI mutex, if the counter * is decreased to zero, its memory space is freed. */ static void umtx_pi_unref(struct umtx_pi *pi) { struct umtxq_chain *uc; uc = umtxq_getchain(&pi->pi_key); UMTXQ_LOCKED_ASSERT(uc); KASSERT(pi->pi_refcount > 0, ("invalid reference count")); if (--pi->pi_refcount == 0) { mtx_lock(&umtx_lock); if (pi->pi_owner != NULL) umtx_pi_disown(pi); KASSERT(TAILQ_EMPTY(&pi->pi_blocked), ("blocked queue not empty")); mtx_unlock(&umtx_lock); TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink); umtx_pi_free(pi); } } /* * Find a PI mutex in hash table. */ static struct umtx_pi * umtx_pi_lookup(struct umtx_key *key) { struct umtxq_chain *uc; struct umtx_pi *pi; uc = umtxq_getchain(key); UMTXQ_LOCKED_ASSERT(uc); TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) { if (umtx_key_match(&pi->pi_key, key)) { return (pi); } } return (NULL); } /* * Insert a PI mutex into hash table. */ static inline void umtx_pi_insert(struct umtx_pi *pi) { struct umtxq_chain *uc; uc = umtxq_getchain(&pi->pi_key); UMTXQ_LOCKED_ASSERT(uc); TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink); } /* * Lock a PI mutex. */ static int do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, struct _umtx_time *timeout, int try) { struct abs_timeout timo; struct umtx_q *uq; struct umtx_pi *pi, *new_pi; uint32_t id, old_owner, owner, old; int error, rv; id = td->td_tid; uq = td->td_umtxq; if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 
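/*
 * The umtx_pi lookup below uses the usual allocation dance: try
 * M_NOWAIT under the chain lock and, on failure, drop the lock,
 * allocate with M_WAITOK, retake the lock, and redo the lookup in
 * case another thread inserted the pi meanwhile, freeing the spare
 * if so.
 */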
TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags), &uq->uq_key)) != 0) return (error); if (timeout != NULL) abs_timeout_init2(&timo, timeout); umtxq_lock(&uq->uq_key); pi = umtx_pi_lookup(&uq->uq_key); if (pi == NULL) { new_pi = umtx_pi_alloc(M_NOWAIT); if (new_pi == NULL) { umtxq_unlock(&uq->uq_key); new_pi = umtx_pi_alloc(M_WAITOK); umtxq_lock(&uq->uq_key); pi = umtx_pi_lookup(&uq->uq_key); if (pi != NULL) { umtx_pi_free(new_pi); new_pi = NULL; } } if (new_pi != NULL) { new_pi->pi_key = uq->uq_key; umtx_pi_insert(new_pi); pi = new_pi; } } umtx_pi_ref(pi); umtxq_unlock(&uq->uq_key); /* * Care must be exercised when dealing with the umtx structure. * It can fault on any access. */ for (;;) { /* * Try the uncontested case. This should be done in userland. */ rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id); /* The address was invalid. */ if (rv == -1) { error = EFAULT; break; } - /* The acquire succeeded. */ - if (owner == UMUTEX_UNOWNED) { + if (rv == 0) { + MPASS(owner == UMUTEX_UNOWNED); error = 0; break; } if (owner == UMUTEX_RB_NOTRECOV) { error = ENOTRECOVERABLE; break; } + /* + * Avoid overwriting a possible error from sleep, caused + * by a pending signal, with the suspension check result. + */ + if (error == 0) { + error = umtxq_check_susp(td, true); + if (error != 0) + break; + } + /* If no one owns it but it is contested, try to acquire it. */ if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) { old_owner = owner; rv = casueword32(&m->m_owner, owner, &owner, id | UMUTEX_CONTESTED); /* The address was invalid. */ if (rv == -1) { error = EFAULT; break; } - - if (owner == old_owner) { - umtxq_lock(&uq->uq_key); - umtxq_busy(&uq->uq_key); - error = umtx_pi_claim(pi, td); - umtxq_unbusy(&uq->uq_key); - umtxq_unlock(&uq->uq_key); - if (error != 0) { - /* - * Since we're going to return an - * error, restore the m_owner to its - * previous, unowned state to avoid - * compounding the problem. - */ - (void)casuword32(&m->m_owner, - id | UMUTEX_CONTESTED, - old_owner); + if (rv == 1) { + if (error == 0) { + error = umtxq_check_susp(td, true); + if (error != 0) + return (error); } - if (error == 0 && - old_owner == UMUTEX_RB_OWNERDEAD) - error = EOWNERDEAD; - break; - } - error = umtxq_check_susp(td); - if (error != 0) - break; + /* + * If this failed, the lock could have + * changed; restart. + */ + continue; + } - /* If this failed the lock has changed, restart. */ - continue; + MPASS(rv == 0); + MPASS(owner == old_owner); + umtxq_lock(&uq->uq_key); + umtxq_busy(&uq->uq_key); + error = umtx_pi_claim(pi, td); + umtxq_unbusy(&uq->uq_key); + umtxq_unlock(&uq->uq_key); + if (error != 0) { + /* + * Since we're going to return an + * error, restore the m_owner to its + * previous, unowned state to avoid + * compounding the problem. + */ + (void)casuword32(&m->m_owner, + id | UMUTEX_CONTESTED, old_owner); + } + if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD) + error = EOWNERDEAD; + break; } if ((owner & ~UMUTEX_CONTESTED) == id) { error = EDEADLK; break; } if (try != 0) { error = EBUSY; break; } /* * If we caught a signal, we have retried and now * exit immediately. */ if (error != 0) break; umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_unlock(&uq->uq_key); /* * Set the contested bit so that a release in user space * knows to use the system call for unlock. If this fails, * either someone else has acquired the lock or it has been * released. */ rv = casueword32(&m->m_owner, owner, &old, owner | UMUTEX_CONTESTED); /* The address was invalid.
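 * rv == 1 instead means that the compare or the store failed; the
 * chain is unbusied and the whole lock attempt is retried after a
 * suspension check.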
*/ if (rv == -1) { umtxq_unbusy_unlocked(&uq->uq_key); error = EFAULT; break; } - - umtxq_lock(&uq->uq_key); - /* - * We set the contested bit, sleep. Otherwise the lock changed - * and we need to retry or we lost a race to the thread - * unlocking the umtx. Note that the UMUTEX_RB_OWNERDEAD - * value for owner is impossible there. - */ - if (old == owner) { - error = umtxq_sleep_pi(uq, pi, - owner & ~UMUTEX_CONTESTED, - "umtxpi", timeout == NULL ? NULL : &timo, - (flags & USYNC_PROCESS_SHARED) != 0); + if (rv == 1) { + umtxq_unbusy_unlocked(&uq->uq_key); + error = umtxq_check_susp(td, true); if (error != 0) - continue; - } else { - umtxq_unbusy(&uq->uq_key); - umtxq_unlock(&uq->uq_key); + break; + + /* + * The lock changed and we need to retry or we + * lost a race to the thread unlocking the + * umtx. Note that the UMUTEX_RB_OWNERDEAD + * value for owner is impossible there. + */ + continue; } - error = umtxq_check_susp(td); + umtxq_lock(&uq->uq_key); + + /* We set the contested bit, sleep. */ + MPASS(old == owner); + error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED, + "umtxpi", timeout == NULL ? NULL : &timo, + (flags & USYNC_PROCESS_SHARED) != 0); + if (error != 0) + continue; + + error = umtxq_check_susp(td, false); if (error != 0) break; } umtxq_lock(&uq->uq_key); umtx_pi_unref(pi); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); return (error); } /* * Unlock a PI mutex. */ static int do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb) { struct umtx_key key; struct umtx_q *uq_first, *uq_first2, *uq_me; struct umtx_pi *pi, *pi2; uint32_t id, new_owner, old, owner; int count, error, pri; id = td->td_tid; + +usrloop: /* * Make sure we own this mtx. */ error = fueword32(&m->m_owner, &owner); if (error == -1) return (EFAULT); if ((owner & ~UMUTEX_CONTESTED) != id) return (EPERM); new_owner = umtx_unlock_val(flags, rb); /* This should be done in userland */ if ((owner & UMUTEX_CONTESTED) == 0) { error = casueword32(&m->m_owner, owner, &old, new_owner); if (error == -1) return (EFAULT); + if (error == 1) { + error = umtxq_check_susp(td, true); + if (error != 0) + return (error); + goto usrloop; + } if (old == owner) return (0); owner = old; } /* We should only ever be in here for contested locks */ if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags), &key)) != 0) return (error); umtxq_lock(&key); umtxq_busy(&key); count = umtxq_count_pi(&key, &uq_first); if (uq_first != NULL) { mtx_lock(&umtx_lock); pi = uq_first->uq_pi_blocked; KASSERT(pi != NULL, ("pi == NULL?")); if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) { mtx_unlock(&umtx_lock); umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); /* userland messed the mutex */ return (EPERM); } uq_me = td->td_umtxq; if (pi->pi_owner == td) umtx_pi_disown(pi); /* get highest priority thread which is still sleeping. 
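 * Waiters already removed from the queue by a signal or a timeout are
 * skipped, so the wakeup goes to a thread that can actually take the
 * mutex over.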
*/ uq_first = TAILQ_FIRST(&pi->pi_blocked); while (uq_first != NULL && (uq_first->uq_flags & UQF_UMTXQ) == 0) { uq_first = TAILQ_NEXT(uq_first, uq_lockq); } pri = PRI_MAX; TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) { uq_first2 = TAILQ_FIRST(&pi2->pi_blocked); if (uq_first2 != NULL) { if (pri > UPRI(uq_first2->uq_thread)) pri = UPRI(uq_first2->uq_thread); } } thread_lock(td); sched_lend_user_prio(td, pri); thread_unlock(td); mtx_unlock(&umtx_lock); if (uq_first) umtxq_signal_thread(uq_first); } else { pi = umtx_pi_lookup(&key); /* * A umtx_pi can exist if a signal or timeout removed the * last waiter from the umtxq, but there is still * a thread in do_lock_pi() holding the umtx_pi. */ if (pi != NULL) { /* * The umtx_pi can be unowned, such as when a thread * has just entered do_lock_pi(), allocated the * umtx_pi, and unlocked the umtxq. * If the current thread owns it, it must disown it. */ mtx_lock(&umtx_lock); if (pi->pi_owner == td) umtx_pi_disown(pi); mtx_unlock(&umtx_lock); } } umtxq_unlock(&key); /* * When unlocking the umtx, it must be marked as unowned if * there is zero or one thread only waiting for it. * Otherwise, it must be marked as contested. */ if (count > 1) new_owner |= UMUTEX_CONTESTED; +again: error = casueword32(&m->m_owner, owner, &old, new_owner); - + if (error == 1) { + error = umtxq_check_susp(td, false); + if (error == 0) + goto again; + } umtxq_unbusy_unlocked(&key); umtx_key_release(&key); if (error == -1) return (EFAULT); - if (old != owner) + if (error == 0 && old != owner) return (EINVAL); - return (0); + return (error); } /* * Lock a PP mutex. */ static int do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, struct _umtx_time *timeout, int try) { struct abs_timeout timo; struct umtx_q *uq, *uq2; struct umtx_pi *pi; uint32_t ceiling; uint32_t owner, id; int error, pri, old_inherited_pri, su, rv; id = td->td_tid; uq = td->td_umtxq; if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), &uq->uq_key)) != 0) return (error); if (timeout != NULL) abs_timeout_init2(&timo, timeout); su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); for (;;) { old_inherited_pri = uq->uq_inherited_pri; umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_unlock(&uq->uq_key); rv = fueword32(&m->m_ceilings[0], &ceiling); if (rv == -1) { error = EFAULT; goto out; } ceiling = RTP_PRIO_MAX - ceiling; if (ceiling > RTP_PRIO_MAX) { error = EINVAL; goto out; } mtx_lock(&umtx_lock); if (UPRI(td) < PRI_MIN_REALTIME + ceiling) { mtx_unlock(&umtx_lock); error = EINVAL; goto out; } if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) { uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling; thread_lock(td); if (uq->uq_inherited_pri < UPRI(td)) sched_lend_user_prio(td, uq->uq_inherited_pri); thread_unlock(td); } mtx_unlock(&umtx_lock); rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED); /* The address was invalid. 
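 * rv == 0 means the slot was taken with the contested bit set, while
 * rv == 1 leaves the observed value in "owner" for the robust-state
 * checks that follow.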
*/ if (rv == -1) { error = EFAULT; break; } - - if (owner == UMUTEX_CONTESTED) { + if (rv == 0) { + MPASS(owner == UMUTEX_CONTESTED); error = 0; break; - } else if (owner == UMUTEX_RB_OWNERDEAD) { + } + /* rv == 1 */ + if (owner == UMUTEX_RB_OWNERDEAD) { rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD, &owner, id | UMUTEX_CONTESTED); if (rv == -1) { error = EFAULT; break; } - if (owner == UMUTEX_RB_OWNERDEAD) { + if (rv == 0) { + MPASS(owner == UMUTEX_RB_OWNERDEAD); error = EOWNERDEAD; /* success */ break; } - error = 0; + + /* + * rv == 1, only check for suspension if we + * did not already catch a signal. If we + * get an error from the check, the same + * condition is checked by the umtxq_sleep() + * call below, so we should obliterate the + * error so as not to skip the last loop + * iteration. + */ + if (error == 0) { + error = umtxq_check_susp(td, false); + if (error == 0) { + if (try != 0) + error = EBUSY; + else + continue; + } + error = 0; + } } else if (owner == UMUTEX_RB_NOTRECOV) { error = ENOTRECOVERABLE; - break; } - if (try != 0) { + if (try != 0) error = EBUSY; - break; - } /* * If we caught a signal, we have retried and now * exit immediately. */ if (error != 0) break; umtxq_lock(&uq->uq_key); umtxq_insert(uq); umtxq_unbusy(&uq->uq_key); error = umtxq_sleep(uq, "umtxpp", timeout == NULL ? NULL : &timo); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); mtx_lock(&umtx_lock); uq->uq_inherited_pri = old_inherited_pri; pri = PRI_MAX; TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { uq2 = TAILQ_FIRST(&pi->pi_blocked); if (uq2 != NULL) { if (pri > UPRI(uq2->uq_thread)) pri = UPRI(uq2->uq_thread); } } if (pri > uq->uq_inherited_pri) pri = uq->uq_inherited_pri; thread_lock(td); sched_lend_user_prio(td, pri); thread_unlock(td); mtx_unlock(&umtx_lock); } if (error != 0 && error != EOWNERDEAD) { mtx_lock(&umtx_lock); uq->uq_inherited_pri = old_inherited_pri; pri = PRI_MAX; TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { uq2 = TAILQ_FIRST(&pi->pi_blocked); if (uq2 != NULL) { if (pri > UPRI(uq2->uq_thread)) pri = UPRI(uq2->uq_thread); } } if (pri > uq->uq_inherited_pri) pri = uq->uq_inherited_pri; thread_lock(td); sched_lend_user_prio(td, pri); thread_unlock(td); mtx_unlock(&umtx_lock); } out: umtxq_unbusy_unlocked(&uq->uq_key); umtx_key_release(&uq->uq_key); return (error); } /* * Unlock a PP mutex. */ static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb) { struct umtx_key key; struct umtx_q *uq, *uq2; struct umtx_pi *pi; uint32_t id, owner, rceiling; int error, pri, new_inherited_pri, su; id = td->td_tid; uq = td->td_umtxq; su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); /* * Make sure we own this mtx. */ error = fueword32(&m->m_owner, &owner); if (error == -1) return (EFAULT); if ((owner & ~UMUTEX_CONTESTED) != id) return (EPERM); error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); if (error != 0) return (error); if (rceiling == -1) new_inherited_pri = PRI_MAX; else { rceiling = RTP_PRIO_MAX - rceiling; if (rceiling > RTP_PRIO_MAX) return (EINVAL); new_inherited_pri = PRI_MIN_REALTIME + rceiling; } if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), &key)) != 0) return (error); umtxq_lock(&key); umtxq_busy(&key); umtxq_unlock(&key); /* * For a priority protected mutex, always set the unlocked state * to UMUTEX_CONTESTED, so that userland always enters the kernel * to lock the mutex. This is necessary because the thread * priority has to be adjusted for such a mutex.
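 * Since ownership was already verified above, a plain suword32()
 * store suffices here; no compare-and-set is needed on this unlock
 * path.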
*/ error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) | UMUTEX_CONTESTED); umtxq_lock(&key); if (error == 0) umtxq_signal(&key, 1); umtxq_unbusy(&key); umtxq_unlock(&key); if (error == -1) error = EFAULT; else { mtx_lock(&umtx_lock); if (su != 0) uq->uq_inherited_pri = new_inherited_pri; pri = PRI_MAX; TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { uq2 = TAILQ_FIRST(&pi->pi_blocked); if (uq2 != NULL) { if (pri > UPRI(uq2->uq_thread)) pri = UPRI(uq2->uq_thread); } } if (pri > uq->uq_inherited_pri) pri = uq->uq_inherited_pri; thread_lock(td); sched_lend_user_prio(td, pri); thread_unlock(td); mtx_unlock(&umtx_lock); } umtx_key_release(&key); return (error); } static int do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, uint32_t *old_ceiling) { struct umtx_q *uq; uint32_t flags, id, owner, save_ceiling; int error, rv, rv1; error = fueword32(&m->m_flags, &flags); if (error == -1) return (EFAULT); if ((flags & UMUTEX_PRIO_PROTECT) == 0) return (EINVAL); if (ceiling > RTP_PRIO_MAX) return (EINVAL); id = td->td_tid; uq = td->td_umtxq; if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), &uq->uq_key)) != 0) return (error); for (;;) { umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_unlock(&uq->uq_key); rv = fueword32(&m->m_ceilings[0], &save_ceiling); if (rv == -1) { error = EFAULT; break; } rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED); if (rv == -1) { error = EFAULT; break; } if (owner == UMUTEX_CONTESTED) { rv = suword32(&m->m_ceilings[0], ceiling); rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED); error = (rv == 0 && rv1 == 0) ? 0: EFAULT; break; } if ((owner & ~UMUTEX_CONTESTED) == id) { rv = suword32(&m->m_ceilings[0], ceiling); error = rv == 0 ? 0 : EFAULT; break; } if (owner == UMUTEX_RB_OWNERDEAD) { error = EOWNERDEAD; break; } else if (owner == UMUTEX_RB_NOTRECOV) { error = ENOTRECOVERABLE; break; } /* * If we caught a signal, we have retried and now * exit immediately. */ if (error != 0) break; /* * We set the contested bit, sleep. Otherwise the lock changed * and we need to retry or we lost a race to the thread * unlocking the umtx. */ umtxq_lock(&uq->uq_key); umtxq_insert(uq); umtxq_unbusy(&uq->uq_key); error = umtxq_sleep(uq, "umtxpp", NULL); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); } umtxq_lock(&uq->uq_key); if (error == 0) umtxq_signal(&uq->uq_key, INT_MAX); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); if (error == 0 && old_ceiling != NULL) { rv = suword32(old_ceiling, save_ceiling); error = rv == 0 ? 0 : EFAULT; } return (error); } /* * Lock a userland POSIX mutex. */ static int do_lock_umutex(struct thread *td, struct umutex *m, struct _umtx_time *timeout, int mode) { uint32_t flags; int error; error = fueword32(&m->m_flags, &flags); if (error == -1) return (EFAULT); switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { case 0: error = do_lock_normal(td, m, flags, timeout, mode); break; case UMUTEX_PRIO_INHERIT: error = do_lock_pi(td, m, flags, timeout, mode); break; case UMUTEX_PRIO_PROTECT: error = do_lock_pp(td, m, flags, timeout, mode); break; default: return (EINVAL); } if (timeout == NULL) { if (error == EINTR && mode != _UMUTEX_WAIT) error = ERESTART; } else { /* Timed-locking is not restarted. */ if (error == ERESTART) error = EINTR; } return (error); } /* * Unlock a userland POSIX mutex. 
*/ static int do_unlock_umutex(struct thread *td, struct umutex *m, bool rb) { uint32_t flags; int error; error = fueword32(&m->m_flags, &flags); if (error == -1) return (EFAULT); switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { case 0: return (do_unlock_normal(td, m, flags, rb)); case UMUTEX_PRIO_INHERIT: return (do_unlock_pi(td, m, flags, rb)); case UMUTEX_PRIO_PROTECT: return (do_unlock_pp(td, m, flags, rb)); } return (EINVAL); } static int do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, struct timespec *timeout, u_long wflags) { struct abs_timeout timo; struct umtx_q *uq; uint32_t flags, clockid, hasw; int error; uq = td->td_umtxq; error = fueword32(&cv->c_flags, &flags); if (error == -1) return (EFAULT); error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); if (error != 0) return (error); if ((wflags & CVWAIT_CLOCKID) != 0) { error = fueword32(&cv->c_clockid, &clockid); if (error == -1) { umtx_key_release(&uq->uq_key); return (EFAULT); } if (clockid < CLOCK_REALTIME || clockid >= CLOCK_THREAD_CPUTIME_ID) { /* Only hardware clock ids will work. */ umtx_key_release(&uq->uq_key); return (EINVAL); } } else { clockid = CLOCK_REALTIME; } umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_insert(uq); umtxq_unlock(&uq->uq_key); /* * Set c_has_waiters to 1 before releasing the user mutex, and * avoid touching the cache line when unnecessary. */ error = fueword32(&cv->c_has_waiters, &hasw); if (error == 0 && hasw == 0) suword32(&cv->c_has_waiters, 1); umtxq_unbusy_unlocked(&uq->uq_key); error = do_unlock_umutex(td, m, false); if (timeout != NULL) abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0, timeout); umtxq_lock(&uq->uq_key); if (error == 0) { error = umtxq_sleep(uq, "ucond", timeout == NULL ? NULL : &timo); } if ((uq->uq_flags & UQF_UMTXQ) == 0) error = 0; else { /* * This must be a timeout, an interruption by a signal, or * a spurious wakeup; clear the c_has_waiters flag when * necessary. */ umtxq_busy(&uq->uq_key); if ((uq->uq_flags & UQF_UMTXQ) != 0) { int oldlen = uq->uq_cur_queue->length; umtxq_remove(uq); if (oldlen == 1) { umtxq_unlock(&uq->uq_key); suword32(&cv->c_has_waiters, 0); umtxq_lock(&uq->uq_key); } } umtxq_unbusy(&uq->uq_key); if (error == ERESTART) error = EINTR; } umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); return (error); } /* * Signal a userland condition variable.
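 *
 * The c_has_waiters word maintained by do_cv_wait() above is the
 * userland fast-path hint: it is set to 1 before the mutex is
 * released and cleared once the last sleeper leaves the queue.  A
 * hedged sketch of the matching userland signal path (assumed, not
 * part of this file):
 *
 *	if (cv->c_has_waiters != 0)
 *		(void)_umtx_op(cv, UMTX_OP_CV_SIGNAL, 0, NULL, NULL);
 *
 * so signalling with no waiters never enters the kernel.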
*/ static int do_cv_signal(struct thread *td, struct ucond *cv) { struct umtx_key key; int error, cnt, nwake; uint32_t flags; error = fueword32(&cv->c_flags, &flags); if (error == -1) return (EFAULT); if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) return (error); umtxq_lock(&key); umtxq_busy(&key); cnt = umtxq_count(&key); nwake = umtxq_signal(&key, 1); if (cnt <= nwake) { umtxq_unlock(&key); error = suword32(&cv->c_has_waiters, 0); if (error == -1) error = EFAULT; umtxq_lock(&key); } umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); return (error); } static int do_cv_broadcast(struct thread *td, struct ucond *cv) { struct umtx_key key; int error; uint32_t flags; error = fueword32(&cv->c_flags, &flags); if (error == -1) return (EFAULT); if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) return (error); umtxq_lock(&key); umtxq_busy(&key); umtxq_signal(&key, INT_MAX); umtxq_unlock(&key); error = suword32(&cv->c_has_waiters, 0); if (error == -1) error = EFAULT; umtxq_unbusy_unlocked(&key); umtx_key_release(&key); return (error); } static int do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, struct _umtx_time *timeout) { struct abs_timeout timo; struct umtx_q *uq; uint32_t flags, wrflags; int32_t state, oldstate; int32_t blocked_readers; int error, error1, rv; uq = td->td_umtxq; error = fueword32(&rwlock->rw_flags, &flags); if (error == -1) return (EFAULT); error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); if (error != 0) return (error); if (timeout != NULL) abs_timeout_init2(&timo, timeout); wrflags = URWLOCK_WRITE_OWNER; if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) wrflags |= URWLOCK_WRITE_WAITERS; for (;;) { rv = fueword32(&rwlock->rw_state, &state); if (rv == -1) { umtx_key_release(&uq->uq_key); return (EFAULT); } /* try to lock it */ while (!(state & wrflags)) { if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) { umtx_key_release(&uq->uq_key); return (EAGAIN); } rv = casueword32(&rwlock->rw_state, state, &oldstate, state + 1); if (rv == -1) { umtx_key_release(&uq->uq_key); return (EFAULT); } - if (oldstate == state) { + if (rv == 0) { + MPASS(oldstate == state); umtx_key_release(&uq->uq_key); return (0); } - error = umtxq_check_susp(td); + error = umtxq_check_susp(td, true); if (error != 0) break; state = oldstate; } if (error) break; /* grab monitor lock */ umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_unlock(&uq->uq_key); /* * re-read the state, in case it changed between the try-lock above * and the check below */ rv = fueword32(&rwlock->rw_state, &state); if (rv == -1) error = EFAULT; /* set read contention bit */ while (error == 0 && (state & wrflags) && !(state & URWLOCK_READ_WAITERS)) { rv = casueword32(&rwlock->rw_state, state, &oldstate, state | URWLOCK_READ_WAITERS); if (rv == -1) { error = EFAULT; break; } - if (oldstate == state) + if (rv == 0) { + MPASS(oldstate == state); goto sleep; + } state = oldstate; - error = umtxq_check_susp(td); + error = umtxq_check_susp(td, false); if (error != 0) break; } if (error != 0) { umtxq_unbusy_unlocked(&uq->uq_key); break; } /* state is changed while setting flags, restart */ if (!(state & wrflags)) { umtxq_unbusy_unlocked(&uq->uq_key); - error = umtxq_check_susp(td); + error = umtxq_check_susp(td, true); if (error != 0) break; continue; } sleep: /* * Contention bit is set, before sleeping, increase * read waiter count. 
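 *
 * For reference (constants from sys/umtx.h): rw_state packs the
 * reader count into its low bits, retrieved by
 * URWLOCK_READER_COUNT(), and keeps the URWLOCK_WRITE_OWNER,
 * URWLOCK_WRITE_WAITERS and URWLOCK_READ_WAITERS flags in the top
 * bits.  That is why a single 32-bit CAS such as
 *
 *	rv = casueword32(&rwlock->rw_state, state, &oldstate,
 *	    state + 1);
 *
 * can take a read slot and re-check writer activity atomically.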
*/ rv = fueword32(&rwlock->rw_blocked_readers, &blocked_readers); if (rv == -1) { umtxq_unbusy_unlocked(&uq->uq_key); error = EFAULT; break; } suword32(&rwlock->rw_blocked_readers, blocked_readers+1); while (state & wrflags) { umtxq_lock(&uq->uq_key); umtxq_insert(uq); umtxq_unbusy(&uq->uq_key); error = umtxq_sleep(uq, "urdlck", timeout == NULL ? NULL : &timo); umtxq_busy(&uq->uq_key); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); if (error) break; rv = fueword32(&rwlock->rw_state, &state); if (rv == -1) { error = EFAULT; break; } } /* decrease read waiter count, and may clear read contention bit */ rv = fueword32(&rwlock->rw_blocked_readers, &blocked_readers); if (rv == -1) { umtxq_unbusy_unlocked(&uq->uq_key); error = EFAULT; break; } suword32(&rwlock->rw_blocked_readers, blocked_readers-1); if (blocked_readers == 1) { rv = fueword32(&rwlock->rw_state, &state); if (rv == -1) { umtxq_unbusy_unlocked(&uq->uq_key); error = EFAULT; break; } for (;;) { rv = casueword32(&rwlock->rw_state, state, &oldstate, state & ~URWLOCK_READ_WAITERS); if (rv == -1) { error = EFAULT; break; } - if (oldstate == state) + if (rv == 0) { + MPASS(oldstate == state); break; + } state = oldstate; - error1 = umtxq_check_susp(td); + error1 = umtxq_check_susp(td, false); if (error1 != 0) { if (error == 0) error = error1; break; } } } umtxq_unbusy_unlocked(&uq->uq_key); if (error != 0) break; } umtx_key_release(&uq->uq_key); if (error == ERESTART) error = EINTR; return (error); } static int do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) { struct abs_timeout timo; struct umtx_q *uq; uint32_t flags; int32_t state, oldstate; int32_t blocked_writers; int32_t blocked_readers; int error, error1, rv; uq = td->td_umtxq; error = fueword32(&rwlock->rw_flags, &flags); if (error == -1) return (EFAULT); error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); if (error != 0) return (error); if (timeout != NULL) abs_timeout_init2(&timo, timeout); blocked_readers = 0; for (;;) { rv = fueword32(&rwlock->rw_state, &state); if (rv == -1) { umtx_key_release(&uq->uq_key); return (EFAULT); } while ((state & URWLOCK_WRITE_OWNER) == 0 && URWLOCK_READER_COUNT(state) == 0) { rv = casueword32(&rwlock->rw_state, state, &oldstate, state | URWLOCK_WRITE_OWNER); if (rv == -1) { umtx_key_release(&uq->uq_key); return (EFAULT); } - if (oldstate == state) { + if (rv == 0) { + MPASS(oldstate == state); umtx_key_release(&uq->uq_key); return (0); } state = oldstate; - error = umtxq_check_susp(td); + error = umtxq_check_susp(td, true); if (error != 0) break; } if (error) { - if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) && + if ((state & (URWLOCK_WRITE_OWNER | + URWLOCK_WRITE_WAITERS)) == 0 && blocked_readers != 0) { umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); - umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE); + umtxq_signal_queue(&uq->uq_key, INT_MAX, + UMTX_SHARED_QUEUE); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); } break; } /* grab monitor lock */ umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_unlock(&uq->uq_key); /* * Re-read the state, in case it changed between the * try-lock above and the check below. 
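 *
 * The window exists because the try-lock CAS runs before
 * umtxq_busy() is acquired.  An illustrative interleaving:
 *
 *	this thread (writer)		last reader
 *	casueword32() fails
 *	umtxq_busy()
 *					drops rw_state to 0, wakes queue
 *	sleep on the stale state	(wakeup already happened)
 *
 * Re-reading under the busy flag makes the writer retry the
 * try-lock instead of sleeping on a lock nobody holds.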
*/ rv = fueword32(&rwlock->rw_state, &state); if (rv == -1) error = EFAULT; while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) && (state & URWLOCK_WRITE_WAITERS) == 0) { rv = casueword32(&rwlock->rw_state, state, &oldstate, state | URWLOCK_WRITE_WAITERS); if (rv == -1) { error = EFAULT; break; } - if (oldstate == state) + if (rv == 0) { + MPASS(oldstate == state); goto sleep; + } state = oldstate; - error = umtxq_check_susp(td); + error = umtxq_check_susp(td, false); if (error != 0) break; } if (error != 0) { umtxq_unbusy_unlocked(&uq->uq_key); break; } if ((state & URWLOCK_WRITE_OWNER) == 0 && URWLOCK_READER_COUNT(state) == 0) { umtxq_unbusy_unlocked(&uq->uq_key); - error = umtxq_check_susp(td); + error = umtxq_check_susp(td, false); if (error != 0) break; continue; } sleep: rv = fueword32(&rwlock->rw_blocked_writers, &blocked_writers); if (rv == -1) { umtxq_unbusy_unlocked(&uq->uq_key); error = EFAULT; break; } suword32(&rwlock->rw_blocked_writers, blocked_writers + 1); while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) { umtxq_lock(&uq->uq_key); umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); umtxq_unbusy(&uq->uq_key); error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? NULL : &timo); umtxq_busy(&uq->uq_key); umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); umtxq_unlock(&uq->uq_key); if (error) break; rv = fueword32(&rwlock->rw_state, &state); if (rv == -1) { error = EFAULT; break; } } rv = fueword32(&rwlock->rw_blocked_writers, &blocked_writers); if (rv == -1) { umtxq_unbusy_unlocked(&uq->uq_key); error = EFAULT; break; } suword32(&rwlock->rw_blocked_writers, blocked_writers-1); if (blocked_writers == 1) { rv = fueword32(&rwlock->rw_state, &state); if (rv == -1) { umtxq_unbusy_unlocked(&uq->uq_key); error = EFAULT; break; } for (;;) { rv = casueword32(&rwlock->rw_state, state, &oldstate, state & ~URWLOCK_WRITE_WAITERS); if (rv == -1) { error = EFAULT; break; } - if (oldstate == state) + if (rv == 0) { + MPASS(oldstate == state); break; + } state = oldstate; - error1 = umtxq_check_susp(td); + error1 = umtxq_check_susp(td, false); /* * We are leaving the URWLOCK_WRITE_WAITERS * behind, but this should not harm the * correctness. 
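 *
 * Leaving a stale URWLOCK_WRITE_WAITERS bit behind only costs an
 * extra kernel entry: the next unlocker sees the bit and calls into
 * do_rw_unlock() below, whose check
 *
 *	if (state & URWLOCK_WRITE_WAITERS) {
 *		count = 1;
 *		q = UMTX_EXCLUSIVE_QUEUE;
 *	}
 *
 * then finds an empty exclusive queue, so no wakeup is lost.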
*/ if (error1 != 0) { if (error == 0) error = error1; break; } } rv = fueword32(&rwlock->rw_blocked_readers, &blocked_readers); if (rv == -1) { umtxq_unbusy_unlocked(&uq->uq_key); error = EFAULT; break; } } else blocked_readers = 0; umtxq_unbusy_unlocked(&uq->uq_key); } umtx_key_release(&uq->uq_key); if (error == ERESTART) error = EINTR; return (error); } static int do_rw_unlock(struct thread *td, struct urwlock *rwlock) { struct umtx_q *uq; uint32_t flags; int32_t state, oldstate; int error, rv, q, count; uq = td->td_umtxq; error = fueword32(&rwlock->rw_flags, &flags); if (error == -1) return (EFAULT); error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); if (error != 0) return (error); error = fueword32(&rwlock->rw_state, &state); if (error == -1) { error = EFAULT; goto out; } if (state & URWLOCK_WRITE_OWNER) { for (;;) { rv = casueword32(&rwlock->rw_state, state, &oldstate, state & ~URWLOCK_WRITE_OWNER); if (rv == -1) { error = EFAULT; goto out; } - if (oldstate != state) { + if (rv == 1) { state = oldstate; if (!(oldstate & URWLOCK_WRITE_OWNER)) { error = EPERM; goto out; } - error = umtxq_check_susp(td); + error = umtxq_check_susp(td, true); if (error != 0) goto out; } else break; } } else if (URWLOCK_READER_COUNT(state) != 0) { for (;;) { rv = casueword32(&rwlock->rw_state, state, &oldstate, state - 1); if (rv == -1) { error = EFAULT; goto out; } - if (oldstate != state) { + if (rv == 1) { state = oldstate; if (URWLOCK_READER_COUNT(oldstate) == 0) { error = EPERM; goto out; } - error = umtxq_check_susp(td); + error = umtxq_check_susp(td, true); if (error != 0) goto out; } else break; } } else { error = EPERM; goto out; } count = 0; if (!(flags & URWLOCK_PREFER_READER)) { if (state & URWLOCK_WRITE_WAITERS) { count = 1; q = UMTX_EXCLUSIVE_QUEUE; } else if (state & URWLOCK_READ_WAITERS) { count = INT_MAX; q = UMTX_SHARED_QUEUE; } } else { if (state & URWLOCK_READ_WAITERS) { count = INT_MAX; q = UMTX_SHARED_QUEUE; } else if (state & URWLOCK_WRITE_WAITERS) { count = 1; q = UMTX_EXCLUSIVE_QUEUE; } } if (count) { umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_signal_queue(&uq->uq_key, count, q); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); } out: umtx_key_release(&uq->uq_key); return (error); } #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) static int do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) { struct abs_timeout timo; struct umtx_q *uq; uint32_t flags, count, count1; - int error, rv; + int error, rv, rv1; uq = td->td_umtxq; error = fueword32(&sem->_flags, &flags); if (error == -1) return (EFAULT); error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); if (error != 0) return (error); if (timeout != NULL) abs_timeout_init2(&timo, timeout); +again: umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_insert(uq); umtxq_unlock(&uq->uq_key); rv = casueword32(&sem->_has_waiters, 0, &count1, 1); if (rv == 0) - rv = fueword32(&sem->_count, &count); - if (rv == -1 || count != 0) { + rv1 = fueword32(&sem->_count, &count); + if (rv == -1 || (rv == 0 && (rv1 == -1 || count != 0)) || rv == 1) { umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); - umtx_key_release(&uq->uq_key); - return (rv == -1 ? EFAULT : 0); + if (rv == 1) { + rv = umtxq_check_susp(td, true); + if (rv == 0) + goto again; + error = rv; + goto out; + } + if (rv == 0) + rv = rv1; + error = rv == -1 ? 
EFAULT : 0; + goto out; } umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); if ((uq->uq_flags & UQF_UMTXQ) == 0) error = 0; else { umtxq_remove(uq); /* A relative timeout cannot be restarted. */ if (error == ERESTART && timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) error = EINTR; } umtxq_unlock(&uq->uq_key); +out: umtx_key_release(&uq->uq_key); return (error); } /* * Signal a userland semaphore. */ static int do_sem_wake(struct thread *td, struct _usem *sem) { struct umtx_key key; int error, cnt; uint32_t flags; error = fueword32(&sem->_flags, &flags); if (error == -1) return (EFAULT); if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) return (error); umtxq_lock(&key); umtxq_busy(&key); cnt = umtxq_count(&key); if (cnt > 0) { /* * Check if count is greater than 0, this means the memory is * still being referenced by user code, so we can safely * update _has_waiters flag. */ if (cnt == 1) { umtxq_unlock(&key); error = suword32(&sem->_has_waiters, 0); umtxq_lock(&key); if (error == -1) error = EFAULT; } umtxq_signal(&key, 1); } umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); return (error); } #endif static int do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) { struct abs_timeout timo; struct umtx_q *uq; uint32_t count, flags; int error, rv; uq = td->td_umtxq; flags = fuword32(&sem->_flags); error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); if (error != 0) return (error); if (timeout != NULL) abs_timeout_init2(&timo, timeout); +again: umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_insert(uq); umtxq_unlock(&uq->uq_key); rv = fueword32(&sem->_count, &count); if (rv == -1) { umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); return (EFAULT); } for (;;) { if (USEM_COUNT(count) != 0) { umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); return (0); } if (count == USEM_HAS_WAITERS) break; rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); - if (rv == -1) { - umtxq_lock(&uq->uq_key); - umtxq_unbusy(&uq->uq_key); - umtxq_remove(uq); - umtxq_unlock(&uq->uq_key); - umtx_key_release(&uq->uq_key); - return (EFAULT); - } - if (count == 0) + if (rv == 0) break; + umtxq_lock(&uq->uq_key); + umtxq_unbusy(&uq->uq_key); + umtxq_remove(uq); + umtxq_unlock(&uq->uq_key); + umtx_key_release(&uq->uq_key); + if (rv == -1) + return (EFAULT); + rv = umtxq_check_susp(td, true); + if (rv != 0) + return (rv); + goto again; } umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); if ((uq->uq_flags & UQF_UMTXQ) == 0) error = 0; else { umtxq_remove(uq); if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) { /* A relative timeout cannot be restarted. */ if (error == ERESTART) error = EINTR; if (error == EINTR) { abs_timeout_update(&timo); timespecsub(&timo.end, &timo.cur, &timeout->_timeout); } } } umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); return (error); } /* * Signal a userland semaphore. 
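 *
 * In the _usem2 layout the waiters hint lives in the count word
 * itself: USEM_HAS_WAITERS is the top bit and USEM_COUNT() masks it
 * off (sys/umtx.h).  A hedged sketch of the matching userland post
 * fast path (assumed, not part of this file):
 *
 *	count = atomic_fetchadd_32(&sem->_count, 1);
 *	if ((count & USEM_HAS_WAITERS) != 0)
 *		(void)_umtx_op(sem, UMTX_OP_SEM2_WAKE, 0, NULL, NULL);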
*/ static int do_sem2_wake(struct thread *td, struct _usem2 *sem) { struct umtx_key key; int error, cnt, rv; uint32_t count, flags; rv = fueword32(&sem->_flags, &flags); if (rv == -1) return (EFAULT); if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) return (error); umtxq_lock(&key); umtxq_busy(&key); cnt = umtxq_count(&key); if (cnt > 0) { /* * If this was the last sleeping thread, clear the waiters * flag in _count. */ if (cnt == 1) { umtxq_unlock(&key); rv = fueword32(&sem->_count, &count); - while (rv != -1 && count & USEM_HAS_WAITERS) + while (rv != -1 && count & USEM_HAS_WAITERS) { rv = casueword32(&sem->_count, count, &count, count & ~USEM_HAS_WAITERS); + if (rv == 1) { + rv = umtxq_check_susp(td, true); + if (rv != 0) + break; + } + } if (rv == -1) error = EFAULT; + else if (rv > 0) { + error = rv; + } umtxq_lock(&key); } umtxq_signal(&key, 1); } umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); return (error); } inline int umtx_copyin_timeout(const void *addr, struct timespec *tsp) { int error; error = copyin(addr, tsp, sizeof(struct timespec)); if (error == 0) { if (tsp->tv_sec < 0 || tsp->tv_nsec >= 1000000000 || tsp->tv_nsec < 0) error = EINVAL; } return (error); } static inline int umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp) { int error; if (size <= sizeof(struct timespec)) { tp->_clockid = CLOCK_REALTIME; tp->_flags = 0; error = copyin(addr, &tp->_timeout, sizeof(struct timespec)); } else error = copyin(addr, tp, sizeof(struct _umtx_time)); if (error != 0) return (error); if (tp->_timeout.tv_sec < 0 || tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) return (EINVAL); return (0); } static int __umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap) { return (EOPNOTSUPP); } static int __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap) { struct _umtx_time timeout, *tm_p; int error; if (uap->uaddr2 == NULL) tm_p = NULL; else { error = umtx_copyin_umtx_time( uap->uaddr2, (size_t)uap->uaddr1, &timeout); if (error != 0) return (error); tm_p = &timeout; } return (do_wait(td, uap->obj, uap->val, tm_p, 0, 0)); } static int __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap) { struct _umtx_time timeout, *tm_p; int error; if (uap->uaddr2 == NULL) tm_p = NULL; else { error = umtx_copyin_umtx_time( uap->uaddr2, (size_t)uap->uaddr1, &timeout); if (error != 0) return (error); tm_p = &timeout; } return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); } static int __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap) { struct _umtx_time *tm_p, timeout; int error; if (uap->uaddr2 == NULL) tm_p = NULL; else { error = umtx_copyin_umtx_time( uap->uaddr2, (size_t)uap->uaddr1, &timeout); if (error != 0) return (error); tm_p = &timeout; } return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); } static int __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) { return (kern_umtx_wake(td, uap->obj, uap->val, 0)); } #define BATCH_SIZE 128 static int __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap) { char *uaddrs[BATCH_SIZE], **upp; int count, error, i, pos, tocopy; upp = (char **)uap->obj; error = 0; for (count = uap->val, pos = 0; count > 0; count -= tocopy, pos += tocopy) { tocopy = MIN(count, BATCH_SIZE); error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); if (error != 0) break; for (i = 0; i < tocopy; ++i) kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); maybe_yield(); } return (error); } static int __umtx_op_wake_private(struct thread *td, 
struct _umtx_op_args *uap) { return (kern_umtx_wake(td, uap->obj, uap->val, 1)); } static int __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) { struct _umtx_time *tm_p, timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) tm_p = NULL; else { error = umtx_copyin_umtx_time( uap->uaddr2, (size_t)uap->uaddr1, &timeout); if (error != 0) return (error); tm_p = &timeout; } return (do_lock_umutex(td, uap->obj, tm_p, 0)); } static int __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) { return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); } static int __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) { struct _umtx_time *tm_p, timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) tm_p = NULL; else { error = umtx_copyin_umtx_time( uap->uaddr2, (size_t)uap->uaddr1, &timeout); if (error != 0) return (error); tm_p = &timeout; } return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); } static int __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) { return (do_wake_umutex(td, uap->obj)); } static int __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) { return (do_unlock_umutex(td, uap->obj, false)); } static int __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) { return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); } static int __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) ts = NULL; else { error = umtx_copyin_timeout(uap->uaddr2, &timeout); if (error != 0) return (error); ts = &timeout; } return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); } static int __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) { return (do_cv_signal(td, uap->obj)); } static int __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) { return (do_cv_broadcast(td, uap->obj)); } static int __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) { struct _umtx_time timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) { error = do_rw_rdlock(td, uap->obj, uap->val, 0); } else { error = umtx_copyin_umtx_time(uap->uaddr2, (size_t)uap->uaddr1, &timeout); if (error != 0) return (error); error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); } return (error); } static int __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) { struct _umtx_time timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) { error = do_rw_wrlock(td, uap->obj, 0); } else { error = umtx_copyin_umtx_time(uap->uaddr2, (size_t)uap->uaddr1, &timeout); if (error != 0) return (error); error = do_rw_wrlock(td, uap->obj, &timeout); } return (error); } static int __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) { return (do_rw_unlock(td, uap->obj)); } #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) static int __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap) { struct _umtx_time *tm_p, timeout; int error; /* Allow a null timespec (wait forever). 
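 *
 * For every timed operation in this table, uaddr2 points at either
 * a bare struct timespec or a struct _umtx_time, and uaddr1 carries
 * the size so umtx_copyin_umtx_time() can tell the two apart.  A
 * hedged userland sketch (variable names assumed):
 *
 *	struct _umtx_time ut;
 *	ut._timeout = ts;
 *	ut._flags = UMTX_ABSTIME;
 *	ut._clockid = CLOCK_MONOTONIC;
 *	_umtx_op(sem, UMTX_OP_SEM_WAIT, 0, (void *)sizeof(ut), &ut);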
*/ if (uap->uaddr2 == NULL) tm_p = NULL; else { error = umtx_copyin_umtx_time( uap->uaddr2, (size_t)uap->uaddr1, &timeout); if (error != 0) return (error); tm_p = &timeout; } return (do_sem_wait(td, uap->obj, tm_p)); } static int __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap) { return (do_sem_wake(td, uap->obj)); } #endif static int __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap) { return (do_wake2_umutex(td, uap->obj, uap->val)); } static int __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap) { struct _umtx_time *tm_p, timeout; size_t uasize; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) { uasize = 0; tm_p = NULL; } else { uasize = (size_t)uap->uaddr1; error = umtx_copyin_umtx_time(uap->uaddr2, uasize, &timeout); if (error != 0) return (error); tm_p = &timeout; } error = do_sem2_wait(td, uap->obj, tm_p); if (error == EINTR && uap->uaddr2 != NULL && (timeout._flags & UMTX_ABSTIME) == 0 && uasize >= sizeof(struct _umtx_time) + sizeof(struct timespec)) { error = copyout(&timeout._timeout, (struct _umtx_time *)uap->uaddr2 + 1, sizeof(struct timespec)); if (error == 0) { error = EINTR; } } return (error); } static int __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap) { return (do_sem2_wake(td, uap->obj)); } #define USHM_OBJ_UMTX(o) \ ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) #define USHMF_REG_LINKED 0x0001 #define USHMF_OBJ_LINKED 0x0002 struct umtx_shm_reg { TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; LIST_ENTRY(umtx_shm_reg) ushm_obj_link; struct umtx_key ushm_key; struct ucred *ushm_cred; struct shmfd *ushm_obj; u_int ushm_refcnt; u_int ushm_flags; }; LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); static uma_zone_t umtx_shm_reg_zone; static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; static struct mtx umtx_shm_lock; static struct umtx_shm_reg_head umtx_shm_reg_delfree = TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); static void umtx_shm_free_reg(struct umtx_shm_reg *reg); static void umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) { struct umtx_shm_reg_head d; struct umtx_shm_reg *reg, *reg1; TAILQ_INIT(&d); mtx_lock(&umtx_shm_lock); TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); mtx_unlock(&umtx_shm_lock); TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { TAILQ_REMOVE(&d, reg, ushm_reg_link); umtx_shm_free_reg(reg); } } static struct task umtx_shm_reg_delfree_task = TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); static struct umtx_shm_reg * umtx_shm_find_reg_locked(const struct umtx_key *key) { struct umtx_shm_reg *reg; struct umtx_shm_reg_head *reg_head; KASSERT(key->shared, ("umtx_p_find_rg: private key")); mtx_assert(&umtx_shm_lock, MA_OWNED); reg_head = &umtx_shm_registry[key->hash]; TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { KASSERT(reg->ushm_key.shared, ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); if (reg->ushm_key.info.shared.object == key->info.shared.object && reg->ushm_key.info.shared.offset == key->info.shared.offset) { KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); KASSERT(reg->ushm_refcnt > 0, ("reg %p refcnt 0 onlist", reg)); KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, ("reg %p not linked", reg)); reg->ushm_refcnt++; return (reg); } } return (NULL); } static struct umtx_shm_reg * umtx_shm_find_reg(const struct umtx_key *key) { struct umtx_shm_reg *reg; mtx_lock(&umtx_shm_lock); reg = umtx_shm_find_reg_locked(key); mtx_unlock(&umtx_shm_lock); 
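	/*
	 * The reference taken by umtx_shm_find_reg_locked() while the
	 * registry lock was held keeps the returned reg alive after
	 * the lock is dropped, so handing it to the caller here is
	 * safe.
	 */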
return (reg); } static void umtx_shm_free_reg(struct umtx_shm_reg *reg) { chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0); crfree(reg->ushm_cred); shm_drop(reg->ushm_obj); uma_zfree(umtx_shm_reg_zone, reg); } static bool umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force) { bool res; mtx_assert(&umtx_shm_lock, MA_OWNED); KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg)); reg->ushm_refcnt--; res = reg->ushm_refcnt == 0; if (res || force) { if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) { TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash], reg, ushm_reg_link); reg->ushm_flags &= ~USHMF_REG_LINKED; } if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) { LIST_REMOVE(reg, ushm_obj_link); reg->ushm_flags &= ~USHMF_OBJ_LINKED; } } return (res); } static void umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force) { vm_object_t object; bool dofree; if (force) { object = reg->ushm_obj->shm_object; VM_OBJECT_WLOCK(object); object->flags |= OBJ_UMTXDEAD; VM_OBJECT_WUNLOCK(object); } mtx_lock(&umtx_shm_lock); dofree = umtx_shm_unref_reg_locked(reg, force); mtx_unlock(&umtx_shm_lock); if (dofree) umtx_shm_free_reg(reg); } void umtx_shm_object_init(vm_object_t object) { LIST_INIT(USHM_OBJ_UMTX(object)); } void umtx_shm_object_terminated(vm_object_t object) { struct umtx_shm_reg *reg, *reg1; bool dofree; if (LIST_EMPTY(USHM_OBJ_UMTX(object))) return; dofree = false; mtx_lock(&umtx_shm_lock); LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) { if (umtx_shm_unref_reg_locked(reg, true)) { TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg, ushm_reg_link); dofree = true; } } mtx_unlock(&umtx_shm_lock); if (dofree) taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task); } static int umtx_shm_create_reg(struct thread *td, const struct umtx_key *key, struct umtx_shm_reg **res) { struct umtx_shm_reg *reg, *reg1; struct ucred *cred; int error; reg = umtx_shm_find_reg(key); if (reg != NULL) { *res = reg; return (0); } cred = td->td_ucred; if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP))) return (ENOMEM); reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO); reg->ushm_refcnt = 1; bcopy(key, &reg->ushm_key, sizeof(*key)); reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR); reg->ushm_cred = crhold(cred); error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE); if (error != 0) { umtx_shm_free_reg(reg); return (error); } mtx_lock(&umtx_shm_lock); reg1 = umtx_shm_find_reg_locked(key); if (reg1 != NULL) { mtx_unlock(&umtx_shm_lock); umtx_shm_free_reg(reg); *res = reg1; return (0); } reg->ushm_refcnt++; TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link); LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg, ushm_obj_link); reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED; mtx_unlock(&umtx_shm_lock); *res = reg; return (0); } static int umtx_shm_alive(struct thread *td, void *addr) { vm_map_t map; vm_map_entry_t entry; vm_object_t object; vm_pindex_t pindex; vm_prot_t prot; int res, ret; boolean_t wired; map = &td->td_proc->p_vmspace->vm_map; res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry, &object, &pindex, &prot, &wired); if (res != KERN_SUCCESS) return (EFAULT); if (object == NULL) ret = EINVAL; else ret = (object->flags & OBJ_UMTXDEAD) != 0 ? 
ENOTTY : 0; vm_map_lookup_done(map, entry); return (ret); } static void umtx_shm_init(void) { int i; umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF); for (i = 0; i < nitems(umtx_shm_registry); i++) TAILQ_INIT(&umtx_shm_registry[i]); } static int umtx_shm(struct thread *td, void *addr, u_int flags) { struct umtx_key key; struct umtx_shm_reg *reg; struct file *fp; int error, fd; if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP | UMTX_SHM_DESTROY| UMTX_SHM_ALIVE)) != 1) return (EINVAL); if ((flags & UMTX_SHM_ALIVE) != 0) return (umtx_shm_alive(td, addr)); error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key); if (error != 0) return (error); KASSERT(key.shared == 1, ("non-shared key")); if ((flags & UMTX_SHM_CREAT) != 0) { error = umtx_shm_create_reg(td, &key, ®); } else { reg = umtx_shm_find_reg(&key); if (reg == NULL) error = ESRCH; } umtx_key_release(&key); if (error != 0) return (error); KASSERT(reg != NULL, ("no reg")); if ((flags & UMTX_SHM_DESTROY) != 0) { umtx_shm_unref_reg(reg, true); } else { #if 0 #ifdef MAC error = mac_posixshm_check_open(td->td_ucred, reg->ushm_obj, FFLAGS(O_RDWR)); if (error == 0) #endif error = shm_access(reg->ushm_obj, td->td_ucred, FFLAGS(O_RDWR)); if (error == 0) #endif error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL); if (error == 0) { shm_hold(reg->ushm_obj); finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj, &shm_ops); td->td_retval[0] = fd; fdrop(fp, td); } } umtx_shm_unref_reg(reg, false); return (error); } static int __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap) { return (umtx_shm(td, uap->uaddr1, uap->val)); } static int umtx_robust_lists(struct thread *td, struct umtx_robust_lists_params *rbp) { td->td_rb_list = rbp->robust_list_offset; td->td_rbp_list = rbp->robust_priv_list_offset; td->td_rb_inact = rbp->robust_inact_offset; return (0); } static int __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap) { struct umtx_robust_lists_params rb; int error; if (uap->val > sizeof(rb)) return (EINVAL); bzero(&rb, sizeof(rb)); error = copyin(uap->uaddr1, &rb, uap->val); if (error != 0) return (error); return (umtx_robust_lists(td, &rb)); } typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap); static const _umtx_op_func op_table[] = { [UMTX_OP_RESERVED0] = __umtx_op_unimpl, [UMTX_OP_RESERVED1] = __umtx_op_unimpl, [UMTX_OP_WAIT] = __umtx_op_wait, [UMTX_OP_WAKE] = __umtx_op_wake, [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex, [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait, [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint, [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock, [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, #else [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, #endif [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, [UMTX_OP_MUTEX_WAKE2] = 
__umtx_op_wake2_umutex, [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, [UMTX_OP_SHM] = __umtx_op_shm, [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists, }; int sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) { if ((unsigned)uap->op < nitems(op_table)) return (*op_table[uap->op])(td, uap); return (EINVAL); } #ifdef COMPAT_FREEBSD32 struct timespec32 { int32_t tv_sec; int32_t tv_nsec; }; struct umtx_time32 { struct timespec32 timeout; uint32_t flags; uint32_t clockid; }; static inline int umtx_copyin_timeout32(void *addr, struct timespec *tsp) { struct timespec32 ts32; int error; error = copyin(addr, &ts32, sizeof(struct timespec32)); if (error == 0) { if (ts32.tv_sec < 0 || ts32.tv_nsec >= 1000000000 || ts32.tv_nsec < 0) error = EINVAL; else { tsp->tv_sec = ts32.tv_sec; tsp->tv_nsec = ts32.tv_nsec; } } return (error); } static inline int umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp) { struct umtx_time32 t32; int error; t32.clockid = CLOCK_REALTIME; t32.flags = 0; if (size <= sizeof(struct timespec32)) error = copyin(addr, &t32.timeout, sizeof(struct timespec32)); else error = copyin(addr, &t32, sizeof(struct umtx_time32)); if (error != 0) return (error); if (t32.timeout.tv_sec < 0 || t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0) return (EINVAL); tp->_timeout.tv_sec = t32.timeout.tv_sec; tp->_timeout.tv_nsec = t32.timeout.tv_nsec; tp->_flags = t32.flags; tp->_clockid = t32.clockid; return (0); } static int __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap) { struct _umtx_time *tm_p, timeout; int error; if (uap->uaddr2 == NULL) tm_p = NULL; else { error = umtx_copyin_umtx_time32(uap->uaddr2, (size_t)uap->uaddr1, &timeout); if (error != 0) return (error); tm_p = &timeout; } return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); } static int __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) { struct _umtx_time *tm_p, timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) tm_p = NULL; else { error = umtx_copyin_umtx_time32(uap->uaddr2, (size_t)uap->uaddr1, &timeout); if (error != 0) return (error); tm_p = &timeout; } return (do_lock_umutex(td, uap->obj, tm_p, 0)); } static int __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) { struct _umtx_time *tm_p, timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) tm_p = NULL; else { error = umtx_copyin_umtx_time32(uap->uaddr2, (size_t)uap->uaddr1, &timeout); if (error != 0) return (error); tm_p = &timeout; } return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); } static int __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) ts = NULL; else { error = umtx_copyin_timeout32(uap->uaddr2, &timeout); if (error != 0) return (error); ts = &timeout; } return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); } static int __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap) { struct _umtx_time timeout; int error; /* Allow a null timespec (wait forever). 
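 *
 * The compat32 entry points differ from their native counterparts
 * only in the copyin step: umtx_copyin_umtx_time32() widens the
 * 32-bit time fields, e.g.
 *
 *	tp->_timeout.tv_sec = t32.timeout.tv_sec;
 *
 * and the shared do_rw_rdlock() path is then used unchanged.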
*/ if (uap->uaddr2 == NULL) { error = do_rw_rdlock(td, uap->obj, uap->val, 0); } else { error = umtx_copyin_umtx_time32(uap->uaddr2, (size_t)uap->uaddr1, &timeout); if (error != 0) return (error); error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); } return (error); } static int __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap) { struct _umtx_time timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) { error = do_rw_wrlock(td, uap->obj, 0); } else { error = umtx_copyin_umtx_time32(uap->uaddr2, (size_t)uap->uaddr1, &timeout); if (error != 0) return (error); error = do_rw_wrlock(td, uap->obj, &timeout); } return (error); } static int __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap) { struct _umtx_time *tm_p, timeout; int error; if (uap->uaddr2 == NULL) tm_p = NULL; else { error = umtx_copyin_umtx_time32( uap->uaddr2, (size_t)uap->uaddr1,&timeout); if (error != 0) return (error); tm_p = &timeout; } return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); } #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) static int __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap) { struct _umtx_time *tm_p, timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) tm_p = NULL; else { error = umtx_copyin_umtx_time32(uap->uaddr2, (size_t)uap->uaddr1, &timeout); if (error != 0) return (error); tm_p = &timeout; } return (do_sem_wait(td, uap->obj, tm_p)); } #endif static int __umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap) { struct _umtx_time *tm_p, timeout; size_t uasize; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) { uasize = 0; tm_p = NULL; } else { uasize = (size_t)uap->uaddr1; error = umtx_copyin_umtx_time32(uap->uaddr2, uasize, &timeout); if (error != 0) return (error); tm_p = &timeout; } error = do_sem2_wait(td, uap->obj, tm_p); if (error == EINTR && uap->uaddr2 != NULL && (timeout._flags & UMTX_ABSTIME) == 0 && uasize >= sizeof(struct umtx_time32) + sizeof(struct timespec32)) { struct timespec32 remain32 = { .tv_sec = timeout._timeout.tv_sec, .tv_nsec = timeout._timeout.tv_nsec }; error = copyout(&remain32, (struct umtx_time32 *)uap->uaddr2 + 1, sizeof(struct timespec32)); if (error == 0) { error = EINTR; } } return (error); } static int __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap) { uint32_t uaddrs[BATCH_SIZE], **upp; int count, error, i, pos, tocopy; upp = (uint32_t **)uap->obj; error = 0; for (count = uap->val, pos = 0; count > 0; count -= tocopy, pos += tocopy) { tocopy = MIN(count, BATCH_SIZE); error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); if (error != 0) break; for (i = 0; i < tocopy; ++i) kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i], INT_MAX, 1); maybe_yield(); } return (error); } struct umtx_robust_lists_params_compat32 { uint32_t robust_list_offset; uint32_t robust_priv_list_offset; uint32_t robust_inact_offset; }; static int __umtx_op_robust_lists_compat32(struct thread *td, struct _umtx_op_args *uap) { struct umtx_robust_lists_params rb; struct umtx_robust_lists_params_compat32 rb32; int error; if (uap->val > sizeof(rb32)) return (EINVAL); bzero(&rb, sizeof(rb)); bzero(&rb32, sizeof(rb32)); error = copyin(uap->uaddr1, &rb32, uap->val); if (error != 0) return (error); rb.robust_list_offset = rb32.robust_list_offset; rb.robust_priv_list_offset = rb32.robust_priv_list_offset; rb.robust_inact_offset = rb32.robust_inact_offset; return 
(umtx_robust_lists(td, &rb)); } static const _umtx_op_func op_table_compat32[] = { [UMTX_OP_RESERVED0] = __umtx_op_unimpl, [UMTX_OP_RESERVED1] = __umtx_op_unimpl, [UMTX_OP_WAIT] = __umtx_op_wait_compat32, [UMTX_OP_WAKE] = __umtx_op_wake, [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex_compat32, [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait_compat32, [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, [UMTX_OP_WAIT_UINT] = __umtx_op_wait_compat32, [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock_compat32, [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock_compat32, [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private_compat32, [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex_compat32, [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait_compat32, [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, #else [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, #endif [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private32, [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait_compat32, [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, [UMTX_OP_SHM] = __umtx_op_shm, [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists_compat32, }; int freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap) { if ((unsigned)uap->op < nitems(op_table_compat32)) { return (*op_table_compat32[uap->op])(td, (struct _umtx_op_args *)uap); } return (EINVAL); } #endif void umtx_thread_init(struct thread *td) { td->td_umtxq = umtxq_alloc(); td->td_umtxq->uq_thread = td; } void umtx_thread_fini(struct thread *td) { umtxq_free(td->td_umtxq); } /* * It will be called when new thread is created, e.g fork(). */ void umtx_thread_alloc(struct thread *td) { struct umtx_q *uq; uq = td->td_umtxq; uq->uq_inherited_pri = PRI_MAX; KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); KASSERT(uq->uq_thread == td, ("uq_thread != td")); KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); } /* * exec() hook. * * Clear robust lists for all process' threads, not delaying the * cleanup to thread_exit hook, since the relevant address space is * destroyed right now. */ static void umtx_exec_hook(void *arg __unused, struct proc *p, struct image_params *imgp __unused) { struct thread *td; KASSERT(p == curproc, ("need curproc")); KASSERT((p->p_flag & P_HADTHREADS) == 0 || (p->p_flag & P_STOPPED_SINGLE) != 0, ("curproc must be single-threaded")); /* * There is no need to lock the list as only this thread can be * running. */ FOREACH_THREAD_IN_PROC(p, td) { KASSERT(td == curthread || ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)), ("running thread %p %p", p, td)); umtx_thread_cleanup(td); td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0; } } /* * thread_exit() hook. 
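 *
 * Both this hook and the exec hook above funnel into
 * umtx_thread_cleanup(), which walks the robust lists the thread
 * registered earlier.  A hedged sketch of that registration
 * (userland side, variable names assumed):
 *
 *	struct umtx_robust_lists_params rb;
 *	rb.robust_list_offset = (uintptr_t)&list_head;
 *	rb.robust_priv_list_offset = (uintptr_t)&priv_list_head;
 *	rb.robust_inact_offset = (uintptr_t)&inact_slot;
 *	_umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL);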
*/ void umtx_thread_exit(struct thread *td) { umtx_thread_cleanup(td); } static int umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res) { u_long res1; #ifdef COMPAT_FREEBSD32 uint32_t res32; #endif int error; #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { error = fueword32((void *)ptr, &res32); if (error == 0) res1 = res32; } else #endif { error = fueword((void *)ptr, &res1); } if (error == 0) *res = res1; else error = EFAULT; return (error); } static void umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list) { #ifdef COMPAT_FREEBSD32 struct umutex32 m32; if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { memcpy(&m32, m, sizeof(m32)); *rb_list = m32.m_rb_lnk; } else #endif *rb_list = m->m_rb_lnk; } static int umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact) { struct umutex m; int error; KASSERT(td->td_proc == curproc, ("need current vmspace")); error = copyin((void *)rbp, &m, sizeof(m)); if (error != 0) return (error); if (rb_list != NULL) umtx_read_rb_list(td, &m, rb_list); if ((m.m_flags & UMUTEX_ROBUST) == 0) return (EINVAL); if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid) /* inact is cleared after unlock, allow the inconsistency */ return (inact ? 0 : EINVAL); return (do_unlock_umutex(td, (struct umutex *)rbp, true)); } static void umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, const char *name) { int error, i; uintptr_t rbp; bool inact; if (rb_list == 0) return; error = umtx_read_uptr(td, rb_list, &rbp); for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { if (rbp == *rb_inact) { inact = true; *rb_inact = 0; } else inact = false; error = umtx_handle_rb(td, rbp, &rbp, inact); } if (i == umtx_max_rb && umtx_verbose_rb) { uprintf("comm %s pid %d: reached umtx %smax rb %d\n", td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); } if (error != 0 && umtx_verbose_rb) { uprintf("comm %s pid %d: handling %srb error %d\n", td->td_proc->p_comm, td->td_proc->p_pid, name, error); } } /* * Clean up umtx data. */ static void umtx_thread_cleanup(struct thread *td) { struct umtx_q *uq; struct umtx_pi *pi; uintptr_t rb_inact; /* * Disown pi mutexes. */ uq = td->td_umtxq; if (uq != NULL) { if (uq->uq_inherited_pri != PRI_MAX || !TAILQ_EMPTY(&uq->uq_pi_contested)) { mtx_lock(&umtx_lock); uq->uq_inherited_pri = PRI_MAX; while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { pi->pi_owner = NULL; TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); } mtx_unlock(&umtx_lock); } sched_lend_user_prio_cond(td, PRI_MAX); } if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0) return; /* * Handle terminated robust mutexes. Must be done after * robust pi disown, otherwise unlock could see unowned * entries. */ rb_inact = td->td_rb_inact; if (rb_inact != 0) (void)umtx_read_uptr(td, rb_inact, &rb_inact); umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, ""); umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv "); if (rb_inact != 0) (void)umtx_handle_rb(td, rb_inact, NULL, true); } diff --git a/sys/mips/mips/support.S b/sys/mips/mips/support.S index c0935bbbd1dd..1b1f3e40fa64 100644 --- a/sys/mips/mips/support.S +++ b/sys/mips/mips/support.S @@ -1,867 +1,859 @@ /* $OpenBSD: locore.S,v 1.18 1998/09/15 10:58:53 pefo Exp $ */ /*- * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Digital Equipment Corporation and Ralph Campbell. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Copyright (C) 1989 Digital Equipment Corporation. * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby granted, * provided that the above copyright notice appears in all copies. * Digital Equipment Corporation makes no representations about the * suitability of this software for any purpose. It is provided "as is" * without express or implied warranty. * * from: Header: /sprite/src/kernel/mach/ds3100.md/RCS/loMem.s, * v 1.1 89/07/11 17:55:04 nelson Exp SPRITE (DECWRL) * from: Header: /sprite/src/kernel/mach/ds3100.md/RCS/machAsm.s, * v 9.2 90/01/29 18:00:39 shirriff Exp SPRITE (DECWRL) * from: Header: /sprite/src/kernel/vm/ds3100.md/vmPmaxAsm.s, * v 1.1 89/07/10 14:27:41 nelson Exp SPRITE (DECWRL) * * from: @(#)locore.s 8.5 (Berkeley) 1/4/94 * JNPR: support.S,v 1.5.2.2 2007/08/29 10:03:49 girish * $FreeBSD$ */ /* * Copyright (c) 1997 Jonathan Stone (hereinafter referred to as the author) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Jonathan R. Stone for * the NetBSD Project. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Contains assembly language support routines. */ #include "opt_ddb.h" #include #include #include #include #include #include #include "assym.inc" .set noreorder # Noreorder is default style! /* * Primitives */ .text /* * int copystr(void *kfaddr, void *kdaddr, size_t maxlen, size_t *lencopied) * Copy a NIL-terminated string, at most maxlen characters long. Return the * number of characters copied (including the NIL) in *lencopied. If the * string is too long, return ENAMETOOLONG; else return 0. */ LEAF(copystr) move t0, a2 beq a2, zero, 4f 1: lbu v0, 0(a0) PTR_SUBU a2, a2, 1 beq v0, zero, 2f sb v0, 0(a1) # each byte until NIL PTR_ADDU a0, a0, 1 bne a2, zero, 1b # less than maxlen PTR_ADDU a1, a1, 1 4: li v0, ENAMETOOLONG # run out of space 2: beq a3, zero, 3f # return num. of copied bytes PTR_SUBU a2, t0, a2 # if the 4th arg was non-NULL PTR_S a2, 0(a3) 3: j ra # v0 is 0 or ENAMETOOLONG nop END(copystr) /* * Copy a null terminated string from the user address space into * the kernel address space. * * copyinstr(fromaddr, toaddr, maxlength, &lencopied) * caddr_t fromaddr; * caddr_t toaddr; * u_int maxlength; * u_int *lencopied; */ NESTED(copyinstr, CALLFRAME_SIZ, ra) PTR_SUBU sp, sp, CALLFRAME_SIZ .mask 0x80000000, (CALLFRAME_RA - CALLFRAME_SIZ) PTR_LA v0, copyerr blt a0, zero, _C_LABEL(copyerr) # make sure address is in user space REG_S ra, CALLFRAME_RA(sp) GET_CPU_PCPU(v1) PTR_L v1, PC_CURPCB(v1) jal _C_LABEL(copystr) PTR_S v0, U_PCB_ONFAULT(v1) REG_L ra, CALLFRAME_RA(sp) GET_CPU_PCPU(v1) PTR_L v1, PC_CURPCB(v1) PTR_S zero, U_PCB_ONFAULT(v1) j ra PTR_ADDU sp, sp, CALLFRAME_SIZ END(copyinstr) /* * Copy specified amount of data from user space into the kernel * copyin(from, to, len) * caddr_t *from; (user source address) * caddr_t *to; (kernel destination address) * unsigned len; */ NESTED(copyin, CALLFRAME_SIZ, ra) PTR_SUBU sp, sp, CALLFRAME_SIZ .mask 0x80000000, (CALLFRAME_RA - CALLFRAME_SIZ) PTR_LA v0, copyerr blt a0, zero, _C_LABEL(copyerr) # make sure address is in user space REG_S ra, CALLFRAME_RA(sp) GET_CPU_PCPU(v1) PTR_L v1, PC_CURPCB(v1) jal _C_LABEL(bcopy) PTR_S v0, U_PCB_ONFAULT(v1) REG_L ra, CALLFRAME_RA(sp) GET_CPU_PCPU(v1) PTR_L v1, PC_CURPCB(v1) # bcopy modified v1, so reload PTR_S zero, U_PCB_ONFAULT(v1) PTR_ADDU sp, sp, CALLFRAME_SIZ j ra move v0, zero END(copyin) /* * Copy specified amount of data from kernel to the user space * copyout(from, to, len) * caddr_t *from; (kernel source address) * caddr_t *to; (user destination address) * unsigned len; */ NESTED(copyout, CALLFRAME_SIZ, ra) PTR_SUBU sp, sp, CALLFRAME_SIZ .mask 0x80000000, (CALLFRAME_RA - CALLFRAME_SIZ) PTR_LA v0, copyerr blt a1, zero, _C_LABEL(copyerr) # make sure address is in user space REG_S ra, CALLFRAME_RA(sp) GET_CPU_PCPU(v1) PTR_L v1, PC_CURPCB(v1) jal _C_LABEL(bcopy) PTR_S v0, U_PCB_ONFAULT(v1) REG_L ra, CALLFRAME_RA(sp) GET_CPU_PCPU(v1) PTR_L v1, PC_CURPCB(v1) # bcopy modified v1, so reload PTR_S zero, U_PCB_ONFAULT(v1) PTR_ADDU sp, sp, CALLFRAME_SIZ j ra move v0, zero END(copyout) LEAF(copyerr) REG_L 
ra, CALLFRAME_RA(sp) PTR_ADDU sp, sp, CALLFRAME_SIZ j ra li v0, EFAULT # return error END(copyerr) /* * {fu,su},{byte,sword,word}, fetch or store a byte, short or word to * user-space. */ #ifdef __mips_n64 LEAF(fueword64) XLEAF(fueword) PTR_LA v0, fswberr blt a0, zero, fswberr # make sure address is in user space nop GET_CPU_PCPU(v1) PTR_L v1, PC_CURPCB(v1) PTR_S v0, U_PCB_ONFAULT(v1) ld v0, 0(a0) # fetch word PTR_S zero, U_PCB_ONFAULT(v1) sd v0, 0(a1) # store word j ra li v0, 0 END(fueword64) #endif LEAF(fueword32) #ifndef __mips_n64 XLEAF(fueword) #endif PTR_LA v0, fswberr blt a0, zero, fswberr # make sure address is in user space nop GET_CPU_PCPU(v1) PTR_L v1, PC_CURPCB(v1) PTR_S v0, U_PCB_ONFAULT(v1) lw v0, 0(a0) # fetch word PTR_S zero, U_PCB_ONFAULT(v1) sw v0, 0(a1) # store word j ra li v0, 0 END(fueword32) LEAF(fuesword) PTR_LA v0, fswberr blt a0, zero, fswberr # make sure address is in user space nop GET_CPU_PCPU(v1) PTR_L v1, PC_CURPCB(v1) PTR_S v0, U_PCB_ONFAULT(v1) lhu v0, 0(a0) # fetch short PTR_S zero, U_PCB_ONFAULT(v1) sh v0, 0(a1) # store short j ra li v0, 0 END(fuesword) LEAF(fubyte) PTR_LA v0, fswberr blt a0, zero, fswberr # make sure address is in user space nop GET_CPU_PCPU(v1) PTR_L v1, PC_CURPCB(v1) PTR_S v0, U_PCB_ONFAULT(v1) lbu v0, 0(a0) # fetch byte j ra PTR_S zero, U_PCB_ONFAULT(v1) END(fubyte) LEAF(suword32) #ifndef __mips_n64 XLEAF(suword) #endif PTR_LA v0, fswberr blt a0, zero, fswberr # make sure address is in user space nop GET_CPU_PCPU(v1) PTR_L v1, PC_CURPCB(v1) PTR_S v0, U_PCB_ONFAULT(v1) sw a1, 0(a0) # store word PTR_S zero, U_PCB_ONFAULT(v1) j ra move v0, zero END(suword32) #ifdef __mips_n64 LEAF(suword64) XLEAF(suword) PTR_LA v0, fswberr blt a0, zero, fswberr # make sure address is in user space nop GET_CPU_PCPU(v1) PTR_L v1, PC_CURPCB(v1) PTR_S v0, U_PCB_ONFAULT(v1) sd a1, 0(a0) # store word PTR_S zero, U_PCB_ONFAULT(v1) j ra move v0, zero END(suword64) #endif /* * casueword(9) * int casueword(u_long *p, u_long oldval, u_long *oldvalp, * u_long newval) */ /* * casueword32(9) * int casueword32(uint32_t *p, uint32_t oldval, uint32_t *oldvalp, * uint32_t newval) */ LEAF(casueword32) #ifndef __mips_n64 XLEAF(casueword) #endif PTR_LA v0, fswberr blt a0, zero, fswberr # make sure address is in user space nop GET_CPU_PCPU(v1) PTR_L v1, PC_CURPCB(v1) PTR_S v0, U_PCB_ONFAULT(v1) -1: + + li v0, 1 move t0, a3 ll t1, 0(a0) - bne a1, t1, 2f + bne a1, t1, 1f nop sc t0, 0(a0) # store word - beqz t0, 1b - nop - j 3f - li v0, 0 -2: - li v0, -1 -3: + xori v0, t0, 1 +1: PTR_S zero, U_PCB_ONFAULT(v1) jr ra sw t1, 0(a2) # unconditionally store old word END(casueword32) #ifdef __mips_n64 LEAF(casueword64) XLEAF(casueword) PTR_LA v0, fswberr blt a0, zero, fswberr # make sure address is in user space nop GET_CPU_PCPU(v1) PTR_L v1, PC_CURPCB(v1) PTR_S v0, U_PCB_ONFAULT(v1) -1: + + li v0, 1 move t0, a3 lld t1, 0(a0) - bne a1, t1, 1f nop scd t0, 0(a0) # store double word - beqz t0, 1b - nop - j 3f - li v0, 0 -2: - li v0, -1 -3: + xori v0, t0, 1 +1: PTR_S zero, U_PCB_ONFAULT(v1) jr ra sd t1, 0(a2) # unconditionally store old word END(casueword64) #endif /* * Will have to flush the instruction cache if byte merging is done in hardware.
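 *
 * Note on the casueword32/casueword64 change above: the previous
 * code retried a failed SC/SCD in a loop inside the kernel; now a
 * single LL/SC attempt is made, and the xori maps the SC result
 * (1 = stored, 0 = failed) onto the documented return convention:
 *
 *	0	comparison succeeded and the store went through
 *	1	the value differed, or the conditional store failed
 *	-1	the user address faulted (via fswberr)
 *
 * The C caller is expected to re-read and retry, typically after
 * umtxq_check_susp(), as the umtx changes earlier in this patch do.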
/* * Will have to flush the instruction cache if byte merging is done in hardware. */ LEAF(susword) PTR_LA v0, fswberr blt a0, zero, fswberr # make sure address is in user space nop GET_CPU_PCPU(v1) PTR_L v1, PC_CURPCB(v1) PTR_S v0, U_PCB_ONFAULT(v1) sh a1, 0(a0) # store short PTR_S zero, U_PCB_ONFAULT(v1) j ra move v0, zero END(susword) LEAF(subyte) PTR_LA v0, fswberr blt a0, zero, fswberr # make sure address is in user space nop GET_CPU_PCPU(v1) PTR_L v1, PC_CURPCB(v1) PTR_S v0, U_PCB_ONFAULT(v1) sb a1, 0(a0) # store byte PTR_S zero, U_PCB_ONFAULT(v1) j ra move v0, zero END(subyte) LEAF(fswberr) j ra li v0, -1 END(fswberr) /* * memset(void *s1, int c, int len) * NetBSD: memset.S,v 1.3 2001/10/16 15:40:53 uch Exp */ LEAF(memset) .set noreorder blt a2, 12, memsetsmallclr # small amount to clear? move v0, a0 # save s1 for result sll t1, a1, 8 # compute c << 8 in t1 or t1, t1, a1 # compute c << 8 | c in t1 sll t2, t1, 16 # shift that left 16 or t1, t2, t1 # or together PTR_SUBU t0, zero, a0 # compute # bytes to word align address and t0, t0, 3 beq t0, zero, 1f # skip if word aligned PTR_SUBU a2, a2, t0 # subtract from remaining count SWHI t1, 0(a0) # store 1, 2, or 3 bytes to align PTR_ADDU a0, a0, t0 1: and v1, a2, 3 # compute number of whole words left PTR_SUBU t0, a2, v1 PTR_SUBU a2, a2, t0 PTR_ADDU t0, t0, a0 # compute ending address 2: PTR_ADDU a0, a0, 4 # clear words bne a0, t0, 2b # unrolling loop does not help sw t1, -4(a0) # since we are limited by memory speed memsetsmallclr: ble a2, zero, 2f PTR_ADDU t0, a2, a0 # compute ending address 1: PTR_ADDU a0, a0, 1 # clear bytes bne a0, t0, 1b sb a1, -1(a0) 2: j ra nop .set reorder END(memset) /* * bzero(s1, n) */ LEAF(bzero) XLEAF(blkclr) .set noreorder blt a1, 12, smallclr # small amount to clear? PTR_SUBU a3, zero, a0 # compute # bytes to word align address and a3, a3, 3 beq a3, zero, 1f # skip if word aligned PTR_SUBU a1, a1, a3 # subtract from remaining count SWHI zero, 0(a0) # clear 1, 2, or 3 bytes to align PTR_ADDU a0, a0, a3 1: and v0, a1, 3 # compute number of words left PTR_SUBU a3, a1, v0 move a1, v0 PTR_ADDU a3, a3, a0 # compute ending address 2: PTR_ADDU a0, a0, 4 # clear words bne a0, a3, 2b # unrolling loop does not help sw zero, -4(a0) # since we are limited by memory speed smallclr: ble a1, zero, 2f PTR_ADDU a3, a1, a0 # compute ending address 1: PTR_ADDU a0, a0, 1 # clear bytes bne a0, a3, 1b sb zero, -1(a0) 2: j ra nop END(bzero) /* * bcmp(s1, s2, n) */ LEAF(bcmp) .set noreorder blt a2, 16, smallcmp # is it worth any trouble?
xor v0, a0, a1 # compare low two bits of addresses and v0, v0, 3 PTR_SUBU a3, zero, a1 # compute # bytes to word align address bne v0, zero, unalignedcmp # not possible to align addresses and a3, a3, 3 beq a3, zero, 1f PTR_SUBU a2, a2, a3 # subtract from remaining count move v0, v1 # init v0,v1 so unmodified bytes match LWHI v0, 0(a0) # read 1, 2, or 3 bytes LWHI v1, 0(a1) PTR_ADDU a1, a1, a3 bne v0, v1, nomatch PTR_ADDU a0, a0, a3 1: and a3, a2, ~3 # compute number of whole words left PTR_SUBU a2, a2, a3 # which has to be >= (16-3) & ~3 PTR_ADDU a3, a3, a0 # compute ending address 2: lw v0, 0(a0) # compare words lw v1, 0(a1) PTR_ADDU a0, a0, 4 bne v0, v1, nomatch PTR_ADDU a1, a1, 4 bne a0, a3, 2b nop b smallcmp # finish remainder nop unalignedcmp: beq a3, zero, 2f PTR_SUBU a2, a2, a3 # subtract from remaining count PTR_ADDU a3, a3, a0 # compute ending address 1: lbu v0, 0(a0) # compare bytes until a1 word aligned lbu v1, 0(a1) PTR_ADDU a0, a0, 1 bne v0, v1, nomatch PTR_ADDU a1, a1, 1 bne a0, a3, 1b nop 2: and a3, a2, ~3 # compute number of whole words left PTR_SUBU a2, a2, a3 # which has to be >= (16-3) & ~3 PTR_ADDU a3, a3, a0 # compute ending address 3: LWHI v0, 0(a0) # compare words a0 unaligned, a1 aligned LWLO v0, 3(a0) lw v1, 0(a1) PTR_ADDU a0, a0, 4 bne v0, v1, nomatch PTR_ADDU a1, a1, 4 bne a0, a3, 3b nop smallcmp: ble a2, zero, match PTR_ADDU a3, a2, a0 # compute ending address 1: lbu v0, 0(a0) lbu v1, 0(a1) PTR_ADDU a0, a0, 1 bne v0, v1, nomatch PTR_ADDU a1, a1, 1 bne a0, a3, 1b nop match: j ra move v0, zero nomatch: j ra li v0, 1 END(bcmp) /* * bit = ffs(value) */ LEAF(ffs) .set noreorder beq a0, zero, 2f move v0, zero 1: and v1, a0, 1 # bit set? addu v0, v0, 1 beq v1, zero, 1b # no, continue srl a0, a0, 1 2: j ra nop END(ffs) /** * void * atomic_set_16(u_int16_t *a, u_int16_t b) * { * *a |= b; * } */ LEAF(atomic_set_16) .set noreorder srl a0, a0, 2 # round down address to be 32-bit aligned sll a0, a0, 2 andi a1, a1, 0xffff 1: ll t0, 0(a0) or t0, t0, a1 sc t0, 0(a0) beq t0, zero, 1b nop j ra nop END(atomic_set_16) /** * void * atomic_clear_16(u_int16_t *a, u_int16_t b) * { * *a &= ~b; * } */ LEAF(atomic_clear_16) .set noreorder srl a0, a0, 2 # round down address to be 32-bit aligned sll a0, a0, 2 nor a1, zero, a1 1: ll t0, 0(a0) move t1, t0 andi t1, t1, 0xffff # t1 has the original lower 16 bits and t1, t1, a1 # t1 has the new lower 16 bits srl t0, t0, 16 # preserve original top 16 bits sll t0, t0, 16 or t0, t0, t1 sc t0, 0(a0) beq t0, zero, 1b nop j ra nop END(atomic_clear_16) /** * void * atomic_subtract_16(uint16_t *a, uint16_t b) * { * *a -= b; * } */ LEAF(atomic_subtract_16) .set noreorder srl a0, a0, 2 # round down address to be 32-bit aligned sll a0, a0, 2 1: ll t0, 0(a0) move t1, t0 andi t1, t1, 0xffff # t1 has the original lower 16 bits subu t1, t1, a1 andi t1, t1, 0xffff # t1 has the new lower 16 bits srl t0, t0, 16 # preserve original top 16 bits sll t0, t0, 16 or t0, t0, t1 sc t0, 0(a0) beq t0, zero, 1b nop j ra nop END(atomic_subtract_16) /** * void * atomic_add_16(uint16_t *a, uint16_t b) * { * *a += b; * } */ LEAF(atomic_add_16) .set noreorder srl a0, a0, 2 # round down address to be 32-bit aligned sll a0, a0, 2 1: ll t0, 0(a0) move t1, t0 andi t1, t1, 0xffff # t1 has the original lower 16 bits addu t1, t1, a1 andi t1, t1, 0xffff # t1 has the new lower 16 bits srl t0, t0, 16 # preserve original top 16 bits sll t0, t0, 16 or t0, t0, t1 sc t0, 0(a0) beq t0, zero, 1b nop j ra nop END(atomic_add_16) /** * void * atomic_add_8(uint8_t *a, uint8_t b) * { * *a += b; * } 
*/ LEAF(atomic_add_8) .set noreorder srl a0, a0, 2 # round down address to be 32-bit aligned sll a0, a0, 2 1: ll t0, 0(a0) move t1, t0 andi t1, t1, 0xff # t1 has the original lower 8 bits addu t1, t1, a1 andi t1, t1, 0xff # t1 has the new lower 8 bits srl t0, t0, 8 # preserve original top 24 bits sll t0, t0, 8 or t0, t0, t1 sc t0, 0(a0) beq t0, zero, 1b nop j ra nop END(atomic_add_8) /** * void * atomic_subtract_8(uint8_t *a, uint8_t b) * { * *a -= b; * } */ LEAF(atomic_subtract_8) .set noreorder srl a0, a0, 2 # round down address to be 32-bit aligned sll a0, a0, 2 1: ll t0, 0(a0) move t1, t0 andi t1, t1, 0xff # t1 has the original lower 8 bits subu t1, t1, a1 andi t1, t1, 0xff # t1 has the new lower 8 bits srl t0, t0, 8 # preserve original top 24 bits sll t0, t0, 8 or t0, t0, t1 sc t0, 0(a0) beq t0, zero, 1b nop j ra nop END(atomic_subtract_8) .set noreorder # Noreorder is default style! #if defined(DDB) || defined(DEBUG) LEAF(kdbpeek) PTR_LA v1, ddberr and v0, a0, 3 # unaligned ? GET_CPU_PCPU(t1) PTR_L t1, PC_CURPCB(t1) bne v0, zero, 1f PTR_S v1, U_PCB_ONFAULT(t1) lw v0, (a0) jr ra PTR_S zero, U_PCB_ONFAULT(t1) 1: LWHI v0, 0(a0) LWLO v0, 3(a0) jr ra PTR_S zero, U_PCB_ONFAULT(t1) END(kdbpeek) LEAF(kdbpeekd) PTR_LA v1, ddberr and v0, a0, 3 # unaligned ? GET_CPU_PCPU(t1) PTR_L t1, PC_CURPCB(t1) bne v0, zero, 1f PTR_S v1, U_PCB_ONFAULT(t1) ld v0, (a0) jr ra PTR_S zero, U_PCB_ONFAULT(t1) 1: REG_LHI v0, 0(a0) REG_LLO v0, 7(a0) jr ra PTR_S zero, U_PCB_ONFAULT(t1) END(kdbpeekd) ddberr: jr ra nop #if defined(DDB) LEAF(kdbpoke) PTR_LA v1, ddberr and v0, a0, 3 # unaligned ? GET_CPU_PCPU(t1) PTR_L t1, PC_CURPCB(t1) bne v0, zero, 1f PTR_S v1, U_PCB_ONFAULT(t1) sw a1, (a0) jr ra PTR_S zero, U_PCB_ONFAULT(t1) 1: SWHI a1, 0(a0) SWLO a1, 3(a0) jr ra PTR_S zero, U_PCB_ONFAULT(t1) END(kdbpoke) .data .globl esym esym: .word 0 #endif /* DDB */ #endif /* DDB || DEBUG */ .text LEAF(breakpoint) break MIPS_BREAK_SOVER_VAL jr ra nop END(breakpoint) LEAF(setjmp) mfc0 v0, MIPS_COP_0_STATUS # Later the "real" spl value! REG_S s0, (SZREG * PCB_REG_S0)(a0) REG_S s1, (SZREG * PCB_REG_S1)(a0) REG_S s2, (SZREG * PCB_REG_S2)(a0) REG_S s3, (SZREG * PCB_REG_S3)(a0) REG_S s4, (SZREG * PCB_REG_S4)(a0) REG_S s5, (SZREG * PCB_REG_S5)(a0) REG_S s6, (SZREG * PCB_REG_S6)(a0) REG_S s7, (SZREG * PCB_REG_S7)(a0) REG_S s8, (SZREG * PCB_REG_S8)(a0) REG_S sp, (SZREG * PCB_REG_SP)(a0) REG_S ra, (SZREG * PCB_REG_RA)(a0) REG_S v0, (SZREG * PCB_REG_SR)(a0) jr ra li v0, 0 # setjmp return END(setjmp) LEAF(longjmp) REG_L v0, (SZREG * PCB_REG_SR)(a0) REG_L ra, (SZREG * PCB_REG_RA)(a0) REG_L s0, (SZREG * PCB_REG_S0)(a0) REG_L s1, (SZREG * PCB_REG_S1)(a0) REG_L s2, (SZREG * PCB_REG_S2)(a0) REG_L s3, (SZREG * PCB_REG_S3)(a0) REG_L s4, (SZREG * PCB_REG_S4)(a0) REG_L s5, (SZREG * PCB_REG_S5)(a0) REG_L s6, (SZREG * PCB_REG_S6)(a0) REG_L s7, (SZREG * PCB_REG_S7)(a0) REG_L s8, (SZREG * PCB_REG_S8)(a0) REG_L sp, (SZREG * PCB_REG_SP)(a0) mtc0 v0, MIPS_COP_0_STATUS # Later the "real" spl value! ITLBNOPFIX jr ra li v0, 1 # longjmp return END(longjmp) diff --git a/sys/powerpc/powerpc/copyinout.c b/sys/powerpc/powerpc/copyinout.c index e44f302c9c27..697866a83cc4 100644 --- a/sys/powerpc/powerpc/copyinout.c +++ b/sys/powerpc/powerpc/copyinout.c @@ -1,523 +1,529 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND BSD-4-Clause * * Copyright (C) 2002 Benno Rice * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1.
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 1993 Wolfgang Solfrank. * Copyright (C) 1993 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include int copyout(const void *kaddr, void *udaddr, size_t len) { struct thread *td; pmap_t pm; jmp_buf env; const char *kp; char *up, *p; size_t l; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; td->td_pcb->pcb_onfault = &env; if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (EFAULT); } kp = kaddr; up = udaddr; while (len > 0) { if (pmap_map_user_ptr(pm, up, (void **)&p, len, &l)) { td->td_pcb->pcb_onfault = NULL; return (EFAULT); } bcopy(kp, p, l); up += l; kp += l; len -= l; } td->td_pcb->pcb_onfault = NULL; return (0); } int copyin(const void *udaddr, void *kaddr, size_t len) { struct thread *td; pmap_t pm; jmp_buf env; const char *up; char *kp, *p; size_t l; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; td->td_pcb->pcb_onfault = &env; if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (EFAULT); } kp = kaddr; up = udaddr; while (len > 0) { if (pmap_map_user_ptr(pm, up, (void **)&p, len, &l)) { td->td_pcb->pcb_onfault = NULL; return (EFAULT); } bcopy(p, kp, l); up += l; kp += l; len -= l; } td->td_pcb->pcb_onfault = NULL; return (0); } int copyinstr(const void *udaddr, void *kaddr, size_t len, size_t *done) { const char *up; char *kp; size_t l; int rv, c; kp = kaddr; up = udaddr; rv = ENAMETOOLONG; for (l = 0; len-- > 0; l++) { if ((c = fubyte(up++)) < 0) { rv = EFAULT; break; } if (!(*kp++ = c)) { l++; rv = 0; break; } } if (done != NULL) { *done = l; } return (rv); } int subyte(volatile void *addr, int byte) { struct thread *td; pmap_t pm; jmp_buf env; char *p; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; td->td_pcb->pcb_onfault = &env; if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } if (pmap_map_user_ptr(pm, addr, (void **)&p, sizeof(*p), NULL)) { td->td_pcb->pcb_onfault = NULL; return (-1); } *p = (char)byte; td->td_pcb->pcb_onfault = NULL; return (0); } #ifdef __powerpc64__ int suword32(volatile void *addr, int word) { struct thread *td; pmap_t pm; jmp_buf env; int *p; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; td->td_pcb->pcb_onfault = &env; if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } if (pmap_map_user_ptr(pm, addr, (void **)&p, sizeof(*p), NULL)) { td->td_pcb->pcb_onfault = NULL; return (-1); } *p = word; td->td_pcb->pcb_onfault = NULL; return (0); } #endif int suword(volatile void *addr, long word) { struct thread *td; pmap_t pm; jmp_buf env; long *p; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; td->td_pcb->pcb_onfault = &env; if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } if (pmap_map_user_ptr(pm, addr, (void **)&p, sizeof(*p), NULL)) { td->td_pcb->pcb_onfault = NULL; return (-1); } *p = word; td->td_pcb->pcb_onfault = NULL; return (0); } #ifdef __powerpc64__ int suword64(volatile void *addr, int64_t word) { return (suword(addr, (long)word)); } #else int suword32(volatile void *addr, int32_t word) { return (suword(addr, (long)word)); } #endif int fubyte(volatile const void *addr) { struct thread *td; pmap_t pm; jmp_buf env; u_char *p; int val; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; td->td_pcb->pcb_onfault = &env; if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } if (pmap_map_user_ptr(pm, addr, (void **)&p, sizeof(*p), NULL)) { td->td_pcb->pcb_onfault = NULL; return (-1); } val = *p; td->td_pcb->pcb_onfault = NULL; return (val); } int fuword16(volatile const void *addr) { struct thread *td; pmap_t pm; jmp_buf 
env; uint16_t *p, val; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; td->td_pcb->pcb_onfault = &env; if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } if (pmap_map_user_ptr(pm, addr, (void **)&p, sizeof(*p), NULL)) { td->td_pcb->pcb_onfault = NULL; return (-1); } val = *p; td->td_pcb->pcb_onfault = NULL; return (val); } int fueword32(volatile const void *addr, int32_t *val) { struct thread *td; pmap_t pm; jmp_buf env; int32_t *p; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; td->td_pcb->pcb_onfault = &env; if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } if (pmap_map_user_ptr(pm, addr, (void **)&p, sizeof(*p), NULL)) { td->td_pcb->pcb_onfault = NULL; return (-1); } *val = *p; td->td_pcb->pcb_onfault = NULL; return (0); } #ifdef __powerpc64__ int fueword64(volatile const void *addr, int64_t *val) { struct thread *td; pmap_t pm; jmp_buf env; int64_t *p; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; td->td_pcb->pcb_onfault = &env; if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } if (pmap_map_user_ptr(pm, addr, (void **)&p, sizeof(*p), NULL)) { td->td_pcb->pcb_onfault = NULL; return (-1); } *val = *p; td->td_pcb->pcb_onfault = NULL; return (0); } #endif int fueword(volatile const void *addr, long *val) { struct thread *td; pmap_t pm; jmp_buf env; long *p; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; td->td_pcb->pcb_onfault = &env; if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } if (pmap_map_user_ptr(pm, addr, (void **)&p, sizeof(*p), NULL)) { td->td_pcb->pcb_onfault = NULL; return (-1); } *val = *p; td->td_pcb->pcb_onfault = NULL; return (0); } int casueword32(volatile uint32_t *addr, uint32_t old, uint32_t *oldvalp, uint32_t new) { struct thread *td; pmap_t pm; jmp_buf env; uint32_t *p, val; + int res; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; td->td_pcb->pcb_onfault = &env; if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } if (pmap_map_user_ptr(pm, (void *)(uintptr_t)addr, (void **)&p, sizeof(*p), NULL)) { td->td_pcb->pcb_onfault = NULL; return (-1); } + res = 0; __asm __volatile ( - "1:\tlwarx %0, 0, %2\n\t" /* load old value */ - "cmplw %3, %0\n\t" /* compare */ - "bne 2f\n\t" /* exit if not equal */ - "stwcx. %4, 0, %2\n\t" /* attempt to store */ - "bne- 1b\n\t" /* spin if failed */ - "b 3f\n\t" /* we've succeeded */ + "lwarx %0, 0, %3\n\t" /* load old value */ + "cmplw %4, %0\n\t" /* compare */ + "bne 1f\n\t" /* exit if not equal */ + "stwcx. %5, 0, %3\n\t" /* attempt to store */ + "bne- 1f\n\t" /* if failed */ + "b 2f\n\t" /* we've succeeded */ + "1:\n\t" + "stwcx. %0, 0, %3\n\t" /* clear reservation (74xx) */ + "li %2, 1\n\t" "2:\n\t" - "stwcx. %0, 0, %2\n\t" /* clear reservation (74xx) */ - "3:\n\t" - : "=&r" (val), "=m" (*p) + : "=&r" (val), "=m" (*p), "+&r" (res) : "r" (p), "r" (old), "r" (new), "m" (*p) : "cr0", "memory"); td->td_pcb->pcb_onfault = NULL; *oldvalp = val; - return (0); + return (res); }
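A single lwarx/stwcx. pair, as casueword32 now issues, is what C11 calls a weak compare-exchange: it may fail spuriously even when the comparison succeeded. A user-space analogue of the resulting 0/1 protocol, using the GCC/Clang builtin (the -1 fault case has no equivalent outside the kernel):

#include <stdint.h>

static int
cas_once(uint32_t *p, uint32_t oldval, uint32_t *oldvalp, uint32_t newval)
{
	uint32_t expected = oldval;
	int stored;

	/* weak=1: one shot, spurious failure allowed, like a lone stwcx. */
	stored = __atomic_compare_exchange_n(p, &expected, newval, 1,
	    __ATOMIC_RELAXED, __ATOMIC_RELAXED);
	*oldvalp = expected;		/* observed value, like lwarx's */
	return (stored ? 0 : 1);	/* 0 = swapped, 1 = caller retries */
}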
#ifndef __powerpc64__ int casueword(volatile u_long *addr, u_long old, u_long *oldvalp, u_long new) { return (casueword32((volatile uint32_t *)addr, old, (uint32_t *)oldvalp, new)); } #else int casueword(volatile u_long *addr, u_long old, u_long *oldvalp, u_long new) { struct thread *td; pmap_t pm; jmp_buf env; u_long *p, val; + int res; td = curthread; pm = &td->td_proc->p_vmspace->vm_pmap; td->td_pcb->pcb_onfault = &env; if (setjmp(env)) { td->td_pcb->pcb_onfault = NULL; return (-1); } if (pmap_map_user_ptr(pm, (void *)(uintptr_t)addr, (void **)&p, sizeof(*p), NULL)) { td->td_pcb->pcb_onfault = NULL; return (-1); } + res = 0; __asm __volatile ( - "1:\tldarx %0, 0, %2\n\t" /* load old value */ - "cmpld %3, %0\n\t" /* compare */ - "bne 2f\n\t" /* exit if not equal */ - "stdcx. %4, 0, %2\n\t" /* attempt to store */ - "bne- 1b\n\t" /* spin if failed */ - "b 3f\n\t" /* we've succeeded */ + "ldarx %0, 0, %3\n\t" /* load old value */ + "cmpld %4, %0\n\t" /* compare */ + "bne 1f\n\t" /* exit if not equal */ + "stdcx. %5, 0, %3\n\t" /* attempt to store */ + "bne- 1f\n\t" /* if failed */ + "b 2f\n\t" /* we've succeeded */ + "1:\n\t" + "stdcx. %0, 0, %3\n\t" /* clear reservation (74xx) */ + "li %2, 1\n\t" "2:\n\t" - "stdcx. %0, 0, %2\n\t" /* clear reservation (74xx) */ - "3:\n\t" - : "=&r" (val), "=m" (*p) + : "=&r" (val), "=m" (*p), "+&r" (res) : "r" (p), "r" (old), "r" (new), "m" (*p) : "cr0", "memory"); td->td_pcb->pcb_onfault = NULL; *oldvalp = val; - return (0); + return (res); } #endif
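A note on the asm constraints in the two functions above: res is set to 0 in C and written inside the asm only on the failure path, so its operand must be read-write ("+&r"). A write-only "=&r" would let the compiler discard the initialization, and the success path would then return whatever the register happened to hold. A minimal stand-alone PowerPC illustration of the pattern (hypothetical, not part of this change):

static inline int
flag_only_set_on_failure(int fail)
{
	int res = 0;

	__asm __volatile (
		"cmpwi %1, 0\n\t"
		"beq 1f\n\t"
		"li %0, 1\n\t"		/* %0 written only when fail != 0 */
		"1:"
		: "+&r" (res)		/* "+" keeps the initial 0 live */
		: "r" (fail)
		: "cr0");
	return (res);
}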
diff --git a/sys/riscv/riscv/support.S b/sys/riscv/riscv/support.S index e59302bea9aa..1de905fdf843 100644 --- a/sys/riscv/riscv/support.S +++ b/sys/riscv/riscv/support.S @@ -1,275 +1,277 @@ /*- * Copyright (c) 2015-2018 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include "assym.inc" /* * One of the fu* or su* functions failed, return -1. */ ENTRY(fsu_fault) SET_FAULT_HANDLER(x0, a1) /* Reset the handler function */ EXIT_USER_ACCESS(a1) fsu_fault_nopcb: li a0, -1 ret END(fsu_fault) /* * int casueword32(volatile uint32_t *, uint32_t, uint32_t *, uint32_t) */ ENTRY(casueword32) li a4, (VM_MAXUSER_ADDRESS-3) bgt a0, a4, fsu_fault_nopcb la a6, fsu_fault /* Load the fault handler */ SET_FAULT_HANDLER(a6, a4) /* And set it */ ENTER_USER_ACCESS(a4) -1: lr.w a4, 0(a0) /* Load-exclusive the data */ - bne a4, a1, 2f /* If not equal then exit */ + lr.w a4, 0(a0) /* Load-exclusive the data */ + bne a4, a1, 1f /* If not equal then exit */ sc.w a5, a3, 0(a0) /* Store the new data */ - bnez a5, 1b /* Retry on failure */ -2: EXIT_USER_ACCESS(a5) - SET_FAULT_HANDLER(x0, a5) /* Reset the fault handler */ + beqz a5, 2f /* Success */ +1: li a5, 1 /* Normalize failure result */ +2: EXIT_USER_ACCESS(a6) + SET_FAULT_HANDLER(x0, a6) /* Reset the fault handler */ sw a4, 0(a2) /* Store the read data */ - li a0, 0 /* Success */ + mv a0, a5 /* Success indicator */ ret /* Return */ END(casueword32) /* * int casueword(volatile u_long *, u_long, u_long *, u_long) */ ENTRY(casueword) li a4, (VM_MAXUSER_ADDRESS-7) bgt a0, a4, fsu_fault_nopcb la a6, fsu_fault /* Load the fault handler */ SET_FAULT_HANDLER(a6, a4) /* And set it */ ENTER_USER_ACCESS(a4) -1: lr.d a4, 0(a0) /* Load-exclusive the data */ - bne a4, a1, 2f /* If not equal then exit */ + lr.d a4, 0(a0) /* Load-exclusive the data */ + bne a4, a1, 1f /* If not equal then exit */ sc.d a5, a3, 0(a0) /* Store the new data */ - bnez a5, 1b /* Retry on failure */ -2: EXIT_USER_ACCESS(a5) - SET_FAULT_HANDLER(x0, a5) /* Reset the fault handler */ + beqz a5, 2f /* Success */ +1: li a5, 1 /* Normalize failure result */ +2: EXIT_USER_ACCESS(a6) + SET_FAULT_HANDLER(x0, a6) /* Reset the fault handler */ sd a4, 0(a2) /* Store the read data */ - li a0, 0 /* Success */ + mv a0, a5 /* Success indicator */ ret /* Return */ END(casueword)
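The riscv entry points reject out-of-range addresses before arming the fault handler; for a multi-byte access the limit is lowered so that the last byte still falls inside user space, which is why casueword32 checks against VM_MAXUSER_ADDRESS-3 and casueword against VM_MAXUSER_ADDRESS-7. The rule, as a sketch (the limit value is an assumed placeholder, not the real riscv constant):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define VM_MAXUSER_ADDRESS_MODEL 0x0000004000000000UL	/* assumed value */

/* An access of `size` bytes starting at `va` lies in user space only if
 * its last byte does, hence the size-1 adjustment of the bound. */
static bool
user_access_ok(uintptr_t va, size_t size)
{
	return (va <= VM_MAXUSER_ADDRESS_MODEL - (size - 1));
}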
/* * int fubyte(volatile const void *) */ ENTRY(fubyte) li a1, VM_MAXUSER_ADDRESS bgt a0, a1, fsu_fault_nopcb la a6, fsu_fault /* Load the fault handler */ SET_FAULT_HANDLER(a6, a1) /* And set it */ ENTER_USER_ACCESS(a1) lb a0, 0(a0) /* Try loading the data */ EXIT_USER_ACCESS(a1) SET_FAULT_HANDLER(x0, a1) /* Reset the fault handler */ ret /* Return */ END(fubyte) /* * int fuword16(volatile const void *) */ ENTRY(fuword16) li a1, (VM_MAXUSER_ADDRESS-1) bgt a0, a1, fsu_fault_nopcb la a6, fsu_fault /* Load the fault handler */ SET_FAULT_HANDLER(a6, a1) /* And set it */ ENTER_USER_ACCESS(a1) lh a0, 0(a0) /* Try loading the data */ EXIT_USER_ACCESS(a1) SET_FAULT_HANDLER(x0, a1) /* Reset the fault handler */ ret /* Return */ END(fuword16) /* * int fueword32(volatile const void *, int32_t *) */ ENTRY(fueword32) li a2, (VM_MAXUSER_ADDRESS-3) bgt a0, a2, fsu_fault_nopcb la a6, fsu_fault /* Load the fault handler */ SET_FAULT_HANDLER(a6, a2) /* And set it */ ENTER_USER_ACCESS(a2) lw a0, 0(a0) /* Try loading the data */ EXIT_USER_ACCESS(a2) SET_FAULT_HANDLER(x0, a2) /* Reset the fault handler */ sw a0, 0(a1) /* Save the data in kernel space */ li a0, 0 /* Success */ ret /* Return */ END(fueword32) /* * int fueword(volatile const void *, int64_t *) * int fueword64(volatile const void *, int64_t *) */ ENTRY(fueword) EENTRY(fueword64) li a2, (VM_MAXUSER_ADDRESS-7) bgt a0, a2, fsu_fault_nopcb la a6, fsu_fault /* Load the fault handler */ SET_FAULT_HANDLER(a6, a2) /* And set it */ ENTER_USER_ACCESS(a2) ld a0, 0(a0) /* Try loading the data */ EXIT_USER_ACCESS(a2) SET_FAULT_HANDLER(x0, a2) /* Reset the fault handler */ sd a0, 0(a1) /* Save the data in kernel space */ li a0, 0 /* Success */ ret /* Return */ EEND(fueword64) END(fueword) /* * int subyte(volatile void *, int) */ ENTRY(subyte) li a2, VM_MAXUSER_ADDRESS bgt a0, a2, fsu_fault_nopcb la a6, fsu_fault /* Load the fault handler */ SET_FAULT_HANDLER(a6, a2) /* And set it */ ENTER_USER_ACCESS(a2) sb a1, 0(a0) /* Try storing the data */ EXIT_USER_ACCESS(a2) SET_FAULT_HANDLER(x0, a2) /* Reset the fault handler */ li a0, 0 /* Success */ ret /* Return */ END(subyte) /* * int suword16(volatile void *, int) */ ENTRY(suword16) li a2, (VM_MAXUSER_ADDRESS-1) bgt a0, a2, fsu_fault_nopcb la a6, fsu_fault /* Load the fault handler */ SET_FAULT_HANDLER(a6, a2) /* And set it */ ENTER_USER_ACCESS(a2) sh a1, 0(a0) /* Try storing the data */ EXIT_USER_ACCESS(a2) SET_FAULT_HANDLER(x0, a2) /* Reset the fault handler */ li a0, 0 /* Success */ ret /* Return */ END(suword16) /* * int suword32(volatile void *, int) */ ENTRY(suword32) li a2, (VM_MAXUSER_ADDRESS-3) bgt a0, a2, fsu_fault_nopcb la a6, fsu_fault /* Load the fault handler */ SET_FAULT_HANDLER(a6, a2) /* And set it */ ENTER_USER_ACCESS(a2) sw a1, 0(a0) /* Try storing the data */ EXIT_USER_ACCESS(a2) SET_FAULT_HANDLER(x0, a2) /* Reset the fault handler */ li a0, 0 /* Success */ ret /* Return */ END(suword32) /* * int suword(volatile void *, long) */ ENTRY(suword) EENTRY(suword64) li a2, (VM_MAXUSER_ADDRESS-7) bgt a0, a2, fsu_fault_nopcb la a6, fsu_fault /* Load the fault handler */ SET_FAULT_HANDLER(a6, a2) /* And set it */ ENTER_USER_ACCESS(a2) sd a1, 0(a0) /* Try storing the data */ EXIT_USER_ACCESS(a2) SET_FAULT_HANDLER(x0, a2) /* Reset the fault handler */ li a0, 0 /* Success */ ret /* Return */ EEND(suword64) END(suword) ENTRY(setjmp) /* Store the stack pointer */ sd sp, 0(a0) addi a0, a0, 8 /* Store the general purpose registers and ra */ sd s0, (0 * 8)(a0) sd s1, (1 * 8)(a0) sd s2, (2 * 8)(a0) sd s3, (3 * 8)(a0) sd s4, (4 * 8)(a0) sd s5, (5 * 8)(a0) sd s6, (6 * 8)(a0) sd s7, (7 * 8)(a0) sd s8, (8 * 8)(a0) sd s9, (9 * 8)(a0) sd s10, (10 * 8)(a0) sd s11, (11 * 8)(a0) sd ra, (12 * 8)(a0) /* Return value */ li a0, 0 ret END(setjmp) ENTRY(longjmp) /* Restore the stack pointer */ ld sp, 0(a0) addi a0, a0, 8 /* Restore the general purpose registers and ra */ ld s0, (0 * 8)(a0) ld s1, (1 * 8)(a0) ld s2, (2 * 8)(a0) ld s3, (3 * 8)(a0) ld s4, (4 * 8)(a0) ld s5, (5 * 8)(a0) ld s6, (6 * 8)(a0) ld s7, (7 * 8)(a0) ld s8, (8 * 8)(a0) ld s9, (9 * 8)(a0) ld s10, (10 * 8)(a0) ld s11, (11 * 8)(a0) ld ra, (12 * 8)(a0) /* Load the return value */ mv a0, a1 ret END(longjmp)
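sparc64, next, differs from the ll/sc ports: its casa/casxa instructions are true hardware compare-and-swap and hand back the previous memory value rather than a success flag. Mapping that onto the new 0/1/-1 convention is done in plain C by the wrappers at the end of this diff; the essence of the conversion is sketched below (note that -1 stays ambiguous between a fault and the word genuinely containing -1, the historical casuword limitation):

#include <stdint.h>

static int
cas_result_to_tristate(uint32_t fetched, uint32_t expected)
{
	if (fetched == (uint32_t)-1)
		return (-1);		/* fault, or the word really was -1 */
	return (fetched != expected);	/* 0 = swapped, 1 = mismatch */
}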
diff --git a/sys/sparc64/sparc64/support.S b/sys/sparc64/sparc64/support.S index 5b9dd68d110a..61fef499fd31 100644 --- a/sys/sparc64/sparc64/support.S +++ b/sys/sparc64/sparc64/support.S @@ -1,944 +1,943 @@ /*- * Copyright (c) 2001 Jake Burkholder. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include "assym.inc" .register %g2, #ignore .register %g3, #ignore .register %g6, #ignore /* * Common code for copy routines. * * We use large macros to generate functions for each of the copy routines. * This allows the load and store instructions to be generated for the right * operation, asi or not. It is possible to write an asi independent function * but this would require 2 expensive wrs in the main loop to switch %asi. * It would also screw up profiling (if we ever get it), but may save some I$. * We assume that either one of dasi and sasi is empty, or that they are both * the same (empty or non-empty). It is up to the caller to set %asi. */ /* * ASI independent implementation of copystr(9). * Used to implement copyinstr() and copystr(). * * Return value is in %g1. */ #define _COPYSTR(src, dst, len, done, sa, sasi, da, dasi) \ brz len, 4f ; \ mov src, %g2 ; \ 1: deccc 1, len ; \ bl,a,pn %xcc, 2f ; \ nop ; \ LD(ub, sa) [src] sasi, %g1 ; \ ST(b, da) %g1, [dst] dasi ; \ brz,pn %g1, 3f ; \ inc src ; \ ba %xcc, 1b ; \ inc dst ; \ 2: mov ENAMETOOLONG, %g1 ; \ 3: sub src, %g2, %g2 ; \ brnz,a done, 4f ; \ stx %g2, [done] ; \ 4: /* * ASI independent implementation of memset(3). * Used to implement bzero(), memset() and aszero(). * * If the pattern is non-zero, duplicate it to fill 64 bits. * Store bytes until dst is 8-byte aligned, then store 8 bytes. * It has yet to be determined how much unrolling is beneficial. * Could also read and compare before writing to minimize snoop traffic. * * XXX bzero() should be implemented as * #define bzero(dst, len) (void)memset((dst), 0, (len)) * if at all.
*/ #define _MEMSET(dst, pat, len, da, dasi) \ brlez,pn len, 5f ; \ and pat, 0xff, pat ; \ brz,pt pat, 1f ; \ sllx pat, 8, %g1 ; \ or pat, %g1, pat ; \ sllx pat, 16, %g1 ; \ or pat, %g1, pat ; \ sllx pat, 32, %g1 ; \ or pat, %g1, pat ; \ .align 16 ; \ 1: deccc 1, len ; \ bl,pn %xcc, 5f ; \ btst 7, dst ; \ bz,a,pt %xcc, 2f ; \ inc 1, len ; \ ST(b, da) pat, [dst] dasi ; \ ba %xcc, 1b ; \ inc dst ; \ .align 16 ; \ 2: deccc 32, len ; \ bl,a,pn %xcc, 3f ; \ inc 32, len ; \ ST(x, da) pat, [dst] dasi ; \ ST(x, da) pat, [dst + 8] dasi ; \ ST(x, da) pat, [dst + 16] dasi ; \ ST(x, da) pat, [dst + 24] dasi ; \ ba %xcc, 2b ; \ inc 32, dst ; \ .align 16 ; \ 3: deccc 8, len ; \ bl,a,pn %xcc, 4f ; \ inc 8, len ; \ ST(x, da) pat, [dst] dasi ; \ ba %xcc, 3b ; \ inc 8, dst ; \ .align 16 ; \ 4: deccc 1, len ; \ bl,a,pn %xcc, 5f ; \ nop ; \ ST(b, da) pat, [dst] dasi ; \ ba %xcc, 4b ; \ inc 1, dst ; \ 5: /* * ASI independent implementation of memcpy(3). * Used to implement bcopy(), copyin(), copyout(), memcpy(), ascopy(), * ascopyfrom() and ascopyto(). * * Transfer bytes until dst is 8-byte aligned. If src is then also 8 byte * aligned, transfer 8 bytes, otherwise finish with bytes. The unaligned * case could be optimized, but it is expected that this is the uncommon * case and of questionable value. The code to do so is also rather large * and ugly. It has yet to be determined how much unrolling is beneficial. * * XXX bcopy() must also check for overlap. This is stupid. * XXX bcopy() should be implemented as * #define bcopy(src, dst, len) (void)memcpy((dst), (src), (len)) * if at all. */ #define _MEMCPY(dst, src, len, da, dasi, sa, sasi) \ 1: deccc 1, len ; \ bl,pn %xcc, 6f ; \ btst 7, dst ; \ bz,a,pt %xcc, 2f ; \ inc 1, len ; \ LD(ub, sa) [src] sasi, %g1 ; \ ST(b, da) %g1, [dst] dasi ; \ inc 1, src ; \ ba %xcc, 1b ; \ inc 1, dst ; \ .align 16 ; \ 2: btst 7, src ; \ bz,a,pt %xcc, 3f ; \ nop ; \ ba,a %xcc, 5f ; \ .align 16 ; \ 3: deccc 32, len ; \ bl,a,pn %xcc, 4f ; \ inc 32, len ; \ LD(x, sa) [src] sasi, %g1 ; \ LD(x, sa) [src + 8] sasi, %g2 ; \ LD(x, sa) [src + 16] sasi, %g3 ; \ LD(x, sa) [src + 24] sasi, %g4 ; \ ST(x, da) %g1, [dst] dasi ; \ ST(x, da) %g2, [dst + 8] dasi ; \ ST(x, da) %g3, [dst + 16] dasi ; \ ST(x, da) %g4, [dst + 24] dasi ; \ inc 32, src ; \ ba %xcc, 3b ; \ inc 32, dst ; \ .align 16 ; \ 4: deccc 8, len ; \ bl,a,pn %xcc, 5f ; \ inc 8, len ; \ LD(x, sa) [src] sasi, %g1 ; \ ST(x, da) %g1, [dst] dasi ; \ inc 8, src ; \ ba %xcc, 4b ; \ inc 8, dst ; \ .align 16 ; \ 5: deccc 1, len ; \ bl,a,pn %xcc, 6f ; \ nop ; \ LD(ub, sa) [src] sasi, %g1 ; \ ST(b, da) %g1, [dst] dasi ; \ inc src ; \ ba %xcc, 5b ; \ inc dst ; \ 6: /* * Extension of _MEMCPY dealing with overlap, but unaware of ASIs. * Used for bcopy() and memmove(). */ #define _MEMMOVE(dst, src, len) \ /* Check for overlap, and copy backwards if so. */ \ sub dst, src, %g1 ; \ cmp %g1, len ; \ bgeu,a,pt %xcc, 2f ; \ nop ; \ /* Copy backwards. */ \ add src, len, src ; \ add dst, len, dst ; \ 1: deccc 1, len ; \ bl,pn %xcc, 3f ; \ dec 1, src ; \ ldub [src], %g1 ; \ dec 1, dst ; \ ba %xcc, 1b ; \ stb %g1, [dst] ; \ 2: /* Do the fast version. 
*/ \ _MEMCPY(dst, src, len, EMPTY, EMPTY, EMPTY, EMPTY) ; \ 3: /* * void ascopy(u_long asi, vm_offset_t src, vm_offset_t dst, size_t len) */ ENTRY(ascopy) wr %o0, 0, %asi _MEMCPY(%o2, %o1, %o3, a, %asi, a, %asi) retl nop END(ascopy) /* * void ascopyfrom(u_long sasi, vm_offset_t src, caddr_t dst, size_t len) */ ENTRY(ascopyfrom) wr %o0, 0, %asi _MEMCPY(%o2, %o1, %o3, EMPTY, EMPTY, a, %asi) retl nop END(ascopyfrom) /* * void ascopyto(caddr_t src, u_long dasi, vm_offset_t dst, size_t len) */ ENTRY(ascopyto) wr %o1, 0, %asi _MEMCPY(%o2, %o0, %o3, a, %asi, EMPTY, EMPTY) retl nop END(ascopyto) /* * void aszero(u_long asi, vm_offset_t pa, size_t len) */ ENTRY(aszero) wr %o0, 0, %asi _MEMSET(%o1, %g0, %o2, a, %asi) retl nop END(aszero) /* * int bcmp(const void *b1, const void *b2, size_t len) */ ENTRY(bcmp) brz,pn %o2, 2f clr %o3 1: ldub [%o0 + %o3], %o4 ldub [%o1 + %o3], %o5 cmp %o4, %o5 bne,pn %xcc, 2f inc %o3 deccc %o2 bne,pt %xcc, 1b nop 2: retl mov %o2, %o0 END(bcmp) /* * void bcopy(const void *src, void *dst, size_t len) */ ENTRY(bcopy) _MEMMOVE(%o1, %o0, %o2) retl nop END(bcopy) /* * void bzero(void *b, size_t len) */ ENTRY(bzero) _MEMSET(%o0, %g0, %o1, EMPTY, EMPTY) retl nop END(bzero) /* * int copystr(const void *src, void *dst, size_t len, size_t *done) */ ENTRY(copystr) _COPYSTR(%o0, %o1, %o2, %o3, EMPTY, EMPTY, EMPTY, EMPTY) retl mov %g1, %o0 END(copystr) /* * void *memcpy(void *dst, const void *src, size_t len) */ ENTRY(memcpy) mov %o0, %o3 _MEMCPY(%o3, %o1, %o2, EMPTY, EMPTY, EMPTY, EMPTY) retl nop END(memcpy) /* * void *memmove(void *dst, const void *src, size_t len) */ ENTRY(memmove) mov %o0, %o3 _MEMMOVE(%o3, %o1, %o2) retl nop END(memmove) /* * void *memset(void *b, int c, size_t len) */ ENTRY(memset) mov %o0, %o3 _MEMSET(%o3, %o1, %o2, EMPTY, EMPTY) retl nop END(memset) .globl copy_nofault_begin copy_nofault_begin: nop /* * int copyin(const void *uaddr, void *kaddr, size_t len) */ ENTRY(copyin) wr %g0, ASI_AIUP, %asi _MEMCPY(%o1, %o0, %o2, EMPTY, EMPTY, a, %asi) retl clr %o0 END(copyin) /* * int copyinstr(const void *uaddr, void *kaddr, size_t len, size_t *done) */ ENTRY(copyinstr) wr %g0, ASI_AIUP, %asi _COPYSTR(%o0, %o1, %o2, %o3, a, %asi, EMPTY, EMPTY) retl mov %g1, %o0 END(copyinstr) /* * int copyout(const void *kaddr, void *uaddr, size_t len) */ ENTRY(copyout) wr %g0, ASI_AIUP, %asi _MEMCPY(%o1, %o0, %o2, a, %asi, EMPTY, EMPTY) retl clr %o0 END(copyout) .globl copy_nofault_end copy_nofault_end: nop ENTRY(copy_fault) retl mov EFAULT, %o0 END(copy_fault) .globl fs_nofault_begin fs_nofault_begin: nop /* * Chatty aliases for fetch, store functions. 
*/ .globl fubyte, fusword, fuword, subyte, susword, suword .set fubyte, fuword8 .set fusword, fuword16 .set fuword, fuword64 .set subyte, suword8 .set susword, suword16 .set suword, suword64 - .globl casuword32, casuword, fuptr, suptr - .set casuword, casuword64 + .globl casuword32_int, casuword64_int, fuptr, suptr .set fuptr, fuword64 .set suptr, suword64 /* * int32_t casuword32(volatile int32_t *p, int32_t e, int32_t s) */ -ENTRY(casuword32) +ENTRY(casuword32_int) casa [%o0] ASI_AIUP, %o1, %o2 retl mov %o2, %o0 -END(casuword32) +END(casuword32_int) /* * int64_t casuword64(volatile int64_t *p, int64_t e, int64_t s) */ -ENTRY(casuword64) +ENTRY(casuword64_int) casxa [%o0] ASI_AIUP, %o1, %o2 retl mov %o2, %o0 -END(casuword64) +END(casuword64_int) /* * int fuword8(const void *base) */ ENTRY(fuword8) retl lduba [%o0] ASI_AIUP, %o0 END(fuword8) /* * int fuword16(const void *base) */ ENTRY(fuword16) retl lduha [%o0] ASI_AIUP, %o0 END(fuword16) /* * int32_t fuword32(const void *base) */ ENTRY(fuword32) retl lduwa [%o0] ASI_AIUP, %o0 END(fuword32) /* * int64_t fuword64(const void *base) */ ENTRY(fuword64) retl ldxa [%o0] ASI_AIUP, %o0 END(fuword64) /* * int suword8(const void *base, int word) */ ENTRY(suword8) stba %o1, [%o0] ASI_AIUP retl clr %o0 END(suword8) /* * int suword16(const void *base, int word) */ ENTRY(suword16) stha %o1, [%o0] ASI_AIUP retl clr %o0 END(suword16) /* * int suword32(const void *base, int32_t word) */ ENTRY(suword32) stwa %o1, [%o0] ASI_AIUP retl clr %o0 END(suword32) /* * int suword64(const void *base, int64_t word) */ ENTRY(suword64) stxa %o1, [%o0] ASI_AIUP retl clr %o0 END(suword64) .globl fs_nofault_end fs_nofault_end: nop ENTRY(fs_fault) retl mov -1, %o0 END(fs_fault) .globl fas_nofault_begin fas_nofault_begin: /* * int fasword8(u_long asi, uint64_t addr, uint8_t *val) */ ENTRY(fasword8) wr %o0, 0, %asi membar #Sync lduba [%o1] %asi, %o3 membar #Sync stb %o3, [%o2] retl clr %o0 END(fasword8) /* * int fasword16(u_long asi, uint64_t addr, uint16_t *val) */ ENTRY(fasword16) wr %o0, 0, %asi membar #Sync lduha [%o1] %asi, %o3 membar #Sync sth %o3, [%o2] retl clr %o0 END(fasword16) /* * int fasword32(u_long asi, uint64_t addr, uint32_t *val) */ ENTRY(fasword32) wr %o0, 0, %asi membar #Sync lduwa [%o1] %asi, %o3 membar #Sync stw %o3, [%o2] retl clr %o0 END(fasword32) .globl fas_nofault_end fas_nofault_end: nop .globl fas_fault ENTRY(fas_fault) retl mov -1, %o0 END(fas_fault) .globl fpu_fault_begin fpu_fault_begin: nop /* * void spitfire_block_copy(void *src, void *dst, size_t len) */ ENTRY(spitfire_block_copy) rdpr %pstate, %o3 wrpr %g0, PSTATE_NORMAL, %pstate wr %g0, ASI_BLK_S, %asi wr %g0, FPRS_FEF, %fprs sub PCB_REG, TF_SIZEOF, %o4 ldx [%o4 + TF_FPRS], %o5 andcc %o5, FPRS_FEF, %g0 bz,a,pt %xcc, 1f nop stda %f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi stda %f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi stda %f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi stda %f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi membar #Sync andn %o5, FPRS_FEF, %o5 stx %o5, [%o4 + TF_FPRS] ldx [PCB_REG + PCB_FLAGS], %o4 or %o4, PCB_FEF, %o4 stx %o4, [PCB_REG + PCB_FLAGS] 1: wrpr %o3, 0, %pstate ldda [%o0] %asi, %f0 add %o0, VIS_BLOCKSIZE, %o0 sub %o2, VIS_BLOCKSIZE, %o2 2: ldda [%o0] %asi, %f16 fsrc1 %f0, %f32 fsrc1 %f2, %f34 fsrc1 %f4, %f36 fsrc1 %f6, %f38 fsrc1 %f8, %f40 fsrc1 %f10, %f42 fsrc1 %f12, %f44 fsrc1 %f14, %f46 stda %f32, [%o1] %asi add %o0, VIS_BLOCKSIZE, %o0 subcc %o2, VIS_BLOCKSIZE, %o2 bz,pn %xcc, 3f add %o1, VIS_BLOCKSIZE, %o1 ldda [%o0] %asi, %f0 fsrc1 
%f16, %f32 fsrc1 %f18, %f34 fsrc1 %f20, %f36 fsrc1 %f22, %f38 fsrc1 %f24, %f40 fsrc1 %f26, %f42 fsrc1 %f28, %f44 fsrc1 %f30, %f46 stda %f32, [%o1] %asi add %o0, VIS_BLOCKSIZE, %o0 sub %o2, VIS_BLOCKSIZE, %o2 ba,pt %xcc, 2b add %o1, VIS_BLOCKSIZE, %o1 3: membar #Sync stda %f16, [%o1] %asi membar #Sync retl wr %g0, 0, %fprs END(spitfire_block_copy) /* * void zeus_block_copy(void *src, void *dst, size_t len) */ ENTRY(zeus_block_copy) prefetch [%o0 + (0 * VIS_BLOCKSIZE)], 0 rdpr %pstate, %o3 wrpr %g0, PSTATE_NORMAL, %pstate wr %g0, ASI_BLK_S, %asi wr %g0, FPRS_FEF, %fprs sub PCB_REG, TF_SIZEOF, %o4 ldx [%o4 + TF_FPRS], %o5 andcc %o5, FPRS_FEF, %g0 bz,a,pt %xcc, 1f nop stda %f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi stda %f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi stda %f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi stda %f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi membar #Sync andn %o5, FPRS_FEF, %o5 stx %o5, [%o4 + TF_FPRS] ldx [PCB_REG + PCB_FLAGS], %o4 or %o4, PCB_FEF, %o4 stx %o4, [PCB_REG + PCB_FLAGS] 1: wrpr %o3, 0, %pstate ldd [%o0 + (0 * 8)], %f0 prefetch [%o0 + (1 * VIS_BLOCKSIZE)], 0 ldd [%o0 + (1 * 8)], %f2 prefetch [%o0 + (2 * VIS_BLOCKSIZE)], 0 fmovd %f0, %f32 ldd [%o0 + (2 * 8)], %f4 prefetch [%o0 + (3 * VIS_BLOCKSIZE)], 0 fmovd %f2, %f34 ldd [%o0 + (3 * 8)], %f6 prefetch [%o0 + (4 * VIS_BLOCKSIZE)], 1 fmovd %f4, %f36 ldd [%o0 + (4 * 8)], %f8 prefetch [%o0 + (8 * VIS_BLOCKSIZE)], 1 fmovd %f6, %f38 ldd [%o0 + (5 * 8)], %f10 prefetch [%o0 + (12 * VIS_BLOCKSIZE)], 1 fmovd %f8, %f40 ldd [%o0 + (6 * 8)], %f12 prefetch [%o0 + (16 * VIS_BLOCKSIZE)], 1 fmovd %f10, %f42 ldd [%o0 + (7 * 8)], %f14 ldd [%o0 + (8 * 8)], %f0 sub %o2, VIS_BLOCKSIZE, %o2 add %o0, VIS_BLOCKSIZE, %o0 prefetch [%o0 + (19 * VIS_BLOCKSIZE)], 1 ba,pt %xcc, 2f prefetch [%o0 + (23 * VIS_BLOCKSIZE)], 1 .align 32 2: ldd [%o0 + (1 * 8)], %f2 fmovd %f12, %f44 ldd [%o0 + (2 * 8)], %f4 fmovd %f14, %f46 stda %f32, [%o1] %asi ldd [%o0 + (3 * 8)], %f6 fmovd %f0, %f32 ldd [%o0 + (4 * 8)], %f8 fmovd %f2, %f34 ldd [%o0 + (5 * 8)], %f10 fmovd %f4, %f36 ldd [%o0 + (6 * 8)], %f12 fmovd %f6, %f38 ldd [%o0 + (7 * 8)], %f14 fmovd %f8, %f40 ldd [%o0 + (8 * 8)], %f0 fmovd %f10, %f42 sub %o2, VIS_BLOCKSIZE, %o2 prefetch [%o0 + (3 * VIS_BLOCKSIZE)], 0 add %o1, VIS_BLOCKSIZE, %o1 prefetch [%o0 + (24 * VIS_BLOCKSIZE)], 1 add %o0, VIS_BLOCKSIZE, %o0 cmp %o2, VIS_BLOCKSIZE + 8 bgu,pt %xcc, 2b prefetch [%o0 + (12 * VIS_BLOCKSIZE)], 1 ldd [%o0 + (1 * 8)], %f2 fsrc1 %f12, %f44 ldd [%o0 + (2 * 8)], %f4 fsrc1 %f14, %f46 stda %f32, [%o1] %asi ldd [%o0 + (3 * 8)], %f6 fsrc1 %f0, %f32 ldd [%o0 + (4 * 8)], %f8 fsrc1 %f2, %f34 ldd [%o0 + (5 * 8)], %f10 fsrc1 %f4, %f36 ldd [%o0 + (6 * 8)], %f12 fsrc1 %f6, %f38 ldd [%o0 + (7 * 8)], %f14 fsrc1 %f8, %f40 add %o1, VIS_BLOCKSIZE, %o1 fsrc1 %f10, %f42 fsrc1 %f12, %f44 fsrc1 %f14, %f46 stda %f32, [%o1] %asi membar #Sync retl wr %g0, 0, %fprs END(zeus_block_copy) /* * void spitfire_block_zero(void *dst, size_t len) * void zeus_block_zero(void *dst, size_t len) */ ALTENTRY(zeus_block_zero) ENTRY(spitfire_block_zero) rdpr %pstate, %o3 wrpr %g0, PSTATE_NORMAL, %pstate wr %g0, ASI_BLK_S, %asi wr %g0, FPRS_FEF, %fprs sub PCB_REG, TF_SIZEOF, %o4 ldx [%o4 + TF_FPRS], %o5 andcc %o5, FPRS_FEF, %g0 bz,a,pt %xcc, 1f nop stda %f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi stda %f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi stda %f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi stda %f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi membar #Sync andn %o5, FPRS_FEF, %o5 stx %o5, 
[%o4 + TF_FPRS] ldx [PCB_REG + PCB_FLAGS], %o4 or %o4, PCB_FEF, %o4 stx %o4, [PCB_REG + PCB_FLAGS] 1: wrpr %o3, 0, %pstate fzero %f0 fzero %f2 fzero %f4 fzero %f6 fzero %f8 fzero %f10 fzero %f12 fzero %f14 1: stda %f0, [%o0 + (0 * VIS_BLOCKSIZE)] %asi stda %f0, [%o0 + (1 * VIS_BLOCKSIZE)] %asi stda %f0, [%o0 + (2 * VIS_BLOCKSIZE)] %asi stda %f0, [%o0 + (3 * VIS_BLOCKSIZE)] %asi sub %o1, (4 * VIS_BLOCKSIZE), %o1 brnz,pt %o1, 1b add %o0, (4 * VIS_BLOCKSIZE), %o0 membar #Sync retl wr %g0, 0, %fprs END(spitfire_block_zero) .globl fpu_fault_end fpu_fault_end: nop .globl fpu_fault_size .set fpu_fault_size, fpu_fault_end - fpu_fault_begin ENTRY(longjmp) set 1, %g3 movrz %o1, %o1, %g3 mov %o0, %g1 ldx [%g1 + _JB_FP], %g2 1: cmp %fp, %g2 bl,a,pt %xcc, 1b restore bne,pn %xcc, 2f ldx [%g1 + _JB_SP], %o2 cmp %o2, %sp blt,pn %xcc, 2f movge %xcc, %o2, %sp ldx [%g1 + _JB_PC], %o7 retl mov %g3, %o0 2: PANIC("longjmp botch", %l1) END(longjmp) ENTRY(setjmp) stx %sp, [%o0 + _JB_SP] stx %o7, [%o0 + _JB_PC] stx %fp, [%o0 + _JB_FP] retl clr %o0 END(setjmp) /* * void ofw_entry(cell_t args[]) */ ENTRY(ofw_entry) save %sp, -CCFSZ, %sp SET(ofw_vec, %l7, %l6) ldx [%l6], %l6 rdpr %pstate, %l7 andn %l7, PSTATE_AM | PSTATE_IE, %l5 wrpr %l5, 0, %pstate SET(tba_taken_over, %l5, %l4) brz,pn %l4, 1f rdpr %wstate, %l5 andn %l5, WSTATE_PROM_MASK, %l3 wrpr %l3, WSTATE_PROM_KMIX, %wstate 1: call %l6 mov %i0, %o0 brz,pn %l4, 1f nop wrpr %g0, %l5, %wstate 1: wrpr %l7, 0, %pstate ret restore %o0, %g0, %o0 END(ofw_entry) /* * void ofw_exit(cell_t args[]) */ ENTRY(ofw_exit) save %sp, -CCFSZ, %sp flushw SET(ofw_tba, %l7, %l5) ldx [%l5], %l5 rdpr %pstate, %l7 andn %l7, PSTATE_AM | PSTATE_IE, %l7 wrpr %l7, 0, %pstate rdpr %wstate, %l7 andn %l7, WSTATE_PROM_MASK, %l7 wrpr %l7, WSTATE_PROM_KMIX, %wstate wrpr %l5, 0, %tba ! restore the OFW trap table SET(ofw_vec, %l7, %l6) ldx [%l6], %l6 SET(kstack0 + KSTACK_PAGES * PAGE_SIZE - PCB_SIZEOF, %l7, %l0) sub %l0, SPOFF, %fp ! setup a stack in a locked page sub %l0, SPOFF + CCFSZ, %sp mov AA_DMMU_PCXR, %l3 ! force primary DMMU context 0 sethi %hi(KERNBASE), %l5 stxa %g0, [%l3] ASI_DMMU flush %l5 wrpr %g0, 0, %tl ! force trap level 0 call %l6 mov %i0, %o0 ! never to return END(ofw_exit) #ifdef GPROF ENTRY(user) nop ENTRY(btrap) nop ENTRY(etrap) nop ENTRY(bintr) nop ENTRY(eintr) nop /* * XXX including sys/gmon.h in genassym.c is not possible due to uintfptr_t * badness. */ #define GM_STATE 0x0 #define GMON_PROF_OFF 3 #define GMON_PROF_HIRES 4 .globl _mcount .set _mcount, __cyg_profile_func_enter ENTRY(__cyg_profile_func_enter) SET(_gmonparam, %o3, %o2) lduw [%o2 + GM_STATE], %o3 cmp %o3, GMON_PROF_OFF be,a,pn %icc, 1f nop SET(mcount, %o3, %o2) jmpl %o2, %g0 nop 1: retl nop END(__cyg_profile_func_enter) #ifdef GUPROF ENTRY(__cyg_profile_func_exit) SET(_gmonparam, %o3, %o2) lduw [%o2 + GM_STATE], %o3 cmp %o3, GMON_PROF_HIRES be,a,pn %icc, 1f nop SET(mexitcount, %o3, %o2) jmpl %o2, %g0 nop 1: retl nop END(__cyg_profile_func_exit) #endif /* GUPROF */ #endif /* GPROF */ diff --git a/sys/sparc64/sparc64/vm_machdep.c b/sys/sparc64/sparc64/vm_machdep.c index c56555a0120b..26ad7f6ceac9 100644 --- a/sys/sparc64/sparc64/vm_machdep.c +++ b/sys/sparc64/sparc64/vm_machdep.c @@ -1,464 +1,489 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986 The Regents of the University of California. * Copyright (c) 1989, 1990 William Jolitz * Copyright (c) 1994 John Dyson * Copyright (c) 2001 Jake Burkholder. * All rights reserved. 
* * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ * from: FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.167 2001/07/12 */ #include __FBSDID("$FreeBSD$"); #include "opt_pmap.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include PMAP_STATS_VAR(uma_nsmall_alloc); PMAP_STATS_VAR(uma_nsmall_alloc_oc); PMAP_STATS_VAR(uma_nsmall_free); void cpu_exit(struct thread *td) { struct proc *p; p = td->td_proc; p->p_md.md_sigtramp = NULL; if (p->p_md.md_utrap != NULL) { utrap_free(p->p_md.md_utrap); p->p_md.md_utrap = NULL; } } void cpu_thread_exit(struct thread *td) { } void cpu_thread_clean(struct thread *td) { } void cpu_thread_alloc(struct thread *td) { struct pcb *pcb; pcb = (struct pcb *)((td->td_kstack + td->td_kstack_pages * PAGE_SIZE - sizeof(struct pcb)) & ~0x3fUL); pcb->pcb_nsaved = 0; td->td_frame = (struct trapframe *)pcb - 1; td->td_pcb = pcb; } void cpu_thread_free(struct thread *td) { } void cpu_thread_swapin(struct thread *td) { } void cpu_thread_swapout(struct thread *td) { } void cpu_set_syscall_retval(struct thread *td, int error) { switch (error) { case 0: td->td_frame->tf_out[0] = td->td_retval[0]; td->td_frame->tf_out[1] = td->td_retval[1]; td->td_frame->tf_tstate &= ~TSTATE_XCC_C; break; case ERESTART: /* * Undo the tpc advancement we have done on syscall * enter, we want to reexecute the system call. 
*/ td->td_frame->tf_tpc = td->td_pcb->pcb_tpc; td->td_frame->tf_tnpc -= 4; break; case EJUSTRETURN: break; default: td->td_frame->tf_out[0] = SV_ABI_ERRNO(td->td_proc, error); td->td_frame->tf_tstate |= TSTATE_XCC_C; break; } } void cpu_copy_thread(struct thread *td, struct thread *td0) { struct trapframe *tf; struct frame *fr; struct pcb *pcb; bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe)); pcb = td->td_pcb; tf = td->td_frame; fr = (struct frame *)tf - 1; fr->fr_local[0] = (u_long)fork_return; fr->fr_local[1] = (u_long)td; fr->fr_local[2] = (u_long)tf; pcb->pcb_pc = (u_long)fork_trampoline - 8; pcb->pcb_sp = (u_long)fr - SPOFF; /* Setup to release the spin count in fork_exit(). */ td->td_md.md_spinlock_count = 1; td->td_md.md_saved_pil = 0; } void cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg, stack_t *stack) { struct trapframe *tf; uint64_t sp; if (td == curthread) flushw(); tf = td->td_frame; sp = (uint64_t)stack->ss_sp + stack->ss_size; tf->tf_out[0] = (uint64_t)arg; tf->tf_out[6] = sp - SPOFF - sizeof(struct frame); tf->tf_tpc = (uint64_t)entry; tf->tf_tnpc = tf->tf_tpc + 4; td->td_retval[0] = tf->tf_out[0]; td->td_retval[1] = tf->tf_out[1]; } int cpu_set_user_tls(struct thread *td, void *tls_base) { if (td == curthread) flushw(); td->td_frame->tf_global[7] = (uint64_t)tls_base; return (0); } /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child * ready to run and return to user mode. */ void cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) { struct trapframe *tf; struct frame *fp; struct pcb *pcb1; struct pcb *pcb2; vm_offset_t sp; int error; int i; KASSERT(td1 == curthread || td1 == &thread0, ("cpu_fork: p1 not curproc and not proc0")); if ((flags & RFPROC) == 0) return; p2->p_md.md_sigtramp = td1->td_proc->p_md.md_sigtramp; p2->p_md.md_utrap = utrap_hold(td1->td_proc->p_md.md_utrap); /* The pcb must be aligned on a 64-byte boundary. */ pcb1 = td1->td_pcb; pcb2 = (struct pcb *)((td2->td_kstack + td2->td_kstack_pages * PAGE_SIZE - sizeof(struct pcb)) & ~0x3fUL); td2->td_pcb = pcb2; /* * Ensure that p1's pcb is up to date. */ critical_enter(); if ((td1->td_frame->tf_fprs & FPRS_FEF) != 0) savefpctx(pcb1->pcb_ufp); critical_exit(); /* Make sure the copied windows are spilled. */ flushw(); /* Copy the pcb (this will copy the windows saved in the pcb, too). */ bcopy(pcb1, pcb2, sizeof(*pcb1)); /* * If we're creating a new user process and we're sharing the address * space, the parent's top most frame must be saved in the pcb. The * child will pop the frame when it returns to user mode, and may * overwrite it with its own data causing much suffering for the * parent. We check if its already in the pcb, and if not copy it * in. Its unlikely that the copyin will fail, but if so there's not * much we can do. The parent will likely crash soon anyway in that * case. */ if ((flags & RFMEM) != 0 && td1 != &thread0) { sp = td1->td_frame->tf_sp; for (i = 0; i < pcb1->pcb_nsaved; i++) { if (pcb1->pcb_rwsp[i] == sp) break; } if (i == pcb1->pcb_nsaved) { error = copyin((caddr_t)sp + SPOFF, &pcb1->pcb_rw[i], sizeof(struct rwindow)); if (error == 0) { pcb1->pcb_rwsp[i] = sp; pcb1->pcb_nsaved++; } } } /* * Create a new fresh stack for the new process. * Copy the trap frame for the return to user mode as if from a * syscall. This copies most of the user mode register values. 
*/ tf = (struct trapframe *)pcb2 - 1; bcopy(td1->td_frame, tf, sizeof(*tf)); tf->tf_out[0] = 0; /* Child returns zero */ tf->tf_out[1] = 0; tf->tf_tstate &= ~TSTATE_XCC_C; /* success */ tf->tf_fprs = 0; td2->td_frame = tf; fp = (struct frame *)tf - 1; fp->fr_local[0] = (u_long)fork_return; fp->fr_local[1] = (u_long)td2; fp->fr_local[2] = (u_long)tf; /* Terminate stack traces at this frame. */ fp->fr_pc = fp->fr_fp = 0; pcb2->pcb_sp = (u_long)fp - SPOFF; pcb2->pcb_pc = (u_long)fork_trampoline - 8; /* Setup to release the spin count in fork_exit(). */ td2->td_md.md_spinlock_count = 1; td2->td_md.md_saved_pil = 0; /* * Now, cpu_switch() can schedule the new process. */ } void cpu_reset(void) { static char bspec[64] = ""; phandle_t chosen; static struct { cell_t name; cell_t nargs; cell_t nreturns; cell_t bootspec; } args = { (cell_t)"boot", 1, 0, (cell_t)bspec }; if ((chosen = OF_finddevice("/chosen")) != -1) { if (OF_getprop(chosen, "bootpath", bspec, sizeof(bspec)) == -1) bspec[0] = '\0'; bspec[sizeof(bspec) - 1] = '\0'; } cpu_shutdown(&args); } /* * Intercept the return address from a freshly forked process that has NOT * been scheduled yet. * * This is needed to make kernel threads stay in kernel mode. */ void cpu_fork_kthread_handler(struct thread *td, void (*func)(void *), void *arg) { struct frame *fp; struct pcb *pcb; pcb = td->td_pcb; fp = (struct frame *)(pcb->pcb_sp + SPOFF); fp->fr_local[0] = (u_long)func; fp->fr_local[1] = (u_long)arg; } bool cpu_exec_vmspace_reuse(struct proc *p __unused, vm_map_t map __unused) { return (true); } int cpu_procctl(struct thread *td __unused, int idtype __unused, id_t id __unused, int com __unused, void *data __unused) { return (EINVAL); } int is_physical_memory(vm_paddr_t addr) { struct ofw_mem_region *mr; for (mr = sparc64_memreg; mr < sparc64_memreg + sparc64_nmemreg; mr++) if (addr >= mr->mr_start && addr < mr->mr_start + mr->mr_size) return (1); return (0); } void swi_vm(void *v) { /* Nothing to do here - busdma bounce buffers are not implemented. 
*/ } void * uma_small_alloc(uma_zone_t zone, vm_size_t bytes, int domain, u_int8_t *flags, int wait) { vm_paddr_t pa; vm_page_t m; void *va; PMAP_STATS_INC(uma_nsmall_alloc); *flags = UMA_SLAB_PRIV; m = vm_page_alloc_domain(NULL, 0, domain, malloc2vm_flags(wait) | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ); if (m == NULL) return (NULL); pa = VM_PAGE_TO_PHYS(m); if (dcache_color_ignore == 0 && m->md.color != DCACHE_COLOR(pa)) { KASSERT(m->md.colors[0] == 0 && m->md.colors[1] == 0, ("uma_small_alloc: free page %p still has mappings!", m)); PMAP_STATS_INC(uma_nsmall_alloc_oc); m->md.color = DCACHE_COLOR(pa); dcache_page_inval(pa); } va = (void *)TLB_PHYS_TO_DIRECT(pa); if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0) cpu_block_zero(va, PAGE_SIZE); return (va); } void uma_small_free(void *mem, vm_size_t size, u_int8_t flags) { vm_page_t m; PMAP_STATS_INC(uma_nsmall_free); m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS((vm_offset_t)mem)); vm_page_unwire_noq(m); vm_page_free(m); } void sf_buf_map(struct sf_buf *sf, int flags) { pmap_qenter(sf->kva, &sf->m, 1); } int sf_buf_unmap(struct sf_buf *sf) { pmap_qremove(sf->kva, 1); return (1); } + +uint32_t casuword32_int(volatile uint32_t *base, uint32_t oldval, + uint32_t newval); +uint32_t +casuword32(volatile uint32_t *base, uint32_t oldval, uint32_t newval) +{ + uint32_t ret; + + ret = casuword32_int(base, oldval, newval); + if (ret != -1) + ret = ret != oldval; + return (ret); +} + +u_long casuword64_int(volatile u_long *p, u_long oldval, u_long newval); +u_long +casuword(volatile u_long *p, u_long oldval, u_long newval) +{ + u_long ret; + + ret = casuword64_int(p, oldval, newval); + if (ret != -1L) + ret = ret != oldval; + return (ret); +}
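With the primitives changed on every port, the retry policy now lives in the callers: on result 1 they re-examine the fetched value and decide whether to retry or give up, instead of the primitive spinning in the kernel on memory that userspace can keep dirtying. A hypothetical caller, loosely modeled on a umtx-style lock word (the protocol and names are illustrative, not from this commit):

#include <errno.h>
#include <stdint.h>

int casueword32(volatile uint32_t *, uint32_t, uint32_t *, uint32_t);

static int
try_lock_word(volatile uint32_t *w, uint32_t id)
{
	uint32_t owner;
	int rv;

	for (;;) {
		rv = casueword32(w, 0, &owner, id);
		if (rv == -1)
			return (EFAULT);	/* user page gone */
		if (rv == 0)
			return (0);		/* 0 -> id swap done */
		/* rv == 1: either the word was not 0, or the
		 * store-conditional lost its reservation. */
		if (owner != 0)
			return (EBUSY);		/* genuinely held */
		/* owner == 0: spurious SC failure, try again */
	}
}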