diff --git a/lib/libc/sys/Symbol.map b/lib/libc/sys/Symbol.map --- a/lib/libc/sys/Symbol.map +++ b/lib/libc/sys/Symbol.map @@ -420,6 +420,7 @@ FBSD_1.7 { _Fork; fspacectl; + membarrier; }; FBSDprivate_1.0 { diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -10032,6 +10032,12 @@ PCPU_SET(ucr3, PMAP_NO_CR3); } +void +pmap_active_cpus(pmap_t pmap, cpuset_t *res) +{ + *res = pmap->pm_active; /* copy the pmap's pm_active CPU set out to the caller */ +} + void pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) { diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S --- a/sys/amd64/amd64/support.S +++ b/sys/amd64/amd64/support.S @@ -1977,3 +1977,16 @@ popq %rax retq END(mds_handler_silvermont) + +ENTRY(cpu_sync_core) + movq (%rsp), %rdx /* %rdx = our return address */ + movl %ss, %eax + pushq %rax /* frame: %ss */ + pushq %rsp + addq $16, (%rsp) /* frame: %rsp, adjusted to the caller's value once the return address is gone */ + pushfq /* frame: %rflags */ + movl %cs, %eax + pushq %rax /* frame: %cs */ + pushq %rdx /* frame: %rip = return address */ + iretq /* return through IRET instead of RET; presumably chosen for its serializing effect -- confirm against the SDM */ +END(cpu_sync_core) diff --git a/sys/compat/freebsd32/capabilities.conf b/sys/compat/freebsd32/capabilities.conf --- a/sys/compat/freebsd32/capabilities.conf +++ b/sys/compat/freebsd32/capabilities.conf @@ -428,6 +428,9 @@ ## lseek +## +membarrier + ## ## Allow simple VM operations on the current process. 
## diff --git a/sys/compat/freebsd32/syscalls.master b/sys/compat/freebsd32/syscalls.master --- a/sys/compat/freebsd32/syscalls.master +++ b/sys/compat/freebsd32/syscalls.master @@ -1181,5 +1181,7 @@ const struct spacectl_range32 *rqsr, \ int flags, \ struct spacectl_range32 *rmsr); } +581 AUE_NULL STD|NOPROTO {int membarrier(int cmd, unsigned flags, \ + int cpu_id); } ; vim: syntax=off diff --git a/sys/conf/files b/sys/conf/files --- a/sys/conf/files +++ b/sys/conf/files @@ -3818,6 +3818,7 @@ kern/kern_loginclass.c standard kern/kern_malloc.c standard kern/kern_mbuf.c standard +kern/kern_membarrier.c standard kern/kern_mib.c standard kern/kern_module.c standard kern/kern_mtxpool.c standard diff --git a/sys/kern/kern_membarrier.c b/sys/kern/kern_membarrier.c new file mode 100644 --- /dev/null +++ b/sys/kern/kern_membarrier.c @@ -0,0 +1,187 @@ +/*- + * Copyright (c) 2021 The FreeBSD Foundation + * + * This software were developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/cpuset.h> +#include <sys/lock.h> +#include <sys/membarrier.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/smp.h> +#include <sys/syscallsubr.h> +#include <sys/sysproto.h> + +#include <vm/vm_param.h> +#include <vm/vm.h> +#include <vm/pmap.h> +#include <vm/vm_map.h> + +#define MEMBARRIER_SUPPORTED_CMDS \ + (MEMBARRIER_CMD_GLOBAL | \ + MEMBARRIER_CMD_GLOBAL_EXPEDITED | \ + MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED | \ + MEMBARRIER_CMD_PRIVATE_EXPEDITED | \ + MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED | \ + MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE | \ + MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE) /* mask used to validate cmd and reported for MEMBARRIER_CMD_QUERY */ + +static void +membarrier_action_seqcst(void *arg __unused) +{ + atomic_thread_fence_seq_cst(); /* rendezvous action: sequentially consistent fence */ +} + +static void +membarrier_action_seqcst_sync_core(void *arg __unused) +{ + atomic_thread_fence_seq_cst(); /* same fence as the plain action ... */ + cpu_sync_core(); /* ... followed by the machine-dependent cpu_sync_core() for the SYNC_CORE command */ +} + +/* + * + * XXXKIB: We execute the requested action (seq_cst and possibly + * sync_core) on current CPU as well. There is no guarantee that + * current thread executes anything with the full fence semantics + * during syscall execution. 
Similarly, cpu_sync_core() semantics + * might not be provided by the syscall return. E.g. on amd64 we + * typically return without IRET. 
+ */ +int +kern_membarrier(struct thread *td, int cmd, unsigned flags, int cpu_id) +{ + struct proc *p, *p1; + struct thread *td1; + cpuset_t cs; + int c, error; + + if (flags != 0 || (cmd & ~MEMBARRIER_SUPPORTED_CMDS) != 0) + return (EINVAL); /* any flag, or a cmd bit outside the supported mask; cpu_id is not consulted anywhere below */ + + if (cmd == MEMBARRIER_CMD_QUERY) { + td->td_retval[0] = MEMBARRIER_SUPPORTED_CMDS; /* QUERY returns the supported command mask */ + return (0); + } + + p = td->td_proc; + error = 0; + + switch (cmd) { + case MEMBARRIER_CMD_GLOBAL: + smp_rendezvous(smp_no_rendezvous_barrier, + membarrier_action_seqcst, smp_no_rendezvous_barrier, NULL); /* fence action run through smp_rendezvous(), no CPU mask */ + break; + + case MEMBARRIER_CMD_GLOBAL_EXPEDITED: + if ((td->td_proc->p_flag2 & P2_MEMBAR_GLOBE) == 0) { + error = EPERM; /* calling process never registered for GLOBAL_EXPEDITED */ + } else { + CPU_ZERO(&cs); + CPU_FOREACH(c) { + td1 = cpuid_to_pcpu[c]->pc_curthread; /* thread currently recorded for CPU c */ + p1 = td1->td_proc; + if (p1 != NULL && + (p1->p_flag2 & P2_MEMBAR_GLOBE) != 0) + CPU_SET(c, &cs); /* its process registered, so include this CPU */ + } + smp_rendezvous_cpus(cs, smp_no_rendezvous_barrier, + membarrier_action_seqcst, + smp_no_rendezvous_barrier, NULL); + } + break; + + case MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED: + if ((p->p_flag2 & P2_MEMBAR_GLOBE) == 0) { /* tested unlocked; the flag is only ever set here */ + PROC_LOCK(p); + p->p_flag2 |= P2_MEMBAR_GLOBE; + PROC_UNLOCK(p); + } + break; + + case MEMBARRIER_CMD_PRIVATE_EXPEDITED: + if ((td->td_proc->p_flag2 & P2_MEMBAR_PRIVE) == 0) { + error = EPERM; /* calling process never registered for PRIVATE_EXPEDITED */ + } else { + pmap_active_cpus(vmspace_pmap(p->p_vmspace), &cs); /* target the CPUs recorded as active for this process's pmap */ + smp_rendezvous_cpus(cs, smp_no_rendezvous_barrier, + membarrier_action_seqcst, + smp_no_rendezvous_barrier, NULL); + } + break; + + case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED: + if ((p->p_flag2 & P2_MEMBAR_PRIVE) == 0) { + PROC_LOCK(p); + p->p_flag2 |= P2_MEMBAR_PRIVE; + PROC_UNLOCK(p); + } + break; + + case MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE: + if ((td->td_proc->p_flag2 & P2_MEMBAR_PRIVE_SYNCORE) == 0) { + error = EPERM; /* calling process never registered for the SYNC_CORE variant */ + } else { + /* + * XXXKIB Calculating the IPI multicast mask + * from pmap active mask means that we do not + * call cpu_sync_core() on CPUs that were + * missed from pmap active mask but could be + * switched from or to 
in the meantime. This is + * strange semantics but it matches what Linux + * does. + */ + pmap_active_cpus(vmspace_pmap(p->p_vmspace), &cs); + + smp_rendezvous_cpus(cs, smp_no_rendezvous_barrier, + membarrier_action_seqcst_sync_core, + smp_no_rendezvous_barrier, NULL); + } + break; + + case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE: + if ((p->p_flag2 & P2_MEMBAR_PRIVE_SYNCORE) == 0) { + PROC_LOCK(p); + p->p_flag2 |= P2_MEMBAR_PRIVE_SYNCORE; + PROC_UNLOCK(p); + } + break; + + default: /* reached when cmd combines more than one supported bit */ + error = EINVAL; + break; + } + + return (error); +} + +int +sys_membarrier(struct thread *td, struct membarrier_args *uap) +{ + return (kern_membarrier(td, uap->cmd, uap->flags, uap->cpu_id)); /* unpack the syscall argument structure and forward */ +} diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -3261,6 +3261,13 @@ _Out_opt_ struct spacectl_range *rmsr, ); } +581 AUE_NULL STD|CAPENABLED { + int membarrier( + int cmd, + unsigned flags, + int cpu_id + ); + } ; Please copy any additions and changes to the following compatability tables: ; sys/compat/freebsd32/syscalls.master diff --git a/sys/sys/membarrier.h b/sys/sys/membarrier.h new file mode 100644 --- /dev/null +++ b/sys/sys/membarrier.h @@ -0,0 +1,59 @@ +/*- + * Copyright (c) 2021 The FreeBSD Foundation + * + * This software were developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef __SYS_MEMBARRIER_H__ +#define __SYS_MEMBARRIER_H__ + +#include <sys/cdefs.h> + +#define MEMBARRIER_CMD_QUERY 0x00000000 +#define MEMBARRIER_CMD_GLOBAL 0x00000001 +#define MEMBARRIER_CMD_SHARED MEMBARRIER_CMD_GLOBAL +#define MEMBARRIER_CMD_GLOBAL_EXPEDITED 0x00000002 +#define MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED 0x00000004 +#define MEMBARRIER_CMD_PRIVATE_EXPEDITED 0x00000008 +#define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED 0x00000010 +#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE 0x00000020 +#define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE 0x00000040 + +/* + * RSEQ constants are defined for source compatibility but are not + * supported; MEMBARRIER_CMD_QUERY does not return them in the mask. 
+ */ +#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ 0x00000080 +#define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ 0x00000100 + +#define MEMBARRIER_CMD_FLAG_CPU 0x00000001 + +#ifndef _KERNEL +__BEGIN_DECLS +int membarrier(int, unsigned, int); +__END_DECLS +#endif /* !_KERNEL */ + +#endif /* __SYS_MEMBARRIER_H__ */ diff --git a/sys/sys/proc.h b/sys/sys/proc.h --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -839,6 +839,9 @@ #define P2_NO_NEW_PRIVS 0x00008000 /* Ignore setuid */ #define P2_WXORX_DISABLE 0x00010000 /* WX mappings enabled */ #define P2_WXORX_ENABLE_EXEC 0x00020000 /* WXORX enabled after exec */ +#define P2_MEMBAR_PRIVE 0x00040000 /* membar private expedited registered */ +#define P2_MEMBAR_PRIVE_SYNCORE 0x00080000 /* membar private expedited sync core registered */ +#define P2_MEMBAR_GLOBE 0x00100000 /* membar global expedited registered */ /* Flags protected by proctree_lock, kept in p_treeflags. */ #define P_TREE_ORPHANED 0x00000001 /* Reparented, on orphan list */ @@ -1160,6 +1163,7 @@ int cpu_idle_wakeup(int); extern void (*cpu_idle_hook)(sbintime_t); /* Hook to machdep CPU idler. */ void cpu_switch(struct thread *, struct thread *, struct mtx *); +void cpu_sync_core(void); void cpu_throw(struct thread *, struct thread *) __dead2; void unsleep(struct thread *); void userret(struct thread *, struct trapframe *); diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h --- a/sys/sys/syscallsubr.h +++ b/sys/sys/syscallsubr.h @@ -196,6 +196,8 @@ int inherit); int kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg, int mode); +int kern_membarrier(struct thread *td, int cmd, unsigned flags, + int cpu_id); int kern_mkfifoat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, int mode); int kern_mknodat(struct thread *td, int fd, const char *path, diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h --- a/sys/vm/pmap.h +++ b/sys/vm/pmap.h @@ -120,6 +120,7 @@ #define PMAP_TS_REFERENCED_MAX 5 void pmap_activate(struct thread *td); +void pmap_active_cpus(pmap_t pmap, cpuset_t *res); void pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice); void 
pmap_align_superpage(vm_object_t, vm_ooffset_t, vm_offset_t *,