Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F144526585
D32360.id96469.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
14 KB
Referenced Files
None
Subscribers
None
D32360.id96469.diff
View Options
diff --git a/lib/libc/sys/Symbol.map b/lib/libc/sys/Symbol.map
--- a/lib/libc/sys/Symbol.map
+++ b/lib/libc/sys/Symbol.map
@@ -420,6 +420,7 @@
FBSD_1.7 {
_Fork;
fspacectl;
+ membarrier;
};
FBSDprivate_1.0 {
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -10032,6 +10032,12 @@
PCPU_SET(ucr3, PMAP_NO_CR3);
}
+void
+pmap_active_cpus(pmap_t pmap, cpuset_t *res)
+{
+ *res = pmap->pm_active; /* copy out the pmap's pm_active CPU set */
+}
+
void
pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
{
diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S
--- a/sys/amd64/amd64/support.S
+++ b/sys/amd64/amd64/support.S
@@ -1977,3 +1977,20 @@
popq %rax
retq
END(mds_handler_silvermont)
+
+ENTRY(cpu_sync_core)
+/*
+ * Can utilize SERIALIZE when instruction is moved from
+ * 'future extensions' to SDM.
+ */
+ movq (%rsp), %rdx /* %rdx = caller's return address */
+ movl %ss, %eax
+ pushq %rax /* iretq frame: %ss */
+ pushq %rsp /* iretq frame: %rsp, fixed up by the next insn */
+ addq $16, (%rsp) /* +16 undoes the %ss push and pops the return address */
+ pushfq /* iretq frame: %rflags */
+ movl %cs, %eax
+ pushq %rax /* iretq frame: %cs */
+ pushq %rdx /* iretq frame: %rip = return address */
+ iretq /* go back to the caller through IRETQ instead of RET */
+END(cpu_sync_core)
diff --git a/sys/arm64/arm64/vm_machdep.c b/sys/arm64/arm64/vm_machdep.c
--- a/sys/arm64/arm64/vm_machdep.c
+++ b/sys/arm64/arm64/vm_machdep.c
@@ -302,3 +302,9 @@
if (busdma_swi_pending != 0)
busdma_swi();
}
+
+void
+cpu_sync_core(void)
+{
+ /* Do nothing; named cpu_sync_core() to match sys/proc.h and MI callers. */
+}
diff --git a/sys/arm64/include/pmap.h b/sys/arm64/include/pmap.h
--- a/sys/arm64/include/pmap.h
+++ b/sys/arm64/include/pmap.h
@@ -152,6 +152,8 @@
(uint64_t)(asid) << ASID_TO_OPERAND_SHIFT; \
})
+#define PMAP_WANT_ACTIVE_CPUS_NAIVE
+
extern vm_offset_t virtual_avail;
extern vm_offset_t virtual_end;
diff --git a/sys/compat/freebsd32/capabilities.conf b/sys/compat/freebsd32/capabilities.conf
--- a/sys/compat/freebsd32/capabilities.conf
+++ b/sys/compat/freebsd32/capabilities.conf
@@ -428,6 +428,9 @@
##
lseek
+##
+membarrier
+
##
## Allow simple VM operations on the current process.
##
diff --git a/sys/compat/freebsd32/syscalls.master b/sys/compat/freebsd32/syscalls.master
--- a/sys/compat/freebsd32/syscalls.master
+++ b/sys/compat/freebsd32/syscalls.master
@@ -1181,5 +1181,7 @@
const struct spacectl_range32 *rqsr, \
int flags, \
struct spacectl_range32 *rmsr); }
+581 AUE_NULL STD|NOPROTO {int membarrier(int cmd, unsigned flags, \
+ int cpu_id); }
; vim: syntax=off
diff --git a/sys/conf/files b/sys/conf/files
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3818,6 +3818,7 @@
kern/kern_loginclass.c standard
kern/kern_malloc.c standard
kern/kern_mbuf.c standard
+kern/kern_membarrier.c standard
kern/kern_mib.c standard
kern/kern_module.c standard
kern/kern_mtxpool.c standard
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -774,6 +774,8 @@
p->p_flag2 &= ~P2_NOTRACE;
if ((p->p_flag2 & P2_STKGAP_DISABLE_EXEC) == 0)
p->p_flag2 &= ~P2_STKGAP_DISABLE;
+ p->p_flag2 &= ~(P2_MEMBAR_PRIVE | P2_MEMBAR_PRIVE_SYNCORE |
+ P2_MEMBAR_GLOBE);
if (p->p_flag & P_PPWAIT) {
p->p_flag &= ~(P_PPWAIT | P_PPTRACE);
cv_broadcast(&p->p_pwait);
diff --git a/sys/kern/kern_membarrier.c b/sys/kern/kern_membarrier.c
new file mode 100644
--- /dev/null
+++ b/sys/kern/kern_membarrier.c
@@ -0,0 +1,191 @@
+/*-
+ * Copyright (c) 2021 The FreeBSD Foundation
+ *
+ * This software were developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/lock.h>
+#include <sys/membarrier.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/smp.h>
+#include <sys/syscallsubr.h>
+#include <sys/sysproto.h>
+
+#include <vm/vm_param.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+
+#define MEMBARRIER_SUPPORTED_CMDS ( \
+ MEMBARRIER_CMD_GLOBAL | \
+ MEMBARRIER_CMD_GLOBAL_EXPEDITED | \
+ MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED | \
+ MEMBARRIER_CMD_PRIVATE_EXPEDITED | \
+ MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED | \
+ MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE | \
+ MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE)
+
+static void
+membarrier_action_seqcst(void *arg __unused)
+{
+ atomic_thread_fence_seq_cst(); /* rendezvous action: seq_cst fence */
+}
+
+static void
+membarrier_action_seqcst_sync_core(void *arg __unused)
+{
+ atomic_thread_fence_seq_cst(); /* rendezvous action: seq_cst fence ... */
+ cpu_sync_core(); /* ... followed by the MD core synchronization */
+}
+
+static void
+do_membarrier_ipi(cpuset_t *csp, void (*func)(void *))
+{
+ atomic_thread_fence_seq_cst(); /* fence in the caller before the rendezvous */
+ smp_rendezvous_cpus(*csp, smp_no_rendezvous_barrier, func,
+ smp_no_rendezvous_barrier, NULL); /* func is the action, no setup/teardown barriers */
+ atomic_thread_fence_seq_cst(); /* fence again once the rendezvous returned */
+}
+
+/*
+ *
+ * XXXKIB: We execute the requested action (seq_cst and possibly
+ * sync_core) on current CPU as well. There is no guarantee that
+ * current thread executes anything with the full fence semantics
+ * during syscall execution. Similarly, cpu_sync_core() semantics
+ * might not be provided by the syscall return. E.g. on amd64 we
+ * typically return without IRET.
+ */
+int
+kern_membarrier(struct thread *td, int cmd, unsigned flags, int cpu_id)
+{
+ struct proc *p, *p1;
+ struct thread *td1;
+ cpuset_t cs;
+ int c, error;
+
+ if (flags != 0 || (cmd & ~MEMBARRIER_SUPPORTED_CMDS) != 0)
+ return (EINVAL); /* no flags are accepted; cpu_id is not used */
+
+ if (cmd == MEMBARRIER_CMD_QUERY) {
+ td->td_retval[0] = MEMBARRIER_SUPPORTED_CMDS; /* report supported mask */
+ return (0);
+ }
+
+ p = td->td_proc;
+ error = 0;
+
+ switch (cmd) {
+ case MEMBARRIER_CMD_GLOBAL:
+ do_membarrier_ipi(&all_cpus, membarrier_action_seqcst);
+ break;
+
+ case MEMBARRIER_CMD_GLOBAL_EXPEDITED:
+ if ((p->p_flag2 & P2_MEMBAR_GLOBE) == 0) {
+ error = EPERM; /* caller did not register first */
+ } else {
+ CPU_ZERO(&cs);
+ CPU_FOREACH(c) {
+ td1 = cpuid_to_pcpu[c]->pc_curthread;
+ p1 = td1->td_proc;
+ if (p1 != NULL &&
+ (p1->p_flag2 & P2_MEMBAR_GLOBE) != 0)
+ CPU_SET(c, &cs); /* CPU runs a registered process */
+ }
+ do_membarrier_ipi(&cs, membarrier_action_seqcst);
+ }
+ break;
+
+ case MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED:
+ if ((p->p_flag2 & P2_MEMBAR_GLOBE) == 0) {
+ PROC_LOCK(p);
+ p->p_flag2 |= P2_MEMBAR_GLOBE;
+ PROC_UNLOCK(p);
+ }
+ break;
+
+ case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
+ if ((p->p_flag2 & P2_MEMBAR_PRIVE) == 0) {
+ error = EPERM; /* caller did not register first */
+ } else {
+ pmap_active_cpus(vmspace_pmap(p->p_vmspace), &cs);
+ do_membarrier_ipi(&cs, membarrier_action_seqcst);
+ }
+ break;
+
+ case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
+ if ((p->p_flag2 & P2_MEMBAR_PRIVE) == 0) {
+ PROC_LOCK(p);
+ p->p_flag2 |= P2_MEMBAR_PRIVE;
+ PROC_UNLOCK(p);
+ }
+ break;
+
+ case MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE:
+ if ((p->p_flag2 & P2_MEMBAR_PRIVE_SYNCORE) == 0) {
+ error = EPERM; /* caller did not register first */
+ } else {
+ /*
+ * Calculating the IPI multicast mask from
+ * pmap active mask means that we do not call
+ * cpu_sync_core() on CPUs that were missed
+ * from pmap active mask but could be switched
+ * from or to meantime. This is fine at least
+ * on amd64 because threads always use slow
+ * (IRETQ) path to return from syscall after
+ * context switch.
+ */
+ pmap_active_cpus(vmspace_pmap(p->p_vmspace), &cs);
+
+ do_membarrier_ipi(&cs,
+ membarrier_action_seqcst_sync_core);
+ }
+ break;
+
+ case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE:
+ if ((p->p_flag2 & P2_MEMBAR_PRIVE_SYNCORE) == 0) {
+ PROC_LOCK(p);
+ p->p_flag2 |= P2_MEMBAR_PRIVE_SYNCORE;
+ PROC_UNLOCK(p);
+ }
+ break;
+
+ default:
+ error = EINVAL; /* more than one command bit was set */
+ break;
+ }
+
+ return (error);
+}
+
+int
+sys_membarrier(struct thread *td, struct membarrier_args *uap)
+{
+ return (kern_membarrier(td, uap->cmd, uap->flags, uap->cpu_id)); /* unpack uap */
+}
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -3261,6 +3261,13 @@
_Out_opt_ struct spacectl_range *rmsr,
);
}
+581 AUE_NULL STD|CAPENABLED {
+ int membarrier(
+ int cmd,
+ unsigned flags,
+ int cpu_id
+ );
+ }
; Please copy any additions and changes to the following compatability tables:
; sys/compat/freebsd32/syscalls.master
diff --git a/sys/sys/membarrier.h b/sys/sys/membarrier.h
new file mode 100644
--- /dev/null
+++ b/sys/sys/membarrier.h
@@ -0,0 +1,59 @@
+/*-
+ * Copyright (c) 2021 The FreeBSD Foundation
+ *
+ * This software were developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef __SYS_MEMBARRIER_H__
+#define __SYS_MEMBARRIER_H__
+
+#include <sys/cdefs.h>
+
+#define MEMBARRIER_CMD_QUERY 0x00000000
+#define MEMBARRIER_CMD_GLOBAL 0x00000001
+#define MEMBARRIER_CMD_SHARED MEMBARRIER_CMD_GLOBAL
+#define MEMBARRIER_CMD_GLOBAL_EXPEDITED 0x00000002
+#define MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED 0x00000004
+#define MEMBARRIER_CMD_PRIVATE_EXPEDITED 0x00000008
+#define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED 0x00000010
+#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE 0x00000020
+#define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE 0x00000040
+
+/*
+ * RSEQ constants are defined for source compatibility but not
+ * supported, MEMBARRIER_CMD_QUERY does not return them in the mask.
+ */
+#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ 0x00000080
+#define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ 0x00000100
+
+#define MEMBARRIER_CMD_FLAG_CPU 0x00000001
+
+#ifndef _KERNEL
+__BEGIN_DECLS
+int membarrier(int, unsigned, int);
+__END_DECLS
+#endif /* _KERNEL */
+
+#endif /* __SYS_MEMBARRIER_H__ */
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -839,6 +839,9 @@
#define P2_NO_NEW_PRIVS 0x00008000 /* Ignore setuid */
#define P2_WXORX_DISABLE 0x00010000 /* WX mappings enabled */
#define P2_WXORX_ENABLE_EXEC 0x00020000 /* WXORX enabled after exec */
+#define P2_MEMBAR_PRIVE 0x00040000 /* membarrier private expedited registered */
+#define P2_MEMBAR_PRIVE_SYNCORE 0x00080000 /* membarrier private sync core registered */
+#define P2_MEMBAR_GLOBE 0x00100000 /* membarrier global expedited registered */
/* Flags protected by proctree_lock, kept in p_treeflags. */
#define P_TREE_ORPHANED 0x00000001 /* Reparented, on orphan list */
@@ -1160,6 +1163,7 @@
int cpu_idle_wakeup(int);
extern void (*cpu_idle_hook)(sbintime_t); /* Hook to machdep CPU idler. */
void cpu_switch(struct thread *, struct thread *, struct mtx *);
+void cpu_sync_core(void);
void cpu_throw(struct thread *, struct thread *) __dead2;
bool curproc_sigkilled(void);
void userret(struct thread *, struct trapframe *);
diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h
--- a/sys/sys/syscallsubr.h
+++ b/sys/sys/syscallsubr.h
@@ -196,6 +196,8 @@
int inherit);
int kern_mkdirat(struct thread *td, int fd, const char *path,
enum uio_seg segflg, int mode);
+int kern_membarrier(struct thread *td, int cmd, unsigned flags,
+ int cpu_id);
int kern_mkfifoat(struct thread *td, int fd, const char *path,
enum uio_seg pathseg, int mode);
int kern_mknodat(struct thread *td, int fd, const char *path,
diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h
--- a/sys/vm/pmap.h
+++ b/sys/vm/pmap.h
@@ -120,6 +120,7 @@
#define PMAP_TS_REFERENCED_MAX 5
void pmap_activate(struct thread *td);
+void pmap_active_cpus(pmap_t pmap, cpuset_t *res);
void pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
int advice);
void pmap_align_superpage(vm_object_t, vm_ooffset_t, vm_offset_t *,
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -79,6 +79,7 @@
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
+#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/vmem.h>
#include <sys/vmmeter.h>
@@ -888,6 +889,31 @@
#endif
}
+#ifdef PMAP_WANT_ACTIVE_CPUS_NAIVE
+void
+pmap_active_cpus(pmap_t pmap, cpuset_t *res) /* MI fallback, see PMAP_WANT_ACTIVE_CPUS_NAIVE */
+{
+ struct thread *td;
+ struct proc *p;
+ struct vmspace *vm;
+ int c;
+
+ CPU_ZERO(res); /* start from the empty set */
+ CPU_FOREACH(c) {
+ td = cpuid_to_pcpu[c]->pc_curthread;
+ p = td->td_proc;
+ if (p == NULL)
+ continue;
+ vm = vmspace_acquire_ref(p);
+ if (vm == NULL)
+ continue;
+ if (pmap == vmspace_pmap(vm))
+ CPU_SET(c, res); /* CPU's current thread uses this pmap */
+ vmspace_free(vm); /* drop the reference taken above */
+ }
+}
+#endif
+
/*
* Allow userspace to directly trigger the VM drain routine for testing
* purposes.
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Feb 10, 5:07 AM (16 h, 11 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28614129
Default Alt Text
D32360.id96469.diff (14 KB)
Attached To
Mode
D32360: Add membarrier(2)
Attached
Detach File
Event Timeline
Log In to Comment