D48062.id147897.diff
D48062: (draft) i386 atomics: Implement 64-bit loading with SSE2

diff --git a/include/i386/Makefile b/include/i386/Makefile
--- a/include/i386/Makefile
+++ b/include/i386/Makefile
@@ -16,6 +16,7 @@
INCS+= \
counter.h \
md_var.h \
+ npx.h \
pcpu.h \
pcpu_aux.h
INCSDIR= ${INCLUDEDIR}/i386
diff --git a/sys/i386/i386/npx.c b/sys/i386/i386/npx.c
--- a/sys/i386/i386/npx.c
+++ b/sys/i386/i386/npx.c
@@ -57,6 +57,7 @@
#include <machine/cputypes.h>
#include <machine/frame.h>
#include <machine/md_var.h>
+#include <machine/npx.h>
#include <machine/pcb.h>
#include <machine/psl.h>
#include <machine/resource.h>
@@ -1522,6 +1523,40 @@
return ((curpcb->pcb_flags & PCB_KERNNPX_THR) != 0);
}
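+/*
+ * Enter a critical section and make the FPU usable: if CR0.TS was set,
+ * enable the FPU and return non-zero so that the matching
+ * fpu_kern_critical_exit() call disables it again.
+ */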
+u_int
+fpu_kern_critical_enter(void)
+{
+ u_int res;
+
+ critical_enter();
+ if ((rcr0() & CR0_TS) != 0) {
+ res = 1;
+ fpu_enable();
+ } else
+ res = 0;
+
+ return (res);
+}
+
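+/*
+ * Leave the critical section entered by fpu_kern_critical_enter(),
+ * disabling the FPU again if it was enabled there.
+ */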
+void
+fpu_kern_critical_exit(u_int enter_result)
+{
+ CRITICAL_ASSERT(curthread);
+
+ if (enter_result != 0)
+ fpu_disable();
+ critical_exit();
+}
+
/*
* FPU save area alloc/free/init utility routines
*/
diff --git a/sys/i386/include/atomic.h b/sys/i386/include/atomic.h
--- a/sys/i386/include/atomic.h
+++ b/sys/i386/include/atomic.h
@@ -32,6 +32,7 @@
#ifdef _KERNEL
#include <machine/md_var.h>
+#include <i386/npx.h>
#include <machine/specialreg.h>
#endif
@@ -455,6 +456,42 @@
return (res);
}
+static __inline uint64_t __attribute__((__target__("sse2")))
+atomic_load_acq_64_sse2(const volatile uint64_t *p)
+{
+ char _Alignas(16) xmm_sav[16];
+ uint64_t res;
+ u_int fpu_res;
+ volatile __unused u_int res_half;
+
+ /*
+ * Prefetch the target content before entering the critical section, as
+ * we want to avoid a cache-miss penalty inside it.
+ */
+ res_half = *(const volatile u_int*)p;
+
+ fpu_res = fpu_kern_critical_enter();
+
+ /*
+ * All XMM registers are scratch registers in the ABI, but the kernel
+ * generally does not use them, so userland's XMM contents may not have
+ * been saved.  Reading the PCB and branching to determine whether they
+ * were is likely to cost more than always saving the one temporary XMM
+ * register we use.
+ */
+ __asm (
+ " movdqa %%xmm0, %0; "
+ " movq %2, %%xmm0; "
+ " movq %%xmm0, %1; "
+ " movdqa %0, %%xmm0; "
+ : "=m" (*xmm_sav), /* 0 */
+ "=m" (res) /* 1 */
+ : "m" (*p)); /* 2 */
+
+ fpu_kern_critical_exit(fpu_res);
+ return (res);
+}
+
static __inline void
atomic_store_rel_64_i586(volatile uint64_t *p, uint64_t v)
{
@@ -512,8 +549,10 @@
if ((cpu_feature & CPUID_CX8) == 0)
return (atomic_load_acq_64_i386(p));
- else
+ else if ((cpu_feature & CPUID_SSE2) == 0)
return (atomic_load_acq_64_i586(p));
+ else
+ return (atomic_load_acq_64_sse2(p));
}
static __inline void
@@ -571,6 +610,21 @@
}
}
+#else /* !_KERNEL */
+
+static __inline uint64_t __attribute__((__target__("sse2")))
+atomic_load_acq_64_sse2(const volatile uint64_t *p)
+{
+ uint64_t res;
+
+ __asm (
+ " movq %1, %0; "
+ : "=x" (res) /* 0 */
+ : "m" (*p)); /* 1 */
+
+ return (res);
+}
+
#endif /* _KERNEL */
ATOMIC_ASM(set, char, "orb %b1,%0", "iq", v);
diff --git a/sys/i386/include/npx.h b/sys/i386/include/npx.h
--- a/sys/i386/include/npx.h
+++ b/sys/i386/include/npx.h
@@ -78,6 +78,8 @@
int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx);
int fpu_kern_thread(u_int flags);
int is_fpu_kern_thread(u_int flags);
+u_int fpu_kern_critical_enter(void);
+void fpu_kern_critical_exit(u_int enter_result);
union savefpu *fpu_save_area_alloc(void);
void fpu_save_area_free(union savefpu *fsa);
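Notes

The fpu_kern_critical_enter()/fpu_kern_critical_exit() pair added in
npx.c lets a short stretch of kernel code use SSE2 instructions without
the cost of a full fpu_kern_enter()/fpu_kern_leave() setup: the caller
brackets its SSE2 code with the pair, passes the enter result through,
and saves/restores by hand any XMM register it touches, since this path
does not preserve XMM state for the caller.  A minimal sketch of the
pattern, using a hypothetical function that is not part of the patch
(kernel context, assuming <machine/npx.h> for the prototypes):

	/* Hypothetical example: zero 16 aligned bytes with one SSE2 store. */
	static __inline void __attribute__((__target__("sse2")))
	sse2_zero16(void *dst)
	{
		char _Alignas(16) xmm_sav[16];
		u_int fpu_res;

		fpu_res = fpu_kern_critical_enter();
		__asm (
		    "	movdqa	%%xmm0, %0;	"	/* save the XMM register we borrow */
		    "	pxor	%%xmm0, %%xmm0;	"	/* xmm0 = 0 */
		    "	movdqa	%%xmm0, %1;	"	/* store; dst must be 16-byte aligned */
		    "	movdqa	%0, %%xmm0;	"	/* restore the saved register */
		    : "=m" (*xmm_sav),			/* 0 */
		      "=m" (*(char (*)[16])dst));	/* 1 */
		fpu_kern_critical_exit(fpu_res);
	}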
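With the change applied, the kernel's atomic_load_acq_64() picks an
implementation from CPUID feature bits: CPUs without CPUID_CX8 keep the
i386 fallback, CPUs with cmpxchg8b but without SSE2 use the i586
version, and everything else takes the new single-movq SSE2 path.  The
userland variant needs no explicit save/restore because the "=x"
constraint lets the compiler allocate the XMM register itself.  A
hypothetical userland caller, assuming atomic_load_acq_64() is reachable
through <machine/atomic.h> as the !_KERNEL branch of the patch suggests:

	#include <sys/types.h>
	#include <machine/atomic.h>

	/* Read a 64-bit value shared with other threads in one atomic load. */
	static uint64_t
	read_shared64(const volatile uint64_t *p)
	{
		/*
		 * On SSE2-capable CPUs this can be served by the new
		 * single-movq path rather than a locked cmpxchg8b.
		 */
		return (atomic_load_acq_64(p));
	}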