D48062.id147897.diff

diff --git a/include/i386/Makefile b/include/i386/Makefile
--- a/include/i386/Makefile
+++ b/include/i386/Makefile
@@ -16,6 +16,7 @@
 INCS+=	\
 	counter.h \
 	md_var.h \
+	npx.h \
 	pcpu.h \
 	pcpu_aux.h
 INCSDIR=	${INCLUDEDIR}/i386
diff --git a/sys/i386/i386/npx.c b/sys/i386/i386/npx.c
--- a/sys/i386/i386/npx.c
+++ b/sys/i386/i386/npx.c
@@ -57,6 +57,7 @@
 #include <machine/cputypes.h>
 #include <machine/frame.h>
 #include <machine/md_var.h>
+#include <machine/npx.h>
 #include <machine/pcb.h>
 #include <machine/psl.h>
 #include <machine/resource.h>
@@ -1522,6 +1523,31 @@
 	return ((curpcb->pcb_flags & PCB_KERNNPX_THR) != 0);
 }
 
+u_int
+fpu_kern_critical_enter(void)
+{
+	u_int res;
+
+	critical_enter();
+	if ((rcr0() & CR0_TS) != 0) {
+		res = 1;
+		fpu_enable();
+	} else
+		res = 0;
+
+	return (res);
+}
+
+void
+fpu_kern_critical_exit(u_int enter_result)
+{
+	CRITICAL_ASSERT(curthread);
+
+	if (enter_result != 0)
+		fpu_disable();
+	critical_exit();
+}
+
 /*
  * FPU save area alloc/free/init utility routines
  */
diff --git a/sys/i386/include/atomic.h b/sys/i386/include/atomic.h
--- a/sys/i386/include/atomic.h
+++ b/sys/i386/include/atomic.h
@@ -32,6 +32,7 @@
 
 #ifdef _KERNEL
 #include <machine/md_var.h>
+#include <i386/npx.h>
 #include <machine/specialreg.h>
 #endif
 
@@ -455,6 +456,42 @@
 	return (res);
 }
 
+static __inline uint64_t __attribute__((__target__("sse2")))
+atomic_load_acq_64_sse2(const volatile uint64_t *p)
+{
+	char _Alignas(16) xmm_sav[16];
+	uint64_t res;
+	u_int fpu_res;
+	volatile __unused u_int res_half;
+
+	/*
+	 * Prefetch the target before entering the critical section,
+	 * to avoid paying a cache-miss penalty inside it.
+	 */
+	res_half = *(const volatile u_int *)p;
+
+	fpu_res = fpu_kern_critical_enter();
+
+	/*
+	 * All XMM registers are scratch registers in the ABI, but the
+	 * kernel generally does not use them, and userland XMM registers
+	 * may not have been saved.  Reading the PCB and branching to
+	 * find out is likely to cost more than always saving the one
+	 * temporary XMM register we use.
+	 */
+	__asm (
+	    " movdqa %%xmm0, %0; "
+	    " movq %2, %%xmm0; "
+	    " movq %%xmm0, %1; "
+	    " movdqa %0, %%xmm0; "
+	    : "=m" (*xmm_sav),	/* 0 */
+	      "=m" (res)	/* 1 */
+	    : "m" (*p));	/* 2 */
+
+	fpu_kern_critical_exit(fpu_res);
+	return (res);
+}
+
 static __inline void
 atomic_store_rel_64_i586(volatile uint64_t *p, uint64_t v)
 {
@@ -512,8 +549,10 @@
 
 	if ((cpu_feature & CPUID_CX8) == 0)
 		return (atomic_load_acq_64_i386(p));
-	else
+	else if ((cpu_feature & CPUID_SSE2) == 0)
 		return (atomic_load_acq_64_i586(p));
+	else
+		return (atomic_load_acq_64_sse2(p));
 }
 
 static __inline void
@@ -571,6 +610,21 @@
 	}
 }
 
+#else /* !_KERNEL */
+
+static __inline uint64_t __attribute__((__target__("sse2")))
+atomic_load_acq_64_sse2(const volatile uint64_t *p)
+{
+	uint64_t res;
+
+	__asm (
+	    " movq %1, %0; "
+	    : "=x" (res)	/* 0 */
+	    : "m" (*p));	/* 1 */
+
+	return (res);
+}
+
 #endif /* _KERNEL */
 
 ATOMIC_ASM(set, char, "orb %b1,%0", "iq", v);
diff --git a/sys/i386/include/npx.h b/sys/i386/include/npx.h
--- a/sys/i386/include/npx.h
+++ b/sys/i386/include/npx.h
@@ -78,6 +78,8 @@
 int	fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx);
 int	fpu_kern_thread(u_int flags);
 int	is_fpu_kern_thread(u_int flags);
+u_int	fpu_kern_critical_enter(void);
+void	fpu_kern_critical_exit(u_int enter_result);
 union savefpu	*fpu_save_area_alloc(void);
 void	fpu_save_area_free(union savefpu *fsa);
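
The npx.c helpers implement a bracket pattern for short FPU-touching sequences in the kernel: fpu_kern_critical_enter() enters a critical section and, if CR0.TS is set (so any FPU/SSE instruction would fault), clears it with fpu_enable() and reports that in its return value; fpu_kern_critical_exit() restores TS with fpu_disable() when needed before leaving the critical section. A minimal caller sketch; the hypothetical xmm_sequence() stands in for any short stretch of code that saves and restores whatever XMM state it clobbers, as atomic_load_acq_64_sse2() above does with movdqa:

	u_int fpu_res;

	fpu_res = fpu_kern_critical_enter();	/* pin the thread; clear CR0.TS if set */
	xmm_sequence();				/* hypothetical SSE2 sequence; must preserve XMM contents */
	fpu_kern_critical_exit(fpu_res);	/* restore CR0.TS as found and unpin */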
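
In the !_KERNEL variant no TS bookkeeping is needed, because a userland thread's FPU context is always live, and the "=x" constraint lets the compiler pick any XMM register instead of saving %xmm0 by hand. The premise of the change is that on SSE2-capable CPUs an aligned 8-byte movq executes as a single memory access, which is cheaper than the lock cmpxchg8b round-trip of atomic_load_acq_64_i586(). A standalone userland sketch of the same load, compilable with cc -m32 -msse2; test_load64() and main() are illustrative names, not part of the patch:

	#include <stdint.h>
	#include <stdio.h>

	/* 8-byte load in a single SSE2 movq, mirroring the !_KERNEL variant above. */
	static inline uint64_t
	test_load64(const volatile uint64_t *p)
	{
		uint64_t res;

		__asm (" movq %1, %0; " : "=x" (res) : "m" (*p));
		return (res);
	}

	int
	main(void)
	{
		uint64_t v = 0x0123456789abcdefULL;

		printf("%llx\n", (unsigned long long)test_load64(&v));
		return (0);
	}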
