diff --git a/sys/amd64/include/atomic.h b/sys/amd64/include/atomic.h
--- a/sys/amd64/include/atomic.h
+++ b/sys/amd64/include/atomic.h
@@ -140,19 +140,13 @@
 #define	ATOMIC_STORE(TYPE)					\
 void		atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
 
-#else /* !KLD_MODULE && __GNUCLIKE_ASM */
-
-/*
- * For userland, always use lock prefixes so that the binaries will run
- * on both SMP and !SMP systems.
- */
-#if defined(SMP) || !defined(_KERNEL) || defined(KLD_MODULE)
-#define	MPLOCKED	"lock ; "
-#else
-#define	MPLOCKED
-#endif
+#else /* !__GNUCLIKE_ASM */
 
 /*
+ * Always use lock prefixes.  The result is slightly less optimal for
+ * UP systems, but it matters less now, and sometimes UP is emulated
+ * over SMP.
+ *
  * The assembly is volatilized to avoid code chunk removal by the compiler.
  * GCC aggressively reorders operations and memory clobbering is necessary
  * in order to avoid that for memory barriers.
@@ -161,7 +155,7 @@
 static __inline void					\
 atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
 {							\
-	__asm __volatile(MPLOCKED OP			\
+	__asm __volatile("lock;" OP			\
 	: "+m" (*p)					\
 	: CONS (V)					\
 	: "cc");					\
@@ -170,7 +164,7 @@
 static __inline void					\
 atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
 {							\
-	__asm __volatile(MPLOCKED OP			\
+	__asm __volatile("lock;" OP			\
 	: "+m" (*p)					\
 	: CONS (V)					\
 	: "memory", "cc");				\
@@ -199,8 +193,7 @@
 	u_char res;					\
 							\
 	__asm __volatile(				\
-	"	" MPLOCKED "		"		\
-	"	cmpxchg %3,%1 ;	"			\
+	"	lock; cmpxchg %3,%1 ;	"		\
 	"# atomic_cmpset_" #TYPE "	"		\
 	: "=@cce" (res),		/* 0 */		\
 	  "+m" (*dst),			/* 1 */		\
@@ -216,8 +209,7 @@
 	u_char res;					\
 							\
 	__asm __volatile(				\
-	"	" MPLOCKED "		"		\
-	"	cmpxchg %3,%1 ;	"			\
+	"	lock; cmpxchg %3,%1 ;	"		\
 	"# atomic_fcmpset_" #TYPE "	"		\
 	: "=@cce" (res),		/* 0 */		\
 	  "+m" (*dst),			/* 1 */		\
@@ -241,8 +233,7 @@
 {
 
 	__asm __volatile(
-	"	" MPLOCKED "		"
-	"	xaddl	%0,%1 ;		"
+	"	lock; xaddl	%0,%1 ;	"
 	"# atomic_fetchadd_int"
 	: "+r" (v),			/* 0 */
 	  "+m" (*p)			/* 1 */
@@ -259,8 +250,7 @@
 {
 
 	__asm __volatile(
-	"	" MPLOCKED "		"
-	"	xaddq	%0,%1 ;		"
+	"	lock; xaddq	%0,%1 ;	"
 	"# atomic_fetchadd_long"
 	: "+r" (v),			/* 0 */
 	  "+m" (*p)			/* 1 */
@@ -274,8 +264,7 @@
 	u_char res;
 
 	__asm __volatile(
-	"	" MPLOCKED "		"
-	"	btsl	%2,%1 ;		"
+	"	lock; btsl	%2,%1 ;	"
 	"# atomic_testandset_int"
 	: "=@ccc" (res),		/* 0 */
 	  "+m" (*p)			/* 1 */
@@ -290,8 +279,7 @@
 	u_char res;
 
 	__asm __volatile(
-	"	" MPLOCKED "		"
-	"	btsq	%2,%1 ;		"
+	"	lock; btsq	%2,%1 ;	"
 	"# atomic_testandset_long"
 	: "=@ccc" (res),		/* 0 */
 	  "+m" (*p)			/* 1 */
@@ -306,8 +294,7 @@
 	u_char res;
 
	__asm __volatile(
-	"	" MPLOCKED "		"
-	"	btrl	%2,%1 ;		"
+	"	lock; btrl	%2,%1 ;	"
 	"# atomic_testandclear_int"
 	: "=@ccc" (res),		/* 0 */
 	  "+m" (*p)			/* 1 */
@@ -322,8 +309,7 @@
 	u_char res;
 
 	__asm __volatile(
-	"	" MPLOCKED "		"
-	"	btrq	%2,%1 ;		"
+	"	lock; btrq	%2,%1 ;	"
 	"# atomic_testandclear_long"
 	: "=@ccc" (res),		/* 0 */
 	  "+m" (*p)			/* 1 */
@@ -344,39 +330,18 @@
  * special address for "mem".  In the kernel, we use a private per-cpu
  * cache line.  In user space, we use a word in the stack's red zone
  * (-8(%rsp)).
- *
- * For UP kernels, however, the memory of the single processor is
- * always consistent, so we only need to stop the compiler from
- * reordering accesses in a way that violates the semantics of acquire
- * and release.
  */
-#if defined(_KERNEL)
-
-#if defined(SMP) || defined(KLD_MODULE)
 static __inline void
 __storeload_barrier(void)
 {
-
+#if defined(_KERNEL)
 	__asm __volatile("lock; addl $0,%%gs:%0"
 	    : "+m" (*(u_int *)OFFSETOF_MONITORBUF)
 	    : : "memory", "cc");
-}
-#else /* _KERNEL && UP */
-static __inline void
-__storeload_barrier(void)
-{
-
-	__compiler_membar();
-}
-#endif /* SMP */
 #else /* !_KERNEL */
-static __inline void
-__storeload_barrier(void)
-{
-
 	__asm __volatile("lock; addl $0,-8(%%rsp)" : : : "memory", "cc");
-}
 #endif /* _KERNEL*/
+}
 
 #define	ATOMIC_LOAD(TYPE)					\
 static __inline u_##TYPE					\
@@ -428,7 +393,7 @@
 	__storeload_barrier();
 }
 
-#endif /* KLD_MODULE || !__GNUCLIKE_ASM */
+#endif /* !__GNUCLIKE_ASM */
 
 ATOMIC_ASM(set,	     char,  "orb %b1,%0",  "iq",  v);
 ATOMIC_ASM(clear,    char,  "andb %b1,%0", "iq", ~v);
diff --git a/sys/i386/include/atomic.h b/sys/i386/include/atomic.h
--- a/sys/i386/include/atomic.h
+++ b/sys/i386/include/atomic.h
@@ -138,19 +138,13 @@
 void		atomic_add_64(volatile uint64_t *, uint64_t);
 void		atomic_subtract_64(volatile uint64_t *, uint64_t);
 
-#else /* !KLD_MODULE && __GNUCLIKE_ASM */
-
-/*
- * For userland, always use lock prefixes so that the binaries will run
- * on both SMP and !SMP systems.
- */
-#if defined(SMP) || !defined(_KERNEL) || defined(KLD_MODULE)
-#define	MPLOCKED	"lock ; "
-#else
-#define	MPLOCKED
-#endif
+#else /* !__GNUCLIKE_ASM */
 
 /*
+ * Always use lock prefixes.  The result is slightly less optimal for
+ * UP systems, but it matters less now, and sometimes UP is emulated
+ * over SMP.
+ *
  * The assembly is volatilized to avoid code chunk removal by the compiler.
  * GCC aggressively reorders operations and memory clobbering is necessary
  * in order to avoid that for memory barriers.
@@ -159,7 +153,7 @@
 static __inline void					\
 atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
 {							\
-	__asm __volatile(MPLOCKED OP			\
+	__asm __volatile("lock; " OP			\
 	: "+m" (*p)					\
 	: CONS (V)					\
 	: "cc");					\
@@ -168,7 +162,7 @@
 static __inline void					\
 atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
 {							\
-	__asm __volatile(MPLOCKED OP			\
+	__asm __volatile("lock; " OP			\
 	: "+m" (*p)					\
 	: CONS (V)					\
 	: "memory", "cc");				\
@@ -197,8 +191,7 @@
 	u_char res;					\
 							\
 	__asm __volatile(				\
-	"	" MPLOCKED "		"		\
-	"	cmpxchg %3,%1 ;	"			\
+	"	lock; cmpxchg %3,%1 ;	"		\
 	"	sete	%0 ;		"		\
 	"# atomic_cmpset_" #TYPE "	"		\
 	: "=q" (res),			/* 0 */		\
@@ -215,8 +208,7 @@
 	u_char res;					\
 							\
 	__asm __volatile(				\
-	"	" MPLOCKED "		"		\
-	"	cmpxchg %3,%1 ;	"			\
+	"	lock; cmpxchg %3,%1 ;	"		\
 	"	sete	%0 ;		"		\
 	"# atomic_fcmpset_" #TYPE "	"		\
 	: "=q" (res),			/* 0 */		\
@@ -240,8 +232,7 @@
 {
 
 	__asm __volatile(
-	"	" MPLOCKED "		"
-	"	xaddl	%0,%1 ;		"
+	"	lock; xaddl	%0,%1 ;	"
 	"# atomic_fetchadd_int"
 	: "+r" (v),			/* 0 */
 	  "+m" (*p)			/* 1 */
@@ -255,8 +246,7 @@
 	u_char res;
 
 	__asm __volatile(
-	"	" MPLOCKED "		"
-	"	btsl	%2,%1 ;		"
+	"	lock; btsl	%2,%1 ;	"
 	"	setc	%0 ;		"
 	"# atomic_testandset_int"
 	: "=q" (res),			/* 0 */
@@ -272,8 +262,7 @@
 	u_char res;
 
 	__asm __volatile(
-	"	" MPLOCKED "		"
-	"	btrl	%2,%1 ;		"
+	"	lock; btrl	%2,%1 ;	"
 	"	setc	%0 ;		"
 	"# atomic_testandclear_int"
 	: "=q" (res),			/* 0 */
@@ -303,11 +292,7 @@
  */
 
 #if defined(_KERNEL)
-#if defined(SMP) || defined(KLD_MODULE)
 #define	__storeload_barrier()	__mbk()
-#else /* _KERNEL && UP */
-#define	__storeload_barrier()	__compiler_membar()
-#endif /* SMP */
 #else /* !_KERNEL */
 #define	__storeload_barrier()	__mbu()
 #endif /* _KERNEL*/
@@ -484,8 +469,7 @@
 	u_char res;
 
 	__asm __volatile(
-	"	" MPLOCKED "		"
-	"	cmpxchg8b %1 ;		"
+	"	lock; cmpxchg8b %1 ;	"
 	"	sete	%0"
 	: "=q" (res),			/* 0 */
 	  "+m" (*dst),			/* 1 */
@@ -502,8 +486,7 @@
 	u_char res;
 
 	__asm __volatile(
-	"	" MPLOCKED "		"
-	"	cmpxchg8b %1 ;		"
+	"	lock; cmpxchg8b %1 ;	"
 	"	sete	%0"
 	: "=q" (res),			/* 0 */
 	  "+m" (*dst),			/* 1 */
@@ -522,8 +505,7 @@
 	__asm __volatile(
 	"	movl %%ebx,%%eax ;	"
 	"	movl %%ecx,%%edx ;	"
-	"	" MPLOCKED "		"
-	"	cmpxchg8b %1"
+	"	lock; cmpxchg8b %1"
 	: "=&A" (res),			/* 0 */
 	  "+m" (*p)			/* 1 */
 	: : "memory", "cc");
@@ -538,8 +520,7 @@
 	"	movl %%eax,%%ebx ;	"
 	"	movl %%edx,%%ecx ;	"
 	"1:				"
-	"	" MPLOCKED "		"
-	"	cmpxchg8b %0 ;	"
+	"	lock; cmpxchg8b %0 ;	"
 	"	jne 1b"
 	: "+m" (*p),			/* 0 */
 	  "+A" (v)			/* 1 */
@@ -554,8 +535,7 @@
 	"	movl %%eax,%%ebx ;	"
 	"	movl %%edx,%%ecx ;	"
 	"1:				"
-	"	" MPLOCKED "		"
-	"	cmpxchg8b %0 ;	"
+	"	lock; cmpxchg8b %0 ;	"
 	"	jne 1b"
 	: "+m" (*p),			/* 0 */
 	  "+A" (v)			/* 1 */
@@ -650,7 +630,7 @@
 
 #endif /* _KERNEL */
 
-#endif /* KLD_MODULE || !__GNUCLIKE_ASM */
+#endif /* !__GNUCLIKE_ASM */
 
 ATOMIC_ASM(set, char, "orb %b1,%0", "iq", v);
 ATOMIC_ASM(clear, char, "andb %b1,%0", "iq", ~v);
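
For anyone who wants to poke at the behaviour outside the tree, here is a minimal, self-contained userland sketch (amd64 only, not part of the patch; the demo_* names are made up for illustration). It mirrors the approach the change standardizes on: the lock prefix is emitted unconditionally, and the userland __storeload_barrier() trick of a lock-prefixed no-op addl on the stack red zone serves as the store/load fence.

#include <stdio.h>

/*
 * Store/load fence via a lock-prefixed RMW on a scratch word in the
 * red zone, the same idea as the !_KERNEL __storeload_barrier() above.
 */
static __inline void
demo_storeload_barrier(void)
{

	__asm __volatile("lock; addl $0,-8(%%rsp)" : : : "memory", "cc");
}

/*
 * Compare-and-set that always emits the lock prefix, in the style of
 * ATOMIC_CMPSET() after this change (sete variant, as on i386).
 */
static __inline int
demo_cmpset_int(volatile unsigned int *dst, unsigned int expect, unsigned int src)
{
	unsigned char res;

	__asm __volatile(
	"	lock; cmpxchgl %3,%1 ;	"
	"	sete	%0"
	: "=q" (res),			/* 0 */
	  "+m" (*dst),			/* 1 */
	  "+a" (expect)			/* 2 */
	: "r" (src)			/* 3 */
	: "memory", "cc");
	return (res);
}

int
main(void)
{
	volatile unsigned int v = 1;
	int ok;

	demo_storeload_barrier();
	ok = demo_cmpset_int(&v, 1, 2);	/* succeeds, v becomes 2 */
	printf("cmpset 1->2: %d, v = %u\n", ok, v);
	ok = demo_cmpset_int(&v, 1, 3);	/* fails, v stays 2 */
	printf("cmpset 1->3: %d, v = %u\n", ok, v);
	return (0);
}

Building this with cc -O2 on amd64 and inspecting the disassembly shows the lock-prefixed instructions are always present, which is the point of the patch: kernel, modules, and userland now share one code path instead of the SMP/UP/MPLOCKED split.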