diff --git a/sys/arm/include/atomic.h b/sys/arm/include/atomic.h
index dd2ad47e9bed..d632a6db89df 100644
--- a/sys/arm/include/atomic.h
+++ b/sys/arm/include/atomic.h
@@ -1,1114 +1,1110 @@
 /* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
  *
  * Copyright (C) 2003-2004 Olivier Houchard
  * Copyright (C) 1994-1997 Mark Brinicombe
  * Copyright (C) 1994 Brini
  * All rights reserved.
  *
  * This code is derived from software written for Brini by Mark Brinicombe
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by Brini.
  * 4. The name of Brini may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #ifndef	_MACHINE_ATOMIC_H_
 #define	_MACHINE_ATOMIC_H_
 
 #include <sys/atomic_common.h>
 
 /*
  * Barrier primitives.  ARMv7 and later use the dedicated isb/dsb/dmb
  * instructions; older architectures issue the equivalent CP15 c7 register
  * writes (mcr) instead.  Every variant lists "memory" as a clobber, so each
  * one is a compiler barrier as well.
  */
 #if __ARM_ARCH >= 7
 #define isb()  __asm __volatile("isb" : : : "memory")
 #define dsb()  __asm __volatile("dsb" : : : "memory")
 #define dmb()  __asm __volatile("dmb" : : : "memory")
 #else
 #define isb()  __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
 #define dsb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
 #define dmb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory")
 #endif
 
 /* mb(), wmb() and rmb() all expand to the same dmb(). */
 #define mb()   dmb()
 #define wmb()  dmb()
 #define rmb()  dmb()
 
 /*
  * NOTE(review): presumably advertises that the 64-bit atomics below are
  * available -- confirm against the consumers of this macro.
  */
 #define	ARM_HAVE_ATOMIC64
 
 /*
  * Generate atomic_<NAME>_acq_long() and atomic_<NAME>_rel_long() on top of
  * an existing atomic_<NAME>_long().  The acquire form issues dmb() after
  * the operation; the release form issues dmb() before it.
  */
 #define ATOMIC_ACQ_REL_LONG(NAME)					\
 static __inline void							\
 atomic_##NAME##_acq_long(__volatile u_long *p, u_long v)		\
 {									\
 	atomic_##NAME##_long(p, v);					\
 	dmb();								\
 }									\
 									\
 static __inline  void							\
 atomic_##NAME##_rel_long(__volatile u_long *p, u_long v)		\
 {									\
 	dmb();								\
 	atomic_##NAME##_long(p, v);					\
 }
 
 /*
  * Generate atomic_<NAME>_acq_<WIDTH>() and atomic_<NAME>_rel_<WIDTH>() on
  * top of an existing atomic_<NAME>_<WIDTH>() operating on uint<WIDTH>_t.
  * The acquire form issues dmb() after the operation; the release form
  * issues dmb() before it.
  */
 #define	ATOMIC_ACQ_REL(NAME, WIDTH)					\
 static __inline  void							\
 atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
 {									\
 	atomic_##NAME##_##WIDTH(p, v);					\
 	dmb();								\
 }									\
 									\
 static __inline  void							\
 atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
 {									\
 	dmb();								\
 	atomic_##NAME##_##WIDTH(p, v);					\
 }
 
 /*
  * Atomically add val to the 32-bit word at p.
  *
  * Implemented as an ldrex/strex loop that is retried until strex reports
  * success (status 0).  No dmb() is issued here.
  */
 static __inline void
 atomic_add_32(volatile uint32_t *p, uint32_t val)
 {
 	uint32_t tmp = 0, tmp2 = 0;
 
 	/* %0 = tmp (loaded/updated value), %1 = tmp2 (strex status). */
 	__asm __volatile(
 	    "1: ldrex	%0, [%2]	\n"
 	    "   add	%0, %0, %3	\n"
 	    "   strex	%1, %0, [%2]	\n"
 	    "   cmp	%1, #0		\n"
 	    "   it	ne		\n"
 	    "   bne	1b		\n"
 	    : "=&r" (tmp), "+r" (tmp2)
 	    ,"+r" (p), "+r" (val) : : "cc", "memory");
 }
 
 /*
  * Atomically add val to the 64-bit value at p.
  *
  * Uses an ldrexd/strexd loop; the low words are added with adds and the
  * carry is propagated into the high words with adc.  The loop is retried
  * until strexd reports success.  No dmb() is issued here.
  */
 static __inline void
 atomic_add_64(volatile uint64_t *p, uint64_t val)
 {
 	uint64_t tmp;
 	uint32_t exflag;
 
 	__asm __volatile(
 	    "1:							\n"
 	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
 	    "   adds	%Q[tmp], %Q[val]			\n"
 	    "   adc	%R[tmp], %R[tmp], %R[val]		\n"
 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
 	    "   teq	%[exf], #0				\n"
 	    "   it	ne					\n"
 	    "   bne	1b					\n"
 	    : [exf] "=&r" (exflag),
 	      [tmp] "=&r" (tmp)
 	    : [ptr] "r"   (p),
 	      [val] "r"   (val)
 	    : "cc", "memory");
 }
 
 /* Atomically add val to *p by forwarding to atomic_add_32(). */
 static __inline void
 atomic_add_long(volatile u_long *p, u_long val)
 {
 
 	atomic_add_32((volatile uint32_t *)p, val);
 }
 
 /* Acquire/release flavours of the add operations above. */
 ATOMIC_ACQ_REL(add, 32)
 ATOMIC_ACQ_REL(add, 64)
 ATOMIC_ACQ_REL_LONG(add)
 
 /*
  * Atomically clear in *address every bit that is set in setmask
  * (*address &= ~setmask, via bic).
  *
  * Implemented as an ldrex/strex loop retried until strex succeeds.
  * No dmb() is issued here.
  */
 static __inline void
 atomic_clear_32(volatile uint32_t *address, uint32_t setmask)
 {
 	uint32_t tmp = 0, tmp2 = 0;
 
 	/* %0 = tmp (loaded/updated value), %1 = tmp2 (strex status). */
 	__asm __volatile(
 	    "1: ldrex	%0, [%2]	\n"
 	    "   bic	%0, %0, %3	\n"
 	    "   strex	%1, %0, [%2]	\n"
 	    "   cmp	%1, #0		\n"
 	    "   it	ne		\n"
 	    "   bne	1b		\n"
 	    : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
 	    : : "cc", "memory");
 }
 
 /*
  * Atomically clear in the 64-bit value at p every bit that is set in val.
  *
  * Uses an ldrexd/strexd loop with one bic per 32-bit half, retried until
  * strexd reports success.  No dmb() is issued here.
  */
 static __inline void
 atomic_clear_64(volatile uint64_t *p, uint64_t val)
 {
 	uint64_t tmp;
 	uint32_t exflag;
 
 	__asm __volatile(
 	    "1:							\n"
 	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
 	    "   bic	%Q[tmp], %Q[val]			\n"
 	    "   bic	%R[tmp], %R[val]			\n"
 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
 	    "   teq	%[exf], #0				\n"
 	    "   it	ne					\n"
 	    "   bne	1b					\n"
 	    : [exf] "=&r" (exflag),
 	      [tmp] "=&r" (tmp)
 	    : [ptr] "r"   (p),
 	      [val] "r"   (val)
 	    : "cc", "memory");
 }
 
 /* Atomically clear the setmask bits in *address via atomic_clear_32(). */
 static __inline void
 atomic_clear_long(volatile u_long *address, u_long setmask)
 {
 
 	atomic_clear_32((volatile uint32_t *)address, setmask);
 }
 
 /* Acquire/release flavours of the clear operations above. */
 ATOMIC_ACQ_REL(clear, 32)
 ATOMIC_ACQ_REL(clear, 64)
 ATOMIC_ACQ_REL_LONG(clear)
 
 /*
  * Body shared by the 8-, 16-, 32-bit and long atomic_fcmpset variants.
  * The expanding function must have _ptr, _old and _new in scope.
  *
  *   RET   int lvalue receiving the result
  *   TYPE  C type of the temporary holding the loaded value
  *   SUF   width suffix appended to ldrex/ldr/str/strex ("b", "h" or "")
  *
  * The value at _ptr is loaded exclusively and compared with *_old.
  *  - Mismatch: the observed value is written to *_old and RET is set to 0.
  *  - Match: _new is stored with strex.  The strex status is inverted with
  *    eors, so RET is 1 when the store succeeded; when it failed the
  *    inverted status is 0 with the Z flag set and the sequence restarts.
  */
 #define ATOMIC_FCMPSET_CODE(RET, TYPE, SUF)                   \
     {                                                         \
 	TYPE tmp;                                             \
                                                               \
 	__asm __volatile(                                     \
 	    "1: ldrex" SUF "   %[tmp], [%[ptr]]          \n"  \
 	    "   ldr" SUF "     %[ret], [%[oldv]]         \n"  \
 	    "   teq            %[tmp], %[ret]            \n"  \
 	    "   ittee          ne                        \n"  \
 	    "   str" SUF "ne   %[tmp], [%[oldv]]         \n"  \
 	    "   movne          %[ret], #0                \n"  \
 	    "   strex" SUF "eq %[ret], %[newv], [%[ptr]] \n"  \
 	    "   eorseq         %[ret], #1                \n"  \
 	    "   beq            1b                        \n"  \
 	    : [ret] "=&r" (RET),                              \
 	      [tmp] "=&r" (tmp)                               \
 	    : [ptr] "r"   (_ptr),                             \
 	      [oldv] "r"  (_old),                             \
 	      [newv] "r"  (_new)                              \
 	    : "cc", "memory");                                \
     }
 
 /*
  * Body shared by the 64-bit atomic_fcmpset variants.  The expanding
  * function must have _ptr, _old and _new in scope; RET is an int lvalue
  * receiving the result.
  *
  * The 64-bit value at _ptr is loaded exclusively into tmp and compared,
  * half by half, with the expected value cmp loaded from *_old.
  *  - Mismatch: RET is set to 0 and the observed value (tmp) is written back
  *    to *_old, matching what ATOMIC_FCMPSET_CODE does for narrower widths.
  *    Writing cmp here would merely rewrite the caller's stale expectation.
  *  - Match: _new is stored with strexd.  The status is inverted with eors,
  *    so RET is 1 when the store succeeded; when it failed the inverted
  *    status is 0 with the Z flag set and the sequence restarts.
  */
 #define ATOMIC_FCMPSET_CODE64(RET)                                 \
     {                                                              \
 	uint64_t cmp, tmp;                                         \
                                                                    \
 	__asm __volatile(                                          \
 	    "1: ldrexd   %Q[tmp], %R[tmp], [%[ptr]]           \n"  \
 	    "   ldrd     %Q[cmp], %R[cmp], [%[oldv]]          \n"  \
 	    "   teq      %Q[tmp], %Q[cmp]                     \n"  \
 	    "   it       eq                                   \n"  \
 	    "   teqeq    %R[tmp], %R[cmp]                     \n"  \
 	    "   ittee    ne                                   \n"  \
 	    "   movne    %[ret], #0                           \n"  \
 	    "   strdne   %Q[tmp], %R[tmp], [%[oldv]]          \n"  \
 	    "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]] \n"  \
 	    "   eorseq   %[ret], #1                           \n"  \
 	    "   beq      1b                                   \n"  \
 	    : [ret] "=&r" (RET),                                   \
 	      [cmp] "=&r" (cmp),                                   \
 	      [tmp] "=&r" (tmp)                                    \
 	    : [ptr] "r"   (_ptr),                                  \
 	      [oldv] "r"  (_old),                                  \
 	      [newv] "r"  (_new)                                   \
 	    : "cc", "memory");                                     \
     }
 
 /*
  * 8-bit compare-and-set that reports the observed value.  If *_ptr equals
  * *_old, _new is stored and 1 is returned; otherwise *_old receives the
  * observed value and 0 is returned.  No dmb() is issued.
  */
 static __inline int
 atomic_fcmpset_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
 {
 	int ret;
 
 	ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
 	return (ret);
 }
 /*
  * NOTE(review): self-referential define; presumably lets a later header
  * detect that this operation is already provided -- confirm.
  */
 #define	atomic_fcmpset_8	atomic_fcmpset_8
 
 /* As atomic_fcmpset_8(), followed by dmb() (acquire). */
 static __inline int
 atomic_fcmpset_acq_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
 {
 	int ret;
 
 	ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
 	dmb();
 	return (ret);
 }
 
 /* As atomic_fcmpset_8(), preceded by dmb() (release). */
 static __inline int
 atomic_fcmpset_rel_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
 {
 	int ret;
 
 	dmb();
 	ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
 	return (ret);
 }
 
 /*
  * 16-bit compare-and-set that reports the observed value.  If *_ptr equals
  * *_old, _new is stored and 1 is returned; otherwise *_old receives the
  * observed value and 0 is returned.  No dmb() is issued.
  */
 static __inline int
 atomic_fcmpset_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
 {
 	int ret;
 
 	ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
 	return (ret);
 }
 /*
  * NOTE(review): self-referential define; presumably lets a later header
  * detect that this operation is already provided -- confirm.
  */
 #define	atomic_fcmpset_16	atomic_fcmpset_16
 
 /* As atomic_fcmpset_16(), followed by dmb() (acquire). */
 static __inline int
 atomic_fcmpset_acq_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
 {
 	int ret;
 
 	ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
 	dmb();
 	return (ret);
 }
 
 /* As atomic_fcmpset_16(), preceded by dmb() (release). */
 static __inline int
 atomic_fcmpset_rel_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
 {
 	int ret;
 
 	dmb();
 	ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
 	return (ret);
 }
 
 /*
  * 32-bit compare-and-set that reports the observed value.  If *_ptr equals
  * *_old, _new is stored and 1 is returned; otherwise *_old receives the
  * observed value and 0 is returned.  No dmb() is issued.
  */
 static __inline int
 atomic_fcmpset_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
 {
 	int ret;
 
 	ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
 	return (ret);
 }
 
 /* As atomic_fcmpset_32(), followed by dmb() (acquire). */
 static __inline int
 atomic_fcmpset_acq_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
 {
 	int ret;
 
 	ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
 	dmb();
 	return (ret);
 }
 
 /* As atomic_fcmpset_32(), preceded by dmb() (release). */
 static __inline int
 atomic_fcmpset_rel_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
 {
 	int ret;
 
 	dmb();
 	ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
 	return (ret);
 }
 
 /*
  * u_long compare-and-set that reports the observed value, using the
  * word-sized (no suffix) ldrex/strex forms.  Returns 1 when _new was
  * stored; otherwise *_old receives the observed value and 0 is returned.
  * No dmb() is issued.
  */
 static __inline int
 atomic_fcmpset_long(volatile u_long *_ptr, u_long *_old, u_long _new)
 {
 	int ret;
 
 	ATOMIC_FCMPSET_CODE(ret, u_long, "");
 	return (ret);
 }
 
 /* As atomic_fcmpset_long(), followed by dmb() (acquire). */
 static __inline int
 atomic_fcmpset_acq_long(volatile u_long *_ptr, u_long *_old, u_long _new)
 {
 	int ret;
 
 	ATOMIC_FCMPSET_CODE(ret, u_long, "");
 	dmb();
 	return (ret);
 }
 
 /* As atomic_fcmpset_long(), preceded by dmb() (release). */
 static __inline int
 atomic_fcmpset_rel_long(volatile u_long *_ptr, u_long *_old, u_long _new)
 {
 	int ret;
 
 	dmb();
 	ATOMIC_FCMPSET_CODE(ret, u_long, "");
 	return (ret);
 }
 
 /*
  * 64-bit fcmpset built on ATOMIC_FCMPSET_CODE64.  Returns 1 when _new was
  * stored and 0 when the comparison with *_old failed.  No dmb() is issued.
  */
 static __inline int
 atomic_fcmpset_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
 {
 	int ret;
 
 	ATOMIC_FCMPSET_CODE64(ret);
 	return (ret);
 }
 
 /* As atomic_fcmpset_64(), followed by dmb() (acquire). */
 static __inline int
 atomic_fcmpset_acq_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
 {
 	int ret;
 
 	ATOMIC_FCMPSET_CODE64(ret);
 	dmb();
 	return (ret);
 }
 
 /* As atomic_fcmpset_64(), preceded by dmb() (release). */
 static __inline int
 atomic_fcmpset_rel_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
 {
 	int ret;
 
 	dmb();
 	ATOMIC_FCMPSET_CODE64(ret);
 	return (ret);
 }
 
 /*
  * Body shared by the 8-, 16-, 32-bit and long atomic_cmpset variants.
  * The expanding function must have _ptr, _old and _new in scope.
  *
  *   RET  int lvalue receiving the result
  *   SUF  width suffix appended to ldrex/strex ("b", "h" or "")
  *
  * The value at _ptr is loaded exclusively into RET and compared with _old.
  *  - Mismatch: RET is set to 0.
  *  - Match: _new is stored with strex.  The strex status is inverted with
  *    eors, so RET is 1 when the store succeeded; when it failed the
  *    inverted status is 0 with the Z flag set and the sequence restarts.
  */
 #define ATOMIC_CMPSET_CODE(RET, SUF)                         \
     {                                                        \
 	__asm __volatile(                                    \
 	    "1: ldrex" SUF "   %[ret], [%[ptr]]          \n" \
 	    "   teq            %[ret], %[oldv]           \n" \
 	    "   itee           ne                        \n" \
 	    "   movne          %[ret], #0                \n" \
 	    "   strex" SUF "eq %[ret], %[newv], [%[ptr]] \n" \
 	    "   eorseq         %[ret], #1                \n" \
 	    "   beq            1b                        \n" \
 	    : [ret] "=&r" (RET)                              \
 	    : [ptr] "r"   (_ptr),                            \
 	      [oldv] "r"  (_old),                            \
 	      [newv] "r"  (_new)                             \
 	    : "cc", "memory");                               \
     }
 
 /*
  * Body shared by the 64-bit atomic_cmpset variants.  The expanding
  * function must have _ptr, _old and _new in scope; RET is an int lvalue
  * receiving the result.
  *
  * The 64-bit value at _ptr is loaded exclusively into tmp and compared,
  * half by half, with _old.
  *  - Mismatch: RET is set to 0.
  *  - Match: _new is stored with strexd.  The status is inverted with eors,
  *    so RET is 1 when the store succeeded; when it failed the inverted
  *    status is 0 with the Z flag set and the sequence restarts.
  */
 #define ATOMIC_CMPSET_CODE64(RET)                                 \
     {                                                             \
 	uint64_t tmp;                                             \
 	                                                          \
 	__asm __volatile(                                         \
 	    "1: ldrexd   %Q[tmp], %R[tmp], [%[ptr]]           \n" \
 	    "   teq      %Q[tmp], %Q[oldv]                    \n" \
 	    "   it       eq                                   \n" \
 	    "   teqeq    %R[tmp], %R[oldv]                    \n" \
 	    "   itee     ne                                   \n" \
 	    "   movne    %[ret], #0                           \n" \
 	    "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]] \n" \
 	    "   eorseq   %[ret], #1                           \n" \
 	    "   beq      1b                                   \n" \
 	    : [ret] "=&r" (RET),                                  \
 	      [tmp] "=&r" (tmp)                                   \
 	    : [ptr] "r"   (_ptr),                                 \
 	      [oldv] "r"  (_old),                                 \
 	      [newv] "r"  (_new)                                  \
 	    : "cc", "memory");                                    \
     }
 
 /*
  * 8-bit compare-and-set.  Stores _new and returns 1 if *_ptr equals _old;
  * returns 0 otherwise.  No dmb() is issued.
  */
 static __inline int
 atomic_cmpset_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
 {
 	int ret;
 
 	ATOMIC_CMPSET_CODE(ret, "b");
 	return (ret);
 }
 /*
  * NOTE(review): self-referential define; presumably lets a later header
  * detect that this operation is already provided -- confirm.
  */
 #define	atomic_cmpset_8		atomic_cmpset_8
 
 /* As atomic_cmpset_8(), followed by dmb() (acquire). */
 static __inline int
 atomic_cmpset_acq_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
 {
 	int ret;
 
 	ATOMIC_CMPSET_CODE(ret, "b");
 	dmb();
 	return (ret);
 }
 
 /* As atomic_cmpset_8(), preceded by dmb() (release). */
 static __inline int
 atomic_cmpset_rel_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
 {
 	int ret;
 
 	dmb();
 	ATOMIC_CMPSET_CODE(ret, "b");
 	return (ret);
 }
 
 /*
  * 16-bit compare-and-set.  Stores _new and returns 1 if *_ptr equals _old;
  * returns 0 otherwise.  No dmb() is issued.
  */
 static __inline int
 atomic_cmpset_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
 {
 	int ret;
 
 	ATOMIC_CMPSET_CODE(ret, "h");
 	return (ret);
 }
 /*
  * NOTE(review): self-referential define; presumably lets a later header
  * detect that this operation is already provided -- confirm.
  */
 #define	atomic_cmpset_16	atomic_cmpset_16
 
 /* As atomic_cmpset_16(), followed by dmb() (acquire). */
 static __inline int
 atomic_cmpset_acq_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
 {
 	int ret;
 
 	ATOMIC_CMPSET_CODE(ret, "h");
 	dmb();
 	return (ret);
 }
 
 /* As atomic_cmpset_16(), preceded by dmb() (release). */
 static __inline int
 atomic_cmpset_rel_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
 {
 	int ret;
 
 	dmb();
 	ATOMIC_CMPSET_CODE(ret, "h");
 	return (ret);
 }
 
 /*
  * 32-bit compare-and-set.  Stores _new and returns 1 if *_ptr equals _old;
  * returns 0 otherwise.  No dmb() is issued.
  */
 static __inline int
 atomic_cmpset_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
 {
 	int ret;
 
 	ATOMIC_CMPSET_CODE(ret, "");
 	return (ret);
 }
 
 /* As atomic_cmpset_32(), followed by dmb() (acquire). */
 static __inline int
 atomic_cmpset_acq_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
 {
 	int ret;
 
 	ATOMIC_CMPSET_CODE(ret, "");
 	dmb();
 	return (ret);
 }
 
 /* As atomic_cmpset_32(), preceded by dmb() (release). */
 static __inline int
 atomic_cmpset_rel_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
 {
 	int ret;
 
 	dmb();
 	ATOMIC_CMPSET_CODE(ret, "");
 	return (ret);
 }
 
 /*
  * u_long compare-and-set using the word-sized (no suffix) ldrex/strex
  * forms.  Stores _new and returns 1 if *_ptr equals _old; returns 0
  * otherwise.  No dmb() is issued.
  */
 static __inline int
 atomic_cmpset_long(volatile u_long *_ptr, u_long _old, u_long _new)
 {
 	int ret;
 
 	ATOMIC_CMPSET_CODE(ret, "");
 	return (ret);
 }
 
 /* As atomic_cmpset_long(), followed by dmb() (acquire). */
 static __inline int
 atomic_cmpset_acq_long(volatile u_long *_ptr, u_long _old, u_long _new)
 {
 	int ret;
 
 	ATOMIC_CMPSET_CODE(ret, "");
 	dmb();
 	return (ret);
 }
 
 /* As atomic_cmpset_long(), preceded by dmb() (release). */
 static __inline int
 atomic_cmpset_rel_long(volatile u_long *_ptr, u_long _old, u_long _new)
 {
 	int ret;
 
 	dmb();
 	ATOMIC_CMPSET_CODE(ret, "");
 	return (ret);
 }
 
 /*
  * 64-bit compare-and-set.  Stores _new and returns 1 if *_ptr equals _old;
  * returns 0 otherwise.  No dmb() is issued.
  */
 static __inline int
 atomic_cmpset_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
 {
 	int ret;
 
 	ATOMIC_CMPSET_CODE64(ret);
 	return (ret);
 }
 
 /* As atomic_cmpset_64(), followed by dmb() (acquire). */
 static __inline int
 atomic_cmpset_acq_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
 {
 	int ret;
 
 	ATOMIC_CMPSET_CODE64(ret);
 	dmb();
 	return (ret);
 }
 
 /* As atomic_cmpset_64(), preceded by dmb() (release). */
 static __inline int
 atomic_cmpset_rel_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
 {
 	int ret;
 
 	dmb();
 	ATOMIC_CMPSET_CODE64(ret);
 	return (ret);
 }
 
 /*
  * Atomically add val to *p and return the value *p held before the
  * addition.
  *
  * Implemented as an ldrex/strex loop retried until strex succeeds.
  * No dmb() is issued here.
  */
 static __inline uint32_t
 atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
 {
 	uint32_t tmp = 0, tmp2 = 0, ret = 0;
 
 	/* %0 = ret (old value), %1 = tmp (sum), %2 = tmp2 (strex status). */
 	__asm __volatile(
 	    "1: ldrex	%0, [%3]	\n"
 	    "   add	%1, %0, %4	\n"
 	    "   strex	%2, %1, [%3]	\n"
 	    "   cmp	%2, #0		\n"
 	    "   it	ne		\n"
 	    "   bne	1b		\n"
 	    : "+r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
 	    : : "cc", "memory");
 	return (ret);
 }
 
 /*
  * Atomically add val to the 64-bit value at p and return the value it held
  * before the addition.
  *
  * Uses an ldrexd/strexd loop; the sum is built in tmp with adds/adc so the
  * originally loaded value stays intact in ret.  No dmb() is issued here.
  */
 static __inline uint64_t
 atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
 {
 	uint64_t ret, tmp;
 	uint32_t exflag;
 
 	__asm __volatile(
 	    "1:							\n"
 	    "   ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
 	    "   adds	%Q[tmp], %Q[ret], %Q[val]		\n"
 	    "   adc	%R[tmp], %R[ret], %R[val]		\n"
 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
 	    "   teq	%[exf], #0				\n"
 	    "   it	ne					\n"
 	    "   bne	1b					\n"
 	    : [ret] "=&r" (ret),
 	      [exf] "=&r" (exflag),
 	      [tmp] "=&r" (tmp)
 	    : [ptr] "r"   (p),
 	      [val] "r"   (val)
 	    : "cc", "memory");
 	return (ret);
 }
 
 /* Fetch-and-add on a u_long, forwarded to atomic_fetchadd_32(). */
 static __inline u_long
 atomic_fetchadd_long(volatile u_long *p, u_long val)
 {
 
 	return (atomic_fetchadd_32((volatile uint32_t *)p, val));
 }
 
 /* Load *p with acquire ordering: plain volatile read followed by dmb(). */
 static __inline uint32_t
 atomic_load_acq_32(volatile uint32_t *p)
 {
 	uint32_t v;
 
 	v = *p;
 	dmb();
 	return (v);
 }
 
 /* Atomically read the 64-bit value at p.  No dmb() is issued here. */
 static __inline uint64_t
 atomic_load_64(volatile uint64_t *p)
 {
 	uint64_t ret;
 
 	/*
 	 * The only way to atomically load 64 bits is with LDREXD which puts the
 	 * exclusive monitor into the exclusive state, so reset it to open state
 	 * with CLREX because we don't actually need to store anything.
 	 */
 	__asm __volatile(
 	    "ldrexd	%Q[ret], %R[ret], [%[ptr]]	\n"
 	    "clrex					\n"
 	    : [ret] "=&r" (ret)
 	    : [ptr] "r"   (p)
 	    : "cc", "memory");
 	return (ret);
 }
 
 /* Load the 64-bit value at p via atomic_load_64(), then issue dmb(). */
 static __inline uint64_t
 atomic_load_acq_64(volatile uint64_t *p)
 {
 	uint64_t ret;
 
 	ret = atomic_load_64(p);
 	dmb();
 	return (ret);
 }
 
 /* Load *p with acquire ordering: plain volatile read followed by dmb(). */
 static __inline u_long
 atomic_load_acq_long(volatile u_long *p)
 {
 	u_long v;
 
 	v = *p;
 	dmb();
 	return (v);
 }
 
 /*
  * Atomically replace *p with 0 and return the value it held before.
  *
  * Implemented as an ldrex/strex loop retried until strex succeeds.
  * No dmb() is issued here.
  */
 static __inline uint32_t
 atomic_readandclear_32(volatile uint32_t *p)
 {
 	uint32_t ret, tmp = 0, tmp2 = 0;
 
 	/* %0 = ret (old value), %1 = tmp (zero), %2 = tmp2 (strex status). */
 	__asm __volatile(
 	    "1: ldrex	%0, [%3]	\n"
 	    "   mov	%1, #0		\n"
 	    "   strex	%2, %1, [%3]	\n"
 	    "   cmp	%2, #0		\n"
 	    "   it	ne		\n"
 	    "   bne	1b		\n"
 	    : "=r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p)
 	    : : "cc", "memory");
 	return (ret);
 }
 
 /*
  * Atomically replace the 64-bit value at p with 0 and return the value it
  * held before.
  *
  * Uses an ldrexd/strexd loop retried until strexd reports success.
  * No dmb() is issued here.
  */
 static __inline uint64_t
 atomic_readandclear_64(volatile uint64_t *p)
 {
 	uint64_t ret, tmp;
 	uint32_t exflag;
 
 	__asm __volatile(
 	    "1:							\n"
 	    "   ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
 	    "   mov	%Q[tmp], #0				\n"
 	    "   mov	%R[tmp], #0				\n"
 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
 	    "   teq	%[exf], #0				\n"
 	    "   it	ne					\n"
 	    "   bne	1b					\n"
 	    : [ret] "=&r" (ret),
 	      [exf] "=&r" (exflag),
 	      [tmp] "=&r" (tmp)
 	    : [ptr] "r"   (p)
 	    : "cc", "memory");
 	return (ret);
 }
 
 /* Read-and-clear on a u_long, forwarded to atomic_readandclear_32(). */
 static __inline u_long
 atomic_readandclear_long(volatile u_long *p)
 {
 
 	return (atomic_readandclear_32((volatile uint32_t *)p));
 }
 
 /*
  * Atomically set in *address every bit that is set in setmask
  * (*address |= setmask, via orr).
  *
  * Implemented as an ldrex/strex loop retried until strex succeeds.
  * No dmb() is issued here.
  */
 static __inline void
 atomic_set_32(volatile uint32_t *address, uint32_t setmask)
 {
 	uint32_t tmp = 0, tmp2 = 0;
 
 	/* %0 = tmp (loaded/updated value), %1 = tmp2 (strex status). */
 	__asm __volatile(
 	    "1: ldrex	%0, [%2]	\n"
 	    "   orr	%0, %0, %3	\n"
 	    "   strex	%1, %0, [%2]	\n"
 	    "   cmp	%1, #0		\n"
 	    "   it	ne		\n"
 	    "   bne	1b		\n"
 	    : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
 	    : : "cc", "memory");
 }
 
 /*
  * Atomically set in the 64-bit value at p every bit that is set in val.
  *
  * Uses an ldrexd/strexd loop with one orr per 32-bit half, retried until
  * strexd reports success.  No dmb() is issued here.
  */
 static __inline void
 atomic_set_64(volatile uint64_t *p, uint64_t val)
 {
 	uint64_t tmp;
 	uint32_t exflag;
 
 	__asm __volatile(
 	    "1:							\n"
 	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
 	    "   orr	%Q[tmp], %Q[val]			\n"
 	    "   orr	%R[tmp], %R[val]			\n"
 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
 	    "   teq	%[exf], #0				\n"
 	    "   it	ne					\n"
 	    "   bne	1b					\n"
 	    : [exf] "=&r" (exflag),
 	      [tmp] "=&r" (tmp)
 	    : [ptr] "r"   (p),
 	      [val] "r"   (val)
 	    : "cc", "memory");
 }
 
 /* Atomically set the setmask bits in *address via atomic_set_32(). */
 static __inline void
 atomic_set_long(volatile u_long *address, u_long setmask)
 {
 
 	atomic_set_32((volatile uint32_t *)address, setmask);
 }
 
 /* Acquire/release flavours of the set operations above. */
 ATOMIC_ACQ_REL(set, 32)
 ATOMIC_ACQ_REL(set, 64)
 ATOMIC_ACQ_REL_LONG(set)
 
 /*
  * Atomically subtract val from the 32-bit word at p.
  *
  * Implemented as an ldrex/strex loop retried until strex succeeds.
  * No dmb() is issued here.
  */
 static __inline void
 atomic_subtract_32(volatile uint32_t *p, uint32_t val)
 {
 	uint32_t tmp = 0, tmp2 = 0;
 
 	/* %0 = tmp (loaded/updated value), %1 = tmp2 (strex status). */
 	__asm __volatile(
 	    "1: ldrex	%0, [%2]	\n"
 	    "   sub	%0, %0, %3	\n"
 	    "   strex	%1, %0, [%2]	\n"
 	    "   cmp	%1, #0		\n"
 	    "   it	ne		\n"
 	    "   bne	1b		\n"
 	    : "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
 	    : : "cc", "memory");
 }
 
 /*
  * Atomically subtract val from the 64-bit value at p.
  *
  * Uses an ldrexd/strexd loop; the low words are subtracted with subs and
  * the borrow is propagated into the high words with sbc.  The loop is
  * retried until strexd reports success.  No dmb() is issued here.
  */
 static __inline void
 atomic_subtract_64(volatile uint64_t *p, uint64_t val)
 {
 	uint64_t tmp;
 	uint32_t exflag;
 
 	__asm __volatile(
 	    "1:							\n"
 	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
 	    "   subs	%Q[tmp], %Q[val]			\n"
 	    "   sbc	%R[tmp], %R[tmp], %R[val]		\n"
 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
 	    "   teq	%[exf], #0				\n"
 	    "   it	ne					\n"
 	    "   bne	1b					\n"
 	    : [exf] "=&r" (exflag),
 	      [tmp] "=&r" (tmp)
 	    : [ptr] "r"   (p),
 	      [val] "r"   (val)
 	    : "cc", "memory");
 }
 
 /* Atomically subtract val from *p via atomic_subtract_32(). */
 static __inline void
 atomic_subtract_long(volatile u_long *p, u_long val)
 {
 
 	atomic_subtract_32((volatile uint32_t *)p, val);
 }
 
 /* Acquire/release flavours of the subtract operations above. */
 ATOMIC_ACQ_REL(subtract, 32)
 ATOMIC_ACQ_REL(subtract, 64)
 ATOMIC_ACQ_REL_LONG(subtract)
 
 /*
  * Atomically store val into the 64-bit location at p.  The previous
  * contents are read into tmp and discarded.  No dmb() is issued here.
  */
 static __inline void
 atomic_store_64(volatile uint64_t *p, uint64_t val)
 {
 	uint64_t tmp;
 	uint32_t exflag;
 
 	/*
 	 * The only way to atomically store 64 bits is with STREXD, which will
 	 * succeed only if paired up with a preceding LDREXD using the same
 	 * address, so we read and discard the existing value before storing.
 	 */
 	__asm __volatile(
 	    "1:							\n"
 	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
 	    "   strexd	%[exf], %Q[val], %R[val], [%[ptr]]	\n"
 	    "   teq	%[exf], #0				\n"
 	    "   it	ne					\n"
 	    "   bne	1b					\n"
 	    : [tmp] "=&r" (tmp),
 	      [exf] "=&r" (exflag)
 	    : [ptr] "r"   (p),
 	      [val] "r"   (val)
 	    : "cc", "memory");
 }
 
 /* Store v to *p with release ordering: dmb() followed by a plain store. */
 static __inline void
 atomic_store_rel_32(volatile uint32_t *p, uint32_t v)
 {
 
 	dmb();
 	*p = v;
 }
 
 /* 64-bit release store: dmb() followed by atomic_store_64(). */
 static __inline void
 atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
 {
 
 	dmb();
 	atomic_store_64(p, val);
 }
 
 /* Store v to *p with release ordering: dmb() followed by a plain store. */
 static __inline void
 atomic_store_rel_long(volatile u_long *p, u_long v)
 {
 
 	dmb();
 	*p = v;
 }
 
 /*
  * Atomically clear bit number (bit & 0x1f) of *ptr.
  *
  * Returns 1 if that bit was set before the operation and 0 otherwise.
  * The single-bit mask is built in the ip register, which is therefore
  * listed as clobbered.  No dmb() is issued here.
  */
 static __inline int
 atomic_testandclear_32(volatile uint32_t *ptr, u_int bit)
 {
 	int newv, oldv, result;
 
 	__asm __volatile(
 	    "   mov     ip, #1					\n"
 	    "   lsl     ip, ip, %[bit]				\n"
 	    /*  Done with %[bit] as input, reuse below as output. */
 	    "1:							\n"
 	    "   ldrex	%[oldv], [%[ptr]]			\n"
 	    "   bic     %[newv], %[oldv], ip			\n"
 	    "   strex	%[bit], %[newv], [%[ptr]]		\n"
 	    "   teq	%[bit], #0				\n"
 	    "   it	ne					\n"
 	    "   bne	1b					\n"
 	    /*  Turn "old value & mask" into a 0/1 result. */
 	    "   ands	%[bit], %[oldv], ip			\n"
 	    "   it	ne					\n"
 	    "   movne   %[bit], #1                              \n"
 	    : [bit]  "=&r"   (result),
 	      [oldv] "=&r"   (oldv),
 	      [newv] "=&r"   (newv)
 	    : [ptr]  "r"     (ptr),
 	             "[bit]" (bit & 0x1f)
 	    : "cc", "ip", "memory");
 
 	return (result);
 }
 
 /* Test-and-clear bit v of a u_int, forwarded to atomic_testandclear_32(). */
 static __inline int
 atomic_testandclear_int(volatile u_int *p, u_int v)
 {
 
 	return (atomic_testandclear_32((volatile uint32_t *)p, v));
 }
 
 /* Test-and-clear bit v of a u_long, forwarded to atomic_testandclear_32(). */
 static __inline int
 atomic_testandclear_long(volatile u_long *p, u_int v)
 {
 
 	return (atomic_testandclear_32((volatile uint32_t *)p, v));
 }
-#define	atomic_testandclear_long	atomic_testandclear_long
-
 
 /*
  * Atomically clear bit v of the 64-bit value at p and return whether it
  * was previously set.  The work is done by atomic_testandclear_32() on the
  * 32-bit half that contains the bit.
  */
 static __inline int
 atomic_testandclear_64(volatile uint64_t *p, u_int v)
 {
 	volatile uint32_t *word;
 
 	/*
 	 * Little-endian layout is assumed: bit 5 of v selects the upper
 	 * 32-bit word.  atomic_testandclear_32() itself only looks at the
 	 * low 5 bits of v, so v is passed through unchanged.
 	 */
 	word = (volatile uint32_t *)p + ((v >> 5) & 1);
 	return (atomic_testandclear_32(word, v));
 }
 
 /*
  * Atomically set bit number (bit & 0x1f) of *ptr.
  *
  * Returns 1 if that bit was already set before the operation and 0
  * otherwise.  The single-bit mask is built in the ip register, which is
  * therefore listed as clobbered.  No dmb() is issued here.
  */
 static __inline int
 atomic_testandset_32(volatile uint32_t *ptr, u_int bit)
 {
 	int newv, oldv, result;
 
 	__asm __volatile(
 	    "   mov     ip, #1					\n"
 	    "   lsl     ip, ip, %[bit]				\n"
 	    /*  Done with %[bit] as input, reuse below as output. */
 	    "1:							\n"
 	    "   ldrex	%[oldv], [%[ptr]]			\n"
 	    "   orr     %[newv], %[oldv], ip			\n"
 	    "   strex	%[bit], %[newv], [%[ptr]]		\n"
 	    "   teq	%[bit], #0				\n"
 	    "   it	ne					\n"
 	    "   bne	1b					\n"
 	    /*  Turn "old value & mask" into a 0/1 result. */
 	    "   ands	%[bit], %[oldv], ip			\n"
 	    "   it	ne					\n"
 	    "   movne   %[bit], #1                              \n"
 	    : [bit]  "=&r"   (result),
 	      [oldv] "=&r"   (oldv),
 	      [newv] "=&r"   (newv)
 	    : [ptr]  "r"     (ptr),
 	             "[bit]" (bit & 0x1f)
 	    : "cc", "ip", "memory");
 
 	return (result);
 }
 
 /* Test-and-set bit v of a u_int, forwarded to atomic_testandset_32(). */
 static __inline int
 atomic_testandset_int(volatile u_int *p, u_int v)
 {
 
 	return (atomic_testandset_32((volatile uint32_t *)p, v));
 }
 
 /* Test-and-set bit v of a u_long, forwarded to atomic_testandset_32(). */
 static __inline int
 atomic_testandset_long(volatile u_long *p, u_int v)
 {
 
 	return (atomic_testandset_32((volatile uint32_t *)p, v));
 }
-#define	atomic_testandset_long	atomic_testandset_long
 
 /*
  * Test-and-set bit v of a u_long with acquire ordering:
  * atomic_testandset_32() followed by dmb().
  */
 static __inline int
 atomic_testandset_acq_long(volatile u_long *p, u_int v)
 {
 	int ret;
 
 	ret = atomic_testandset_32((volatile uint32_t *)p, v);
 	dmb();
 	return (ret);
 }
-#define	atomic_testandset_acq_long	atomic_testandset_acq_long
 
 /*
  * Atomically set bit v of the 64-bit value at p and return whether it was
  * already set.  The work is done by atomic_testandset_32() on the 32-bit
  * half that contains the bit.
  */
 static __inline int
 atomic_testandset_64(volatile uint64_t *p, u_int v)
 {
 	volatile uint32_t *word;
 
 	/*
 	 * Little-endian layout is assumed: bit 5 of v selects the upper
 	 * 32-bit word.  atomic_testandset_32() itself only looks at the
 	 * low 5 bits of v, so v is passed through unchanged.
 	 */
 	word = (volatile uint32_t *)p + ((v >> 5) & 1);
 	return (atomic_testandset_32(word, v));
 }
 
 /*
  * Atomically store v into *p and return the value *p held before.
  *
  * Implemented as an ldrex/strex loop retried until strex succeeds.
  * No dmb() is issued here.
  */
 static __inline uint32_t
 atomic_swap_32(volatile uint32_t *p, uint32_t v)
 {
 	uint32_t ret, exflag;
 
 	__asm __volatile(
 	    "1: ldrex	%[ret], [%[ptr]]		\n"
 	    "   strex	%[exf], %[val], [%[ptr]]	\n"
 	    "   teq	%[exf], #0			\n"
 	    "   it	ne				\n"
 	    "   bne	1b				\n"
 	    : [ret] "=&r"  (ret),
 	      [exf] "=&r" (exflag)
 	    : [val] "r"  (v),
 	      [ptr] "r"  (p)
 	    : "cc", "memory");
 	return (ret);
 }
 
 /* Swap on a u_long, forwarded to atomic_swap_32(). */
 static __inline u_long
 atomic_swap_long(volatile u_long *p, u_long v)
 {
 
 	return (atomic_swap_32((volatile uint32_t *)p, v));
 }
 
 /*
  * Atomically store v into the 64-bit location at p and return the value it
  * held before.
  *
  * Uses an ldrexd/strexd loop retried until strexd reports success.
  * No dmb() is issued here.
  */
 static __inline uint64_t
 atomic_swap_64(volatile uint64_t *p, uint64_t v)
 {
 	uint64_t ret;
 	uint32_t exflag;
 
 	__asm __volatile(
 	    "1: ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
 	    "   strexd	%[exf], %Q[val], %R[val], [%[ptr]]	\n"
 	    "   teq	%[exf], #0				\n"
 	    "   it	ne					\n"
 	    "   bne	1b					\n"
 	    : [ret] "=&r" (ret),
 	      [exf] "=&r" (exflag)
 	    : [val] "r"   (v),
 	      [ptr] "r"   (p)
 	    : "cc", "memory");
 	return (ret);
 }
 
 /* The generator macros are only needed inside this header. */
 #undef ATOMIC_ACQ_REL
 #undef ATOMIC_ACQ_REL_LONG
 
 /*
  * Thread fences.  All four orderings (acquire, release, acquire-release
  * and sequentially consistent) are implemented with the same dmb().
  */
 static __inline void
 atomic_thread_fence_acq(void)
 {
 
 	dmb();
 }
 
 /* Release fence; see the note above atomic_thread_fence_acq(). */
 static __inline void
 atomic_thread_fence_rel(void)
 {
 
 	dmb();
 }
 
 /* Acquire-release fence; see the note above atomic_thread_fence_acq(). */
 static __inline void
 atomic_thread_fence_acq_rel(void)
 {
 
 	dmb();
 }
 
 /* Sequentially consistent fence; also a single dmb(). */
 static __inline void
 atomic_thread_fence_seq_cst(void)
 {
 
 	dmb();
 }
 
 /* Pointer-sized operations are aliases for the 32-bit implementations. */
 #define atomic_clear_ptr		atomic_clear_32
 #define atomic_clear_acq_ptr		atomic_clear_acq_32
 #define atomic_clear_rel_ptr		atomic_clear_rel_32
 #define atomic_set_ptr			atomic_set_32
 #define atomic_set_acq_ptr		atomic_set_acq_32
 #define atomic_set_rel_ptr		atomic_set_rel_32
 #define atomic_fcmpset_ptr		atomic_fcmpset_32
 #define atomic_fcmpset_rel_ptr		atomic_fcmpset_rel_32
 #define atomic_fcmpset_acq_ptr		atomic_fcmpset_acq_32
 #define atomic_cmpset_ptr		atomic_cmpset_32
 #define atomic_cmpset_acq_ptr		atomic_cmpset_acq_32
 #define atomic_cmpset_rel_ptr		atomic_cmpset_rel_32
 #define atomic_load_acq_ptr		atomic_load_acq_32
 #define atomic_store_rel_ptr		atomic_store_rel_32
 #define atomic_swap_ptr			atomic_swap_32
 #define atomic_readandclear_ptr		atomic_readandclear_32
 
 /* int-sized operations are likewise aliases for the 32-bit ones. */
 #define atomic_add_int			atomic_add_32
 #define atomic_add_acq_int		atomic_add_acq_32
 #define atomic_add_rel_int		atomic_add_rel_32
 #define atomic_subtract_int		atomic_subtract_32
 #define atomic_subtract_acq_int		atomic_subtract_acq_32
 #define atomic_subtract_rel_int		atomic_subtract_rel_32
 #define atomic_clear_int		atomic_clear_32
 #define atomic_clear_acq_int		atomic_clear_acq_32
 #define atomic_clear_rel_int		atomic_clear_rel_32
 #define atomic_set_int			atomic_set_32
 #define atomic_set_acq_int		atomic_set_acq_32
 #define atomic_set_rel_int		atomic_set_rel_32
 #define atomic_fcmpset_int		atomic_fcmpset_32
 #define atomic_fcmpset_acq_int		atomic_fcmpset_acq_32
 #define atomic_fcmpset_rel_int		atomic_fcmpset_rel_32
 #define atomic_cmpset_int		atomic_cmpset_32
 #define atomic_cmpset_acq_int		atomic_cmpset_acq_32
 #define atomic_cmpset_rel_int		atomic_cmpset_rel_32
 #define atomic_fetchadd_int		atomic_fetchadd_32
 #define atomic_readandclear_int		atomic_readandclear_32
 #define atomic_load_acq_int		atomic_load_acq_32
 #define atomic_store_rel_int		atomic_store_rel_32
 #define atomic_swap_int			atomic_swap_32
 
 /*
  * For:
  *  - atomic_load_acq_8
  *  - atomic_load_acq_16
  */
 #include <sys/_atomic_subword.h>
 
 #endif /* _MACHINE_ATOMIC_H_ */
diff --git a/sys/powerpc/include/atomic.h b/sys/powerpc/include/atomic.h
index 77ee5d8c6d46..47fd9ce30fd6 100644
--- a/sys/powerpc/include/atomic.h
+++ b/sys/powerpc/include/atomic.h
@@ -1,1148 +1,1142 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause
  *
  * Copyright (c) 2008 Marcel Moolenaar
  * Copyright (c) 2001 Benno Rice
  * Copyright (c) 2001 David E. O'Brien
  * Copyright (c) 1998 Doug Rabson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef _MACHINE_ATOMIC_H_
 #define	_MACHINE_ATOMIC_H_
 
 #include <sys/atomic_common.h>
 
 #ifndef __powerpc64__
 #include <sys/_atomic64e.h>
 #endif
 
 /*
  * The __ATOMIC_REL/ACQ() macros provide memory barriers only in conjunction
  * with the atomic lXarx/stXcx. sequences below. They are not exposed outside
  * of this file. See also Appendix B.2 of Book II of the architecture manual.
  *
  * Note that not all Book-E processors accept the light-weight sync variant.
  * In particular, early models of E500 cores are known to wedge. Bank on all
  * 64-bit capable CPUs to accept lwsync properly and pessimize 32-bit CPUs
  * to use the heavier-weight sync.
  */
 
 /*
  * Barrier primitives.  mb() is a full "sync" and __ATOMIC_ACQ() is an
  * "isync" on every CPU; rmb(), wmb() and __ATOMIC_REL() use "lwsync" on
  * 64-bit CPUs and fall back to "sync" on 32-bit ones.  Every variant
  * carries a "memory" clobber.
  */
 #ifdef __powerpc64__
 #define mb()		__asm __volatile("sync" : : : "memory")
 #define rmb()		__asm __volatile("lwsync" : : : "memory")
 #define wmb()		__asm __volatile("lwsync" : : : "memory")
 #define __ATOMIC_REL()	__asm __volatile("lwsync" : : : "memory")
 #define __ATOMIC_ACQ()	__asm __volatile("isync" : : : "memory")
 #else
 #define mb()		__asm __volatile("sync" : : : "memory")
 #define rmb()		__asm __volatile("sync" : : : "memory")
 #define wmb()		__asm __volatile("sync" : : : "memory")
 #define __ATOMIC_REL()	__asm __volatile("sync" : : : "memory")
 #define __ATOMIC_ACQ()	__asm __volatile("isync" : : : "memory")
 #endif
 
 /*
  * Issue the light-weight barrier where it is trusted (64-bit CPUs) and the
  * heavier "sync" everywhere else.
  */
 static __inline void
 powerpc_lwsync(void)
 {
 
 #ifndef __powerpc64__
 	/* 32-bit CPUs: use the full barrier instead of lwsync. */
 	__asm __volatile("sync" : : : "memory");
 #else
 	__asm __volatile("lwsync" : : : "memory");
 #endif
 }
 
 /*
  * atomic_add(p, v)
  * { *p += v; }
  */
 
 /*
  * Inline-assembly bodies for atomic_add: a load-reserve (lwarx/ldarx),
  * add, store-conditional (stwcx./stdcx.) sequence that branches back to
  * label 1 until the conditional store succeeds.  "t" is a scratch
  * variable that receives the sum.  The 32-bit build gives the long
  * variant the same word-sized instructions as the int one.
  */
 #define __atomic_add_int(p, v, t)				\
     __asm __volatile(						\
 	"1:	lwarx	%0, 0, %2\n"				\
 	"	add	%0, %3, %0\n"				\
 	"	stwcx.	%0, 0, %2\n"				\
 	"	bne-	1b\n"					\
 	: "=&r" (t), "=m" (*p)					\
 	: "r" (p), "r" (v), "m" (*p)				\
 	: "cr0", "memory")					\
     /* __atomic_add_int */
 
 #ifdef __powerpc64__
 #define __atomic_add_long(p, v, t)				\
     __asm __volatile(						\
 	"1:	ldarx	%0, 0, %2\n"				\
 	"	add	%0, %3, %0\n"				\
 	"	stdcx.	%0, 0, %2\n"				\
 	"	bne-	1b\n"					\
 	: "=&r" (t), "=m" (*p)					\
 	: "r" (p), "r" (v), "m" (*p)				\
 	: "cr0", "memory")					\
     /* __atomic_add_long */
 #else
 #define	__atomic_add_long(p, v, t)				\
     __asm __volatile(						\
 	"1:	lwarx	%0, 0, %2\n"				\
 	"	add	%0, %3, %0\n"				\
 	"	stwcx.	%0, 0, %2\n"				\
 	"	bne-	1b\n"					\
 	: "=&r" (t), "=m" (*p)					\
 	: "r" (p), "r" (v), "m" (*p)				\
 	: "cr0", "memory")					\
     /* __atomic_add_long */
 #endif
 
 /*
  * Generate atomic_add_<type>() and its _acq/_rel variants.  The acquire
  * form issues __ATOMIC_ACQ() after the update; the release form issues
  * __ATOMIC_REL() before it.
  */
 #define	_ATOMIC_ADD(type)					\
     static __inline void					\
     atomic_add_##type(volatile u_##type *p, u_##type v) {	\
 	u_##type t;						\
 	__atomic_add_##type(p, v, t);				\
     }								\
 								\
     static __inline void					\
     atomic_add_acq_##type(volatile u_##type *p, u_##type v) {	\
 	u_##type t;						\
 	__atomic_add_##type(p, v, t);				\
 	__ATOMIC_ACQ();						\
     }								\
 								\
     static __inline void					\
     atomic_add_rel_##type(volatile u_##type *p, u_##type v) {	\
 	u_##type t;						\
 	__ATOMIC_REL();						\
 	__atomic_add_##type(p, v, t);				\
     }								\
     /* _ATOMIC_ADD */
 
 _ATOMIC_ADD(int)
 _ATOMIC_ADD(long)
 
 /* Fixed-width and pointer-sized names map onto the int/long functions. */
 #define	atomic_add_32		atomic_add_int
 #define	atomic_add_acq_32	atomic_add_acq_int
 #define	atomic_add_rel_32	atomic_add_rel_int
 
 #ifdef __powerpc64__
 #define	atomic_add_64		atomic_add_long
 #define	atomic_add_acq_64	atomic_add_acq_long
 #define	atomic_add_rel_64	atomic_add_rel_long
 
 #define	atomic_add_ptr		atomic_add_long
 #define	atomic_add_acq_ptr	atomic_add_acq_long
 #define	atomic_add_rel_ptr	atomic_add_rel_long
 #else
 #define	atomic_add_ptr		atomic_add_int
 #define	atomic_add_acq_ptr	atomic_add_acq_int
 #define	atomic_add_rel_ptr	atomic_add_rel_int
 #endif
 /* The generator and the asm helpers are not needed past this point. */
 #undef _ATOMIC_ADD
 #undef __atomic_add_long
 #undef __atomic_add_int
 
 /*
  * atomic_clear(p, v)
  * { *p &= ~v; }
  */
 
 /*
  * Inline-assembly bodies for atomic_clear: load-reserve, "andc" (AND with
  * the complement of v), store-conditional, retrying from label 1 until the
  * conditional store succeeds.  "t" is a scratch variable holding the new
  * value.  The 32-bit build gives the long variant the word-sized
  * instructions.
  */
 #define __atomic_clear_int(p, v, t)				\
     __asm __volatile(						\
 	"1:	lwarx	%0, 0, %2\n"				\
 	"	andc	%0, %0, %3\n"				\
 	"	stwcx.	%0, 0, %2\n"				\
 	"	bne-	1b\n"					\
 	: "=&r" (t), "=m" (*p)					\
 	: "r" (p), "r" (v), "m" (*p)				\
 	: "cr0", "memory")					\
     /* __atomic_clear_int */
 
 #ifdef __powerpc64__
 #define __atomic_clear_long(p, v, t)				\
     __asm __volatile(						\
 	"1:	ldarx	%0, 0, %2\n"				\
 	"	andc	%0, %0, %3\n"				\
 	"	stdcx.	%0, 0, %2\n"				\
 	"	bne-	1b\n"					\
 	: "=&r" (t), "=m" (*p)					\
 	: "r" (p), "r" (v), "m" (*p)				\
 	: "cr0", "memory")					\
     /* __atomic_clear_long */
 #else
 #define	__atomic_clear_long(p, v, t)				\
     __asm __volatile(						\
 	"1:	lwarx	%0, 0, %2\n"				\
 	"	andc	%0, %0, %3\n"				\
 	"	stwcx.	%0, 0, %2\n"				\
 	"	bne-	1b\n"					\
 	: "=&r" (t), "=m" (*p)					\
 	: "r" (p), "r" (v), "m" (*p)				\
 	: "cr0", "memory")					\
     /* __atomic_clear_long */
 #endif
 
 /*
  * Generate atomic_clear_<type>() and its _acq/_rel variants.  The acquire
  * form issues __ATOMIC_ACQ() after the update; the release form issues
  * __ATOMIC_REL() before it.
  */
 #define	_ATOMIC_CLEAR(type)					\
     static __inline void					\
     atomic_clear_##type(volatile u_##type *p, u_##type v) {	\
 	u_##type t;						\
 	__atomic_clear_##type(p, v, t);				\
     }								\
 								\
     static __inline void					\
     atomic_clear_acq_##type(volatile u_##type *p, u_##type v) {	\
 	u_##type t;						\
 	__atomic_clear_##type(p, v, t);				\
 	__ATOMIC_ACQ();						\
     }								\
 								\
     static __inline void					\
     atomic_clear_rel_##type(volatile u_##type *p, u_##type v) {	\
 	u_##type t;						\
 	__ATOMIC_REL();						\
 	__atomic_clear_##type(p, v, t);				\
     }								\
     /* _ATOMIC_CLEAR */
 
 _ATOMIC_CLEAR(int)
 _ATOMIC_CLEAR(long)
 
 /* Fixed-width and pointer-sized names map onto the int/long functions. */
 #define	atomic_clear_32		atomic_clear_int
 #define	atomic_clear_acq_32	atomic_clear_acq_int
 #define	atomic_clear_rel_32	atomic_clear_rel_int
 
 #ifdef __powerpc64__
 #define	atomic_clear_64		atomic_clear_long
 #define	atomic_clear_acq_64	atomic_clear_acq_long
 #define	atomic_clear_rel_64	atomic_clear_rel_long
 
 #define	atomic_clear_ptr	atomic_clear_long
 #define	atomic_clear_acq_ptr	atomic_clear_acq_long
 #define	atomic_clear_rel_ptr	atomic_clear_rel_long
 #else
 #define	atomic_clear_ptr	atomic_clear_int
 #define	atomic_clear_acq_ptr	atomic_clear_acq_int
 #define	atomic_clear_rel_ptr	atomic_clear_rel_int
 #endif
 /* The generator and the asm helpers are not needed past this point. */
 #undef _ATOMIC_CLEAR
 #undef __atomic_clear_long
 #undef __atomic_clear_int
 
 /*
  * atomic_cmpset(p, o, n)
  */
 /* TODO -- see below */
 
 /*
  * atomic_load_acq(p)
  */
 /* TODO -- see below */
 
 /*
  * atomic_readandclear(p)
  */
 /* TODO -- see below */
 
 /*
  * atomic_set(p, v)
  * { *p |= v; }
  */
 
 /*
  * Inline-assembly bodies for atomic_set: load-reserve, "or" with v,
  * store-conditional, retrying from label 1 until the conditional store
  * succeeds.  "t" is a scratch variable holding the new value.  The 32-bit
  * build gives the long variant the word-sized instructions.
  */
 #define __atomic_set_int(p, v, t)				\
     __asm __volatile(						\
 	"1:	lwarx	%0, 0, %2\n"				\
 	"	or	%0, %3, %0\n"				\
 	"	stwcx.	%0, 0, %2\n"				\
 	"	bne-	1b\n"					\
 	: "=&r" (t), "=m" (*p)					\
 	: "r" (p), "r" (v), "m" (*p)				\
 	: "cr0", "memory")					\
     /* __atomic_set_int */
 
 #ifdef __powerpc64__
 #define __atomic_set_long(p, v, t)				\
     __asm __volatile(						\
 	"1:	ldarx	%0, 0, %2\n"				\
 	"	or	%0, %3, %0\n"				\
 	"	stdcx.	%0, 0, %2\n"				\
 	"	bne-	1b\n"					\
 	: "=&r" (t), "=m" (*p)					\
 	: "r" (p), "r" (v), "m" (*p)				\
 	: "cr0", "memory")					\
     /* __atomic_set_long */
 #else
 #define	__atomic_set_long(p, v, t)				\
     __asm __volatile(						\
 	"1:	lwarx	%0, 0, %2\n"				\
 	"	or	%0, %3, %0\n"				\
 	"	stwcx.	%0, 0, %2\n"				\
 	"	bne-	1b\n"					\
 	: "=&r" (t), "=m" (*p)					\
 	: "r" (p), "r" (v), "m" (*p)				\
 	: "cr0", "memory")					\
     /* __atomic_set_long */
 #endif
 
 /*
  * Generate atomic_set_<type>() and its _acq/_rel variants.  The acquire
  * form issues __ATOMIC_ACQ() after the update; the release form issues
  * __ATOMIC_REL() before it.
  */
 #define	_ATOMIC_SET(type)					\
     static __inline void					\
     atomic_set_##type(volatile u_##type *p, u_##type v) {	\
 	u_##type t;						\
 	__atomic_set_##type(p, v, t);				\
     }								\
 								\
     static __inline void					\
     atomic_set_acq_##type(volatile u_##type *p, u_##type v) {	\
 	u_##type t;						\
 	__atomic_set_##type(p, v, t);				\
 	__ATOMIC_ACQ();						\
     }								\
 								\
     static __inline void					\
     atomic_set_rel_##type(volatile u_##type *p, u_##type v) {	\
 	u_##type t;						\
 	__ATOMIC_REL();						\
 	__atomic_set_##type(p, v, t);				\
     }								\
     /* _ATOMIC_SET */
 
 _ATOMIC_SET(int)
 _ATOMIC_SET(long)
 
 /* Fixed-width and pointer-sized names map onto the int/long functions. */
 #define	atomic_set_32		atomic_set_int
 #define	atomic_set_acq_32	atomic_set_acq_int
 #define	atomic_set_rel_32	atomic_set_rel_int
 
 #ifdef __powerpc64__
 #define	atomic_set_64		atomic_set_long
 #define	atomic_set_acq_64	atomic_set_acq_long
 #define	atomic_set_rel_64	atomic_set_rel_long
 
 #define	atomic_set_ptr		atomic_set_long
 #define	atomic_set_acq_ptr	atomic_set_acq_long
 #define	atomic_set_rel_ptr	atomic_set_rel_long
 #else
 #define	atomic_set_ptr		atomic_set_int
 #define	atomic_set_acq_ptr	atomic_set_acq_int
 #define	atomic_set_rel_ptr	atomic_set_rel_int
 #endif
 /* The generator and the asm helpers are not needed past this point. */
 #undef _ATOMIC_SET
 #undef __atomic_set_long
 #undef __atomic_set_int
 
 /*
  * atomic_subtract(p, v)
  * { *p -= v; }
  */
 
 /*
  * Inline-assembly bodies for atomic_subtract: load-reserve, "subf"
  * (subtract v from the loaded value), store-conditional, retrying from
  * label 1 until the conditional store succeeds.  "t" is a scratch variable
  * holding the new value.  The 32-bit build gives the long variant the
  * word-sized instructions.
  */
 #define __atomic_subtract_int(p, v, t)				\
     __asm __volatile(						\
 	"1:	lwarx	%0, 0, %2\n"				\
 	"	subf	%0, %3, %0\n"				\
 	"	stwcx.	%0, 0, %2\n"				\
 	"	bne-	1b\n"					\
 	: "=&r" (t), "=m" (*p)					\
 	: "r" (p), "r" (v), "m" (*p)				\
 	: "cr0", "memory")					\
     /* __atomic_subtract_int */
 
 #ifdef __powerpc64__
 #define __atomic_subtract_long(p, v, t)				\
     __asm __volatile(						\
 	"1:	ldarx	%0, 0, %2\n"				\
 	"	subf	%0, %3, %0\n"				\
 	"	stdcx.	%0, 0, %2\n"				\
 	"	bne-	1b\n"					\
 	: "=&r" (t), "=m" (*p)					\
 	: "r" (p), "r" (v), "m" (*p)				\
 	: "cr0", "memory")					\
     /* __atomic_subtract_long */
 #else
 #define	__atomic_subtract_long(p, v, t)				\
     __asm __volatile(						\
 	"1:	lwarx	%0, 0, %2\n"				\
 	"	subf	%0, %3, %0\n"				\
 	"	stwcx.	%0, 0, %2\n"				\
 	"	bne-	1b\n"					\
 	: "=&r" (t), "=m" (*p)					\
 	: "r" (p), "r" (v), "m" (*p)				\
 	: "cr0", "memory")					\
     /* __atomic_subtract_long */
 #endif
 
 /*
  * Generate atomic_subtract_<type>() and its _acq/_rel variants.  The
  * acquire form issues __ATOMIC_ACQ() after the update; the release form
  * issues __ATOMIC_REL() before it.
  */
 #define	_ATOMIC_SUBTRACT(type)						\
     static __inline void						\
     atomic_subtract_##type(volatile u_##type *p, u_##type v) {		\
 	u_##type t;							\
 	__atomic_subtract_##type(p, v, t);				\
     }									\
 									\
     static __inline void						\
     atomic_subtract_acq_##type(volatile u_##type *p, u_##type v) {	\
 	u_##type t;							\
 	__atomic_subtract_##type(p, v, t);				\
 	__ATOMIC_ACQ();							\
     }									\
 									\
     static __inline void						\
     atomic_subtract_rel_##type(volatile u_##type *p, u_##type v) {	\
 	u_##type t;							\
 	__ATOMIC_REL();							\
 	__atomic_subtract_##type(p, v, t);				\
     }									\
     /* _ATOMIC_SUBTRACT */
 
 _ATOMIC_SUBTRACT(int)
 _ATOMIC_SUBTRACT(long)
 
 /* Fixed-width and pointer-sized names map onto the int/long functions. */
 #define	atomic_subtract_32	atomic_subtract_int
 #define	atomic_subtract_acq_32	atomic_subtract_acq_int
 #define	atomic_subtract_rel_32	atomic_subtract_rel_int
 
 #ifdef __powerpc64__
 #define	atomic_subtract_64	atomic_subtract_long
 /* Must name the generated function exactly: atomic_subtract_acq_long. */
 #define	atomic_subtract_acq_64	atomic_subtract_acq_long
 #define	atomic_subtract_rel_64	atomic_subtract_rel_long
 
 #define	atomic_subtract_ptr	atomic_subtract_long
 #define	atomic_subtract_acq_ptr	atomic_subtract_acq_long
 #define	atomic_subtract_rel_ptr	atomic_subtract_rel_long
 #else
 #define	atomic_subtract_ptr	atomic_subtract_int
 #define	atomic_subtract_acq_ptr	atomic_subtract_acq_int
 #define	atomic_subtract_rel_ptr	atomic_subtract_rel_int
 #endif
 /* The generator and the asm helpers are not needed past this point. */
 #undef _ATOMIC_SUBTRACT
 #undef __atomic_subtract_long
 #undef __atomic_subtract_int
 
 /*
  * atomic_store_rel(p, v)
  */
 /* TODO -- see below */
 
 /*
  * Old/original implementations that still need revisiting.
  */
 
 /*
  * Atomically replace *addr with zero and return the value it held.
  * The sequence opens with a "sync" and then retries a lwarx/stwcx. pair
  * until the store of zero succeeds.
  */
 static __inline u_int
 atomic_readandclear_int(volatile u_int *addr)
 {
 	u_int result,temp;
 
 	__asm __volatile (
 		"\tsync\n"			/* drain writes */
 		"1:\tlwarx %0, 0, %3\n\t"	/* load old value */
 		"li %1, 0\n\t"			/* load new value */
 		"stwcx. %1, 0, %3\n\t"      	/* attempt to store */
 		"bne- 1b\n\t"			/* spin if failed */
 		: "=&r"(result), "=&r"(temp), "=m" (*addr)
 		: "r" (addr), "m" (*addr)
 		: "cr0", "memory");
 
 	return (result);
 }
 
 #ifdef __powerpc64__
 /*
  * 64-bit variant of atomic_readandclear_int(): atomically replace *addr
  * with zero and return the value it held, using ldarx/stdcx. after an
  * initial "sync".
  */
 static __inline u_long
 atomic_readandclear_long(volatile u_long *addr)
 {
 	u_long result,temp;
 
 	__asm __volatile (
 		"\tsync\n"			/* drain writes */
 		"1:\tldarx %0, 0, %3\n\t"	/* load old value */
 		"li %1, 0\n\t"			/* load new value */
 		"stdcx. %1, 0, %3\n\t"      	/* attempt to store */
 		"bne- 1b\n\t"			/* spin if failed */
 		: "=&r"(result), "=&r"(temp), "=m" (*addr)
 		: "r" (addr), "m" (*addr)
 		: "cr0", "memory");
 
 	return (result);
 }
 #endif
 
 #define	atomic_readandclear_32		atomic_readandclear_int
 
 #ifdef __powerpc64__
 #define	atomic_readandclear_64		atomic_readandclear_long
 
 #define	atomic_readandclear_ptr		atomic_readandclear_long
 #else
 static __inline u_long
 atomic_readandclear_long(volatile u_long *addr)
 {
 
 	return ((u_long)atomic_readandclear_int((volatile u_int *)addr));
 }
 
 #define	atomic_readandclear_ptr		atomic_readandclear_int
 #endif
 
 /*
  * We assume that a = b will do atomic loads and stores.
  */
 /*
  * Generate atomic_load_acq_<TYPE>() and atomic_store_rel_<TYPE>().  The
  * load reads *p with a plain volatile access and then calls
  * powerpc_lwsync(); the store calls powerpc_lwsync() first and then
  * writes *p.
  */
 #define	ATOMIC_STORE_LOAD(TYPE)					\
 static __inline u_##TYPE					\
 atomic_load_acq_##TYPE(volatile u_##TYPE *p)			\
 {								\
 	u_##TYPE v;						\
 								\
 	v = *p;							\
 	powerpc_lwsync();					\
 	return (v);						\
 }								\
 								\
 static __inline void						\
 atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)	\
 {								\
 								\
 	powerpc_lwsync();					\
 	*p = v;							\
 }
 
 ATOMIC_STORE_LOAD(int)
 
 #define	atomic_load_acq_32	atomic_load_acq_int
 #define	atomic_store_rel_32	atomic_store_rel_int
 
 #ifdef __powerpc64__
 ATOMIC_STORE_LOAD(long)
 
 #define	atomic_load_acq_64	atomic_load_acq_long
 #define	atomic_store_rel_64	atomic_store_rel_long
 
 #define	atomic_load_acq_ptr	atomic_load_acq_long
 #define	atomic_store_rel_ptr	atomic_store_rel_long
 #else
 static __inline u_long
 atomic_load_acq_long(volatile u_long *addr)
 {
 
 	return ((u_long)atomic_load_acq_int((volatile u_int *)addr));
 }
 
 /*
  * 32-bit build: perform the release store through the int implementation
  * on a u_int view of the same address.
  */
 static __inline void
 atomic_store_rel_long(volatile u_long *addr, u_long val)
 {
 	volatile u_int *word = (volatile u_int *)addr;
 	u_int narrowed = (u_int)val;
 
 	atomic_store_rel_int(word, narrowed);
 }
 
 #define	atomic_load_acq_ptr	atomic_load_acq_int
 #define	atomic_store_rel_ptr	atomic_store_rel_int
 #endif
 #undef ATOMIC_STORE_LOAD
 
 /*
  * Atomically compare the value stored at *p with cmpval and if the
  * two values are equal, update the value of *p with newval. Returns
  * zero if the compare failed, nonzero otherwise.
  */
 #ifdef ISA_206_ATOMICS
 /*
  * Byte-sized compare-and-set built on lbarx/stbcx.  Returns 1 when *p
  * matched cmpval and newval was stored, 0 when the comparison failed.  A
  * lost reservation is retried; a failed comparison issues a conditional
  * store to drop the reservation before returning.
  */
 static __inline int
 atomic_cmpset_char(volatile u_char *p, u_char cmpval, u_char newval)
 {
 	int	ret;
 
 	__asm __volatile (
 		"1:\tlbarx %0, 0, %2\n\t"	/* load old value */
 		"cmplw %3, %0\n\t"		/* compare */
 		"bne- 2f\n\t"			/* exit if not equal */
 		"stbcx. %4, 0, %2\n\t"      	/* attempt to store */
 		"bne- 1b\n\t"			/* spin if failed */
 		"li %0, 1\n\t"			/* success - retval = 1 */
 		"b 3f\n\t"			/* we've succeeded */
 		"2:\n\t"
 		"stbcx. %0, 0, %2\n\t"       	/* clear reservation (74xx) */
 		"li %0, 0\n\t"			/* failure - retval = 0 */
 		"3:\n\t"
 		: "=&r" (ret), "=m" (*p)
 		: "r" (p), "r" (cmpval), "r" (newval), "m" (*p)
 		: "cr0", "memory");
 
 	return (ret);
 }
 
 /*
  * Halfword-sized compare-and-set built on lharx/sthcx.  Returns 1 when *p
  * matched cmpval and newval was stored, 0 when the comparison failed.  A
  * lost reservation is retried; a failed comparison issues a conditional
  * store to drop the reservation before returning.
  */
 static __inline int
 atomic_cmpset_short(volatile u_short *p, u_short cmpval, u_short newval)
 {
 	int	ret;
 
 	__asm __volatile (
 		"1:\tlharx %0, 0, %2\n\t"	/* load old value */
 		"cmplw %3, %0\n\t"		/* compare */
 		"bne- 2f\n\t"			/* exit if not equal */
 		"sthcx. %4, 0, %2\n\t"      	/* attempt to store */
 		"bne- 1b\n\t"			/* spin if failed */
 		"li %0, 1\n\t"			/* success - retval = 1 */
 		"b 3f\n\t"			/* we've succeeded */
 		"2:\n\t"
 		"sthcx. %0, 0, %2\n\t"       	/* clear reservation (74xx) */
 		"li %0, 0\n\t"			/* failure - retval = 0 */
 		"3:\n\t"
 		: "=&r" (ret), "=m" (*p)
 		: "r" (p), "r" (cmpval), "r" (newval), "m" (*p)
 		: "cr0", "memory");
 
 	return (ret);
 }
 #else
 /*
  * Compare-and-set restricted to the bits of the 32-bit word at *p that are
  * selected by mask.  The word is loaded with a reservation, ANDed with
  * mask and compared against cmpval; on a match the masked bits are cleared
  * and newval is ORed in before the conditional store.  Returns 1 when the
  * store succeeded and 0 when the comparison failed.
  *
  * cmpval and newval are used as given, so they are presumably expected to
  * be positioned within mask already -- confirm against callers.
  */
 static __inline int
 atomic_cmpset_masked(uint32_t *p, uint32_t cmpval, uint32_t newval,
     uint32_t mask)
 {
 	int		ret;
 	uint32_t	tmp;
 
 	/*
 	 * tmp is write-only from the compiler's point of view: lwarx fills
 	 * it before anything reads it, so it is bound as an early-clobber
 	 * output rather than as a read-write operand fed from an
 	 * uninitialized variable.
 	 */
 	__asm __volatile (
 		"1:\tlwarx %2, 0, %3\n\t"	/* load old value */
 		"and %0, %2, %7\n\t"
 		"cmplw %4, %0\n\t"		/* compare */
 		"bne- 2f\n\t"			/* exit if not equal */
 		"andc %2, %2, %7\n\t"
 		"or %2, %2, %5\n\t"
 		"stwcx. %2, 0, %3\n\t"      	/* attempt to store */
 		"bne- 1b\n\t"			/* spin if failed */
 		"li %0, 1\n\t"			/* success - retval = 1 */
 		"b 3f\n\t"			/* we've succeeded */
 		"2:\n\t"
 		"stwcx. %2, 0, %3\n\t"       	/* clear reservation (74xx) */
 		"li %0, 0\n\t"			/* failure - retval = 0 */
 		"3:\n\t"
 		: "=&r" (ret), "=m" (*p), "=&r" (tmp)
 		: "r" (p), "r" (cmpval), "r" (newval), "m" (*p),
 		  "r" (mask)
 		: "cr0", "memory");
 
 	return (ret);
 }
 
 #define	_atomic_cmpset_masked_word(a,o,v,m) atomic_cmpset_masked(a, o, v, m)
 #endif
 
 /*
  * Word-sized compare-and-set built on lwarx/stwcx.  Returns 1 when *p
  * matched cmpval and newval was stored, 0 when the comparison failed.  A
  * lost reservation is retried; a failed comparison issues a conditional
  * store to drop the reservation before returning.
  */
 static __inline int
 atomic_cmpset_int(volatile u_int* p, u_int cmpval, u_int newval)
 {
 	int	ret;
 
 	__asm __volatile (
 		"1:\tlwarx %0, 0, %2\n\t"	/* load old value */
 		"cmplw %3, %0\n\t"		/* compare */
 		"bne- 2f\n\t"			/* exit if not equal */
 		"stwcx. %4, 0, %2\n\t"      	/* attempt to store */
 		"bne- 1b\n\t"			/* spin if failed */
 		"li %0, 1\n\t"			/* success - retval = 1 */
 		"b 3f\n\t"			/* we've succeeded */
 		"2:\n\t"
 		"stwcx. %0, 0, %2\n\t"       	/* clear reservation (74xx) */
 		"li %0, 0\n\t"			/* failure - retval = 0 */
 		"3:\n\t"
 		: "=&r" (ret), "=m" (*p)
 		: "r" (p), "r" (cmpval), "r" (newval), "m" (*p)
 		: "cr0", "memory");
 
 	return (ret);
 }
 /*
  * Compare-and-set on a u_long.  The 64-bit build uses ldarx/cmpld/stdcx.
  * and the 32-bit build uses the word-sized lwarx/cmplw/stwcx. forms; the
  * retry and failure paths are shared.  Returns 1 when newval was stored
  * and 0 when *p did not match cmpval.
  */
 static __inline int
 atomic_cmpset_long(volatile u_long* p, u_long cmpval, u_long newval)
 {
 	int ret;
 
 	__asm __volatile (
 	    #ifdef __powerpc64__
 		"1:\tldarx %0, 0, %2\n\t"	/* load old value */
 		"cmpld %3, %0\n\t"		/* compare */
 		"bne- 2f\n\t"			/* exit if not equal */
 		"stdcx. %4, 0, %2\n\t"		/* attempt to store */
 	    #else
 		"1:\tlwarx %0, 0, %2\n\t"	/* load old value */
 		"cmplw %3, %0\n\t"		/* compare */
 		"bne- 2f\n\t"			/* exit if not equal */
 		"stwcx. %4, 0, %2\n\t"		/* attempt to store */
 	    #endif
 		"bne- 1b\n\t"			/* spin if failed */
 		"li %0, 1\n\t"			/* success - retval = 1 */
 		"b 3f\n\t"			/* we've succeeded */
 		"2:\n\t"
 	    #ifdef __powerpc64__
 		"stdcx. %0, 0, %2\n\t"		/* clear reservation (74xx) */
 	    #else
 		"stwcx. %0, 0, %2\n\t"		/* clear reservation (74xx) */
 	    #endif
 		"li %0, 0\n\t"			/* failure - retval = 0 */
 		"3:\n\t"
 		: "=&r" (ret), "=m" (*p)
 		: "r" (p), "r" (cmpval), "r" (newval), "m" (*p)
 		: "cr0", "memory");
 
 	return (ret);
 }
 
 /*
  * Generate atomic_cmpset_acq_<type>() and atomic_cmpset_rel_<type>() on
  * top of atomic_cmpset_<type>().  The acquire form issues __ATOMIC_ACQ()
  * after the operation; the release form issues __ATOMIC_REL() before it.
  * The result is kept in an int, matching the return type of both the
  * wrapped function and the wrappers.  The trailing "struct hack" lets
  * each instantiation be terminated with a semicolon.
  */
 #define	ATOMIC_CMPSET_ACQ_REL(type) \
     static __inline int \
     atomic_cmpset_acq_##type(volatile u_##type *p, \
 	    u_##type cmpval, u_##type newval)\
     {\
 	int retval; \
 	retval = atomic_cmpset_##type(p, cmpval, newval);\
 	__ATOMIC_ACQ();\
 	return (retval);\
     }\
     static __inline int \
     atomic_cmpset_rel_##type(volatile u_##type *p, \
 	    u_##type cmpval, u_##type newval)\
     {\
 	__ATOMIC_REL();\
 	return (atomic_cmpset_##type(p, cmpval, newval));\
     }\
     struct hack
 
 ATOMIC_CMPSET_ACQ_REL(int);
 ATOMIC_CMPSET_ACQ_REL(long);
 
 #ifdef ISA_206_ATOMICS
 #define	atomic_cmpset_8		atomic_cmpset_char
 #endif
 #define	atomic_cmpset_acq_8	atomic_cmpset_acq_char
 #define	atomic_cmpset_rel_8	atomic_cmpset_rel_char
 
 #ifdef ISA_206_ATOMICS
 #define	atomic_cmpset_16	atomic_cmpset_short
 #endif
 #define	atomic_cmpset_acq_16	atomic_cmpset_acq_short
 #define	atomic_cmpset_rel_16	atomic_cmpset_rel_short
 
 #define	atomic_cmpset_32	atomic_cmpset_int
 #define	atomic_cmpset_acq_32	atomic_cmpset_acq_int
 #define	atomic_cmpset_rel_32	atomic_cmpset_rel_int
 
 #ifdef __powerpc64__
 #define	atomic_cmpset_64	atomic_cmpset_long
 #define	atomic_cmpset_acq_64	atomic_cmpset_acq_long
 #define	atomic_cmpset_rel_64	atomic_cmpset_rel_long
 
 #define	atomic_cmpset_ptr	atomic_cmpset_long
 #define	atomic_cmpset_acq_ptr	atomic_cmpset_acq_long
 #define	atomic_cmpset_rel_ptr	atomic_cmpset_rel_long
 #else
 #define	atomic_cmpset_ptr	atomic_cmpset_int
 #define	atomic_cmpset_acq_ptr	atomic_cmpset_acq_int
 #define	atomic_cmpset_rel_ptr	atomic_cmpset_rel_int
 #endif
 
 /*
  * Atomically compare the value stored at *p with *cmpval and if the
  * two values are equal, update the value of *p with newval. Returns
  * zero if the compare failed and sets *cmpval to the read value from *p,
  * nonzero otherwise.
  */
 #ifdef ISA_206_ATOMICS
 /*
  * Byte-sized single-attempt compare-and-set.  Both a failed comparison
  * and a failed conditional store branch to the failure path, so nothing is
  * retried here.  On failure the value loaded from *p is written to
  * *cmpval and 0 is returned; on success 1 is returned.
  */
 static __inline int
 atomic_fcmpset_char(volatile u_char *p, u_char *cmpval, u_char newval)
 {
 	int	ret;
 
 	__asm __volatile (
 		"lbarx %0, 0, %3\n\t"		/* load old value */
 		"cmplw %4, %0\n\t"		/* compare */
 		"bne- 1f\n\t"			/* exit if not equal */
 		"stbcx. %5, 0, %3\n\t"      	/* attempt to store */
 		"bne- 1f\n\t"			/* exit if failed */
 		"li %0, 1\n\t"			/* success - retval = 1 */
 		"b 2f\n\t"			/* we've succeeded */
 		"1:\n\t"
 		"stbcx. %0, 0, %3\n\t"       	/* clear reservation (74xx) */
 		"stbx %0, 0, %7\n\t"
 		"li %0, 0\n\t"			/* failure - retval = 0 */
 		"2:\n\t"
 		: "=&r" (ret), "=m" (*p), "=m" (*cmpval)
 		: "r" (p), "r" (*cmpval), "r" (newval), "m" (*p), "r"(cmpval)
 		: "cr0", "memory");
 
 	return (ret);
 }
 
 /*
  * Halfword-sized single-attempt compare-and-set.  Both a failed
  * comparison and a failed conditional store branch to the failure path,
  * so nothing is retried here.  On failure the value loaded from *p is
  * written to *cmpval and 0 is returned; on success 1 is returned.
  */
 static __inline int
 atomic_fcmpset_short(volatile u_short *p, u_short *cmpval, u_short newval)
 {
 	int	ret;
 
 	__asm __volatile (
 		"lharx %0, 0, %3\n\t"		/* load old value */
 		"cmplw %4, %0\n\t"		/* compare */
 		"bne- 1f\n\t"			/* exit if not equal */
 		"sthcx. %5, 0, %3\n\t"      	/* attempt to store */
 		"bne- 1f\n\t"			/* exit if failed */
 		"li %0, 1\n\t"			/* success - retval = 1 */
 		"b 2f\n\t"			/* we've succeeded */
 		"1:\n\t"
 		"sthcx. %0, 0, %3\n\t"       	/* clear reservation (74xx) */
 		"sthx %0, 0, %7\n\t"
 		"li %0, 0\n\t"			/* failure - retval = 0 */
 		"2:\n\t"
 		: "=&r" (ret), "=m" (*p), "=m" (*cmpval)
 		: "r" (p), "r" (*cmpval), "r" (newval), "m" (*p), "r"(cmpval)
 		: "cr0", "memory");
 
 	return (ret);
 }
 #endif	/* ISA_206_ATOMICS */
 
 /*
  * Word-sized single-attempt compare-and-set.  Both a failed comparison
  * and a failed conditional store branch to the failure path, so nothing is
  * retried here.  On failure the value loaded from *p is written to
  * *cmpval and 0 is returned; on success 1 is returned.
  */
 static __inline int
 atomic_fcmpset_int(volatile u_int *p, u_int *cmpval, u_int newval)
 {
 	int	ret;
 
 	__asm __volatile (
 		"lwarx %0, 0, %3\n\t"		/* load old value */
 		"cmplw %4, %0\n\t"		/* compare */
 		"bne- 1f\n\t"			/* exit if not equal */
 		"stwcx. %5, 0, %3\n\t"      	/* attempt to store */
 		"bne- 1f\n\t"			/* exit if failed */
 		"li %0, 1\n\t"			/* success - retval = 1 */
 		"b 2f\n\t"			/* we've succeeded */
 		"1:\n\t"
 		"stwcx. %0, 0, %3\n\t"       	/* clear reservation (74xx) */
 		"stwx %0, 0, %7\n\t"
 		"li %0, 0\n\t"			/* failure - retval = 0 */
 		"2:\n\t"
 		: "=&r" (ret), "=m" (*p), "=m" (*cmpval)
 		: "r" (p), "r" (*cmpval), "r" (newval), "m" (*p), "r"(cmpval)
 		: "cr0", "memory");
 
 	return (ret);
 }
 /*
  * Single-attempt compare-and-set on a u_long.  The 64-bit build uses the
  * doubleword instructions and the 32-bit build the word-sized ones.
  * Nothing is retried here: on failure the value loaded from *p is written
  * to *cmpval and 0 is returned; on success 1 is returned.
  */
 static __inline int
 atomic_fcmpset_long(volatile u_long *p, u_long *cmpval, u_long newval)
 {
 	int ret;
 
 	__asm __volatile (
 	    #ifdef __powerpc64__
 		"ldarx %0, 0, %3\n\t"		/* load old value */
 		"cmpld %4, %0\n\t"		/* compare */
 		"bne- 1f\n\t"			/* exit if not equal */
 		"stdcx. %5, 0, %3\n\t"		/* attempt to store */
 	    #else
 		"lwarx %0, 0, %3\n\t"		/* load old value */
 		"cmplw %4, %0\n\t"		/* compare */
 		"bne- 1f\n\t"			/* exit if not equal */
 		"stwcx. %5, 0, %3\n\t"		/* attempt to store */
 	    #endif
 		"bne- 1f\n\t"			/* exit if failed */
 		"li %0, 1\n\t"			/* success - retval = 1 */
 		"b 2f\n\t"			/* we've succeeded */
 		"1:\n\t"
 	    #ifdef __powerpc64__
 		"stdcx. %0, 0, %3\n\t"		/* clear reservation (74xx) */
 		"stdx %0, 0, %7\n\t"
 	    #else
 		"stwcx. %0, 0, %3\n\t"		/* clear reservation (74xx) */
 		"stwx %0, 0, %7\n\t"
 	    #endif
 		"li %0, 0\n\t"			/* failure - retval = 0 */
 		"2:\n\t"
 		: "=&r" (ret), "=m" (*p), "=m" (*cmpval)
 		: "r" (p), "r" (*cmpval), "r" (newval), "m" (*p), "r"(cmpval)
 		: "cr0", "memory");
 
 	return (ret);
 }
 
 /*
  * Generate atomic_fcmpset_acq_<type>() and atomic_fcmpset_rel_<type>() on
  * top of atomic_fcmpset_<type>().  The acquire form issues __ATOMIC_ACQ()
  * after the operation; the release form issues __ATOMIC_REL() before it.
  * The result is kept in an int, matching the return type of both the
  * wrapped function and the wrappers.  The trailing "struct hack" lets
  * each instantiation be terminated with a semicolon.
  */
 #define	ATOMIC_FCMPSET_ACQ_REL(type) \
     static __inline int \
     atomic_fcmpset_acq_##type(volatile u_##type *p, \
 	    u_##type *cmpval, u_##type newval)\
     {\
 	int retval; \
 	retval = atomic_fcmpset_##type(p, cmpval, newval);\
 	__ATOMIC_ACQ();\
 	return (retval);\
     }\
     static __inline int \
     atomic_fcmpset_rel_##type(volatile u_##type *p, \
 	    u_##type *cmpval, u_##type newval)\
     {\
 	__ATOMIC_REL();\
 	return (atomic_fcmpset_##type(p, cmpval, newval));\
     }\
     struct hack
 
 ATOMIC_FCMPSET_ACQ_REL(int);
 ATOMIC_FCMPSET_ACQ_REL(long);
 
 #ifdef ISA_206_ATOMICS
 #define	atomic_fcmpset_8	atomic_fcmpset_char
 #endif
 #define	atomic_fcmpset_acq_8	atomic_fcmpset_acq_char
 #define	atomic_fcmpset_rel_8	atomic_fcmpset_rel_char
 
 #ifdef ISA_206_ATOMICS
 #define	atomic_fcmpset_16	atomic_fcmpset_short
 #endif
 #define	atomic_fcmpset_acq_16	atomic_fcmpset_acq_short
 #define	atomic_fcmpset_rel_16	atomic_fcmpset_rel_short
 
 #define	atomic_fcmpset_32	atomic_fcmpset_int
 #define	atomic_fcmpset_acq_32	atomic_fcmpset_acq_int
 #define	atomic_fcmpset_rel_32	atomic_fcmpset_rel_int
 
 #ifdef __powerpc64__
 #define	atomic_fcmpset_64	atomic_fcmpset_long
 #define	atomic_fcmpset_acq_64	atomic_fcmpset_acq_long
 #define	atomic_fcmpset_rel_64	atomic_fcmpset_rel_long
 
 #define	atomic_fcmpset_ptr	atomic_fcmpset_long
 #define	atomic_fcmpset_acq_ptr	atomic_fcmpset_acq_long
 #define	atomic_fcmpset_rel_ptr	atomic_fcmpset_rel_long
 #else
 #define	atomic_fcmpset_ptr	atomic_fcmpset_int
 #define	atomic_fcmpset_acq_ptr	atomic_fcmpset_acq_int
 #define	atomic_fcmpset_rel_ptr	atomic_fcmpset_rel_int
 #endif
 
 /*
  * Add v to *p and return the value *p held beforehand.  Each pass reads
  * *p and tries to install the sum with atomic_cmpset_int(); the loop ends
  * on the first successful compare-and-set.
  */
 static __inline u_int
 atomic_fetchadd_int(volatile u_int *p, u_int v)
 {
 	u_int old;
 
 	for (;;) {
 		old = *p;
 		if (atomic_cmpset_int(p, old, old + v))
 			return (old);
 	}
 }
 
 /*
  * Add v to *p and return the value *p held beforehand.  Each pass reads
  * *p and tries to install the sum with atomic_cmpset_long(); the loop ends
  * on the first successful compare-and-set.
  */
 static __inline u_long
 atomic_fetchadd_long(volatile u_long *p, u_long v)
 {
 	u_long old;
 
 	for (;;) {
 		old = *p;
 		if (atomic_cmpset_long(p, old, old + v))
 			return (old);
 	}
 }
 
 /*
  * Atomically store v into *p and return the previous contents, retrying
  * the lwarx/stwcx. pair until the conditional store succeeds.
  */
 static __inline u_int
 atomic_swap_32(volatile u_int *p, u_int v)
 {
 	u_int prev;
 
 	__asm __volatile(
 	"1:	lwarx	%0,0,%2\n"
 	"	stwcx.	%3,0,%2\n"
 	"	bne-	1b\n"
 	: "=&r" (prev), "+m" (*(volatile u_int *)p)
 	: "r" (p), "r" (v)
 	: "cr0", "memory");
 
 	return (prev);
 }
 
 #ifdef __powerpc64__
 /*
  * Atomically store v into *p and return the previous contents, retrying
  * the ldarx/stdcx. pair until the conditional store succeeds.
  */
 static __inline u_long
 atomic_swap_64(volatile u_long *p, u_long v)
 {
 	u_long prev;
 
 	__asm __volatile(
 	"1:	ldarx	%0,0,%2\n"
 	"	stdcx.	%3,0,%2\n"
 	"	bne-	1b\n"
 	: "=&r" (prev), "+m" (*(volatile u_long *)p)
 	: "r" (p), "r" (v)
 	: "cr0", "memory");
 
 	return (prev);
 }
 #endif
 
 #define	atomic_fetchadd_32	atomic_fetchadd_int
 #define	atomic_swap_int		atomic_swap_32
 
 #ifdef __powerpc64__
 #define	atomic_fetchadd_64	atomic_fetchadd_long
 #define	atomic_swap_long	atomic_swap_64
 #define	atomic_swap_ptr		atomic_swap_64
 #else
 #define	atomic_swap_long(p,v)	atomic_swap_32((volatile u_int *)(p), v)
 #define	atomic_swap_ptr(p,v)	atomic_swap_32((volatile u_int *)(p), v)
 #endif
 
 /*
  * Atomically set bit (v & 0x1f) of *p.  Returns 1 if that bit was already
  * set in the value that was loaded, 0 otherwise.
  */
 static __inline int
 atomic_testandset_int(volatile u_int *p, u_int v)
 {
 	u_int m = (1u << (v & 0x1f));
 	u_int res;
 	u_int tmp;
 
 	__asm __volatile(
 	"1:	lwarx	%0,0,%3\n"
 	"	and	%1,%0,%4\n"
 	"	or	%0,%0,%4\n"
 	"	stwcx.	%0,0,%3\n"
 	"	bne-	1b\n"
 	: "=&r"(tmp), "=&r"(res), "+m"(*p)
 	: "r"(p), "r"(m)
 	: "cr0", "memory");
 
 	return (res != 0);
 }
 
 /*
  * Atomically clear bit (v & 0x1f) of *p.  Returns 1 if that bit was set
  * in the value that was loaded, 0 otherwise.
  */
 static __inline int
 atomic_testandclear_int(volatile u_int *p, u_int v)
 {
 	u_int m = (1u << (v & 0x1f));
 	u_int res;
 	u_int tmp;
 
 	__asm __volatile(
 	"1:	lwarx	%0,0,%3\n"
 	"	and	%1,%0,%4\n"
 	"	andc	%0,%0,%4\n"
 	"	stwcx.	%0,0,%3\n"
 	"	bne-	1b\n"
 	: "=&r"(tmp), "=&r"(res), "+m"(*p)
 	: "r"(p), "r"(m)
 	: "cr0", "memory");
 
 	return (res != 0);
 }
 
 #ifdef __powerpc64__
 /*
  * 64-bit build: atomically set bit (v & 0x3f) of *p.  Returns 1 if that
  * bit was already set in the value that was loaded, 0 otherwise.
  */
 static __inline int
 atomic_testandset_long(volatile u_long *p, u_int v)
 {
 	u_long m = (1ul << (v & 0x3f));
 	u_long res;
 	u_long tmp;
 
 	__asm __volatile(
 	"1:	ldarx	%0,0,%3\n"
 	"	and	%1,%0,%4\n"
 	"	or	%0,%0,%4\n"
 	"	stdcx.	%0,0,%3\n"
 	"	bne-	1b\n"
 	: "=&r"(tmp), "=&r"(res), "+m"(*(volatile u_long *)p)
 	: "r"(p), "r"(m)
 	: "cr0", "memory");
 
 	return (res != 0);
 }
 
 /*
  * 64-bit build: atomically clear bit (v & 0x3f) of *p.  Returns 1 if that
  * bit was set in the value that was loaded, 0 otherwise.
  */
 static __inline int
 atomic_testandclear_long(volatile u_long *p, u_int v)
 {
 	u_long m = (1ul << (v & 0x3f));
 	u_long res;
 	u_long tmp;
 
 	__asm __volatile(
 	"1:	ldarx	%0,0,%3\n"
 	"	and	%1,%0,%4\n"
 	"	andc	%0,%0,%4\n"
 	"	stdcx.	%0,0,%3\n"
 	"	bne-	1b\n"
 	: "=&r"(tmp), "=&r"(res), "+m"(*p)
 	: "r"(p), "r"(m)
 	: "cr0", "memory");
 
 	return (res != 0);
 }
 #else
 /* 32-bit build: defer to the int implementation on the same address. */
 static __inline int
 atomic_testandset_long(volatile u_long *p, u_int v)
 {
 	volatile u_int *word = (volatile u_int *)p;
 
 	return (atomic_testandset_int(word, v));
 }
 
 /* 32-bit build: defer to the int implementation on the same address. */
 static __inline int
 atomic_testandclear_long(volatile u_long *p, u_int v)
 {
 	volatile u_int *word = (volatile u_int *)p;
 
 	return (atomic_testandclear_int(word, v));
 }
 #endif
 
 #define	atomic_testandclear_32	atomic_testandclear_int
 #define	atomic_testandset_32	atomic_testandset_int
 
 /*
  * atomic_testandset_long() followed by __ATOMIC_ACQ(); returns what the
  * underlying test-and-set reported.
  */
 static __inline int
 atomic_testandset_acq_long(volatile u_long *p, u_int v)
 {
 	u_int was_set;
 
 	was_set = atomic_testandset_long(p, v);
 	__ATOMIC_ACQ();
 	return (was_set);
 }
 
-#define	atomic_testandclear_int		atomic_testandclear_int
-#define	atomic_testandset_int		atomic_testandset_int
-#define	atomic_testandclear_long	atomic_testandclear_long
-#define	atomic_testandset_long		atomic_testandset_long
-#define	atomic_testandset_acq_long	atomic_testandset_acq_long
-
 /* Acquire fence, implemented with powerpc_lwsync(). */
 static __inline void
 atomic_thread_fence_acq(void)
 {
 
 	powerpc_lwsync();
 }
 
 /* Release fence, implemented with powerpc_lwsync(). */
 static __inline void
 atomic_thread_fence_rel(void)
 {
 
 	powerpc_lwsync();
 }
 
 /* Acquire-release fence, implemented with powerpc_lwsync(). */
 static __inline void
 atomic_thread_fence_acq_rel(void)
 {
 
 	powerpc_lwsync();
 }
 
 /*
  * Sequentially consistent fence: always the full "sync" instruction,
  * regardless of CPU width.
  */
 static __inline void
 atomic_thread_fence_seq_cst(void)
 {
 
 	__asm __volatile("sync" : : : "memory");
 }
 
 /*
  * When ISA_206_ATOMICS is not defined, the native char/short
  * compare-and-set functions are not compiled, so the char/short names are
  * mapped onto the _8/_16 names (presumably supplied by
  * sys/_atomic_subword.h -- confirm against that header).
  */
 #ifndef ISA_206_ATOMICS
 #include <sys/_atomic_subword.h>
 #define	atomic_cmpset_char	atomic_cmpset_8
 #define	atomic_cmpset_short	atomic_cmpset_16
 #define	atomic_fcmpset_char	atomic_fcmpset_8
 #define	atomic_fcmpset_short	atomic_fcmpset_16
 #endif
 
 /* These need sys/_atomic_subword.h on non-ISA-2.06-atomic platforms. */
 ATOMIC_CMPSET_ACQ_REL(char);
 ATOMIC_CMPSET_ACQ_REL(short);
 
 ATOMIC_FCMPSET_ACQ_REL(char);
 ATOMIC_FCMPSET_ACQ_REL(short);
 
 #undef __ATOMIC_REL
 #undef __ATOMIC_ACQ
 
 #endif /* ! _MACHINE_ATOMIC_H_ */
diff --git a/sys/sys/_atomic_subword.h b/sys/sys/_atomic_subword.h
index dc1b2f23e731..dad23383f642 100644
--- a/sys/sys/_atomic_subword.h
+++ b/sys/sys/_atomic_subword.h
@@ -1,272 +1,208 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause
  *
  * Copyright (c) 2019 Kyle Evans <kevans@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 #ifndef _SYS__ATOMIC_SUBWORD_H_
 #define	_SYS__ATOMIC_SUBWORD_H_
 
 /*
  * This header is specifically for platforms that either do not have ways to or
  * simply do not do sub-word atomic operations.  These are not ideal as they
  * require a little more effort to make sure our atomic operations are failing
  * because of the bits of the word we're trying to write rather than the rest
  * of the word.
  */
 #ifndef _MACHINE_ATOMIC_H_
 #error do not include this header, use machine/atomic.h
 #endif
 
 #include <machine/endian.h>
 #ifndef _KERNEL
 #include <stdbool.h>
 #endif
 
 #ifndef NBBY
 #define	NBBY	8
 #endif
 
 /*
  * Round p down to the start of the 4-byte word that contains it and yield
  * that address as a uint32_t pointer.  The whole expansion is parenthesized
  * so that postfix operators applied to the macro bind to the resulting
  * pointer rather than to the integer expression under the cast.
  */
 #define	_ATOMIC_WORD_ALIGNED(p)		\
     ((uint32_t *)((__uintptr_t)(p) - ((__uintptr_t)(p) % 4)))
 
 /*
  * Bit offset, within the containing 32-bit word, of the byte or half-word
  * that p points at.  On big-endian the lowest address holds the most
  * significant bits, so the offset is counted down from the top of the word
  * (3 - offset for a byte, 2 - offset for a half-word); on little-endian it
  * is simply the byte offset within the word times NBBY.
  * NOTE(review): the half-word form assumes p is 2-byte aligned -- confirm.
  */
 #if _BYTE_ORDER == _BIG_ENDIAN
 #define	_ATOMIC_BYTE_SHIFT(p)		\
     ((3 - ((__uintptr_t)(p) % 4)) * NBBY)
 
 #define	_ATOMIC_HWORD_SHIFT(p)		\
     ((2 - ((__uintptr_t)(p) % 4)) * NBBY)
 #else
 #define	_ATOMIC_BYTE_SHIFT(p)		\
     ((((__uintptr_t)(p) % 4)) * NBBY)
 
 #define	_ATOMIC_HWORD_SHIFT(p)		\
     ((((__uintptr_t)(p) % 4)) * NBBY)
 #endif
 
 #ifndef	_atomic_cmpset_masked_word
 /*
  * Compare-and-set only the bits selected by mask within the 32-bit word at
  * addr.
  *
  * old and val must already be shifted into the position selected by mask.
  * The full-word fcmpset is retried for as long as it fails while the masked
  * bits it reports still equal old, i.e. while the failure is due to the bits
  * outside mask (or is spurious) rather than to the bits being compared.
  *
  * Returns the result of the last atomic_fcmpset_32() attempt: non-zero when
  * the word was updated, zero when the loop stopped because the masked bits
  * no longer matched old.
  */
 static __inline int
 _atomic_cmpset_masked_word(uint32_t *addr, uint32_t old, uint32_t val,
     uint32_t mask)
 {
 	int ret;
 	uint32_t wcomp;
 
 	/* Keep the caller's expected bits; old is reused as the scratch word. */
 	wcomp = old;
 
 	/*
 	 * We'll attempt the cmpset on the entire word.  Loop here in case the
 	 * operation fails due to the other half-word resident in that word,
 	 * rather than the half-word we're trying to operate on.  Ideally we
 	 * only take one trip through here.  We'll have to recalculate the old
 	 * value since it's the other part of the word changing.
 	 */
 	do {
 		/* Expected word: current bits outside mask, wanted bits inside. */
 		old = (*addr & ~mask) | wcomp;
 		ret = atomic_fcmpset_32(addr, &old, (old & ~mask) | val);
 	} while (ret == 0 && (old & mask) == wcomp);
 
 	return (ret);
 }
 #endif
 
 #ifndef	_atomic_fcmpset_masked_word
 /*
  * Single-attempt fcmpset of the bits selected by mask within the 32-bit word
  * at addr.
  *
  * On entry *old holds the expected bits and val the new bits, both already
  * shifted into the position selected by mask.  *old is first widened to a
  * full-word expectation using the current bits outside mask, and that word
  * is then handed to atomic_fcmpset_32(), whose result is returned unchanged.
  * Callers in this file extract the masked bits from *old again on failure.
  */
 static __inline int
 _atomic_fcmpset_masked_word(uint32_t *addr, uint32_t *old, uint32_t val,
     uint32_t mask)
 {
 
 	/*
 	 * fcmpset_* is documented in atomic(9) to allow spurious failures where
 	 * *old == val on ll/sc architectures because the sc may fail due to
 	 * parallel writes or other reasons.  We take advantage of that here
 	 * and only attempt once, because the caller should be compensating for
 	 * that possibility.
 	 */
 	*old = (*addr & ~mask) | *old;
 	return (atomic_fcmpset_32(addr, old, (*old & ~mask) | val));
 }
 #endif
 
 #ifndef atomic_cmpset_8
 /*
  * 8-bit compare-and-set built on the containing 32-bit word.
  *
  * addr: byte to update; old: expected value; val: replacement value.
  * Returns the result of _atomic_cmpset_masked_word().
  *
  * The operands are widened to uint32_t before shifting: uint8_t and 0xff
  * would otherwise be shifted as signed int, and moving a set bit into bit 31
  * (shift of 24) is undefined behaviour.
  */
 static __inline int
 atomic_cmpset_8(__volatile uint8_t *addr, uint8_t old, uint8_t val)
 {
 	int shift;
 
 	shift = _ATOMIC_BYTE_SHIFT(addr);
 
 	return (_atomic_cmpset_masked_word(_ATOMIC_WORD_ALIGNED(addr),
 	    (uint32_t)old << shift, (uint32_t)val << shift,
 	    (uint32_t)0xff << shift));
 }
 #endif
 
 #ifndef atomic_fcmpset_8
 /*
  * 8-bit fcmpset built on the containing 32-bit word.
  *
  * addr: byte to update; old: in/out expected value; val: replacement value.
  * Returns the result of _atomic_fcmpset_masked_word(); when that is zero,
  * *old is refreshed with the byte extracted from the word it reported.
  *
  * The operands are widened to uint32_t before shifting: uint8_t and 0xff
  * would otherwise be shifted as signed int, and moving a set bit into bit 31
  * (shift of 24) is undefined behaviour.
  */
 static __inline int
 atomic_fcmpset_8(__volatile uint8_t *addr, uint8_t *old, uint8_t val)
 {
 	int ret, shift;
 	uint32_t wold;
 
 	shift = _ATOMIC_BYTE_SHIFT(addr);
 	wold = (uint32_t)*old << shift;
 	ret = _atomic_fcmpset_masked_word(_ATOMIC_WORD_ALIGNED(addr),
 	    &wold, (uint32_t)val << shift, (uint32_t)0xff << shift);
 	if (ret == 0)
 		*old = (wold >> shift) & 0xff;
 	return (ret);
 }
 #endif
 
 #ifndef atomic_cmpset_16
 /*
  * 16-bit compare-and-set built on the containing 32-bit word.
  *
  * addr: half-word to update; old: expected value; val: replacement value.
  * Returns the result of _atomic_cmpset_masked_word().
  *
  * The operands are widened to uint32_t before shifting: uint16_t and 0xffff
  * would otherwise be shifted as signed int, and moving a set bit into bit 31
  * (shift of 16) is undefined behaviour.
  */
 static __inline int
 atomic_cmpset_16(__volatile uint16_t *addr, uint16_t old, uint16_t val)
 {
 	int shift;
 
 	shift = _ATOMIC_HWORD_SHIFT(addr);
 
 	return (_atomic_cmpset_masked_word(_ATOMIC_WORD_ALIGNED(addr),
 	    (uint32_t)old << shift, (uint32_t)val << shift,
 	    (uint32_t)0xffff << shift));
 }
 #endif
 
 #ifndef atomic_fcmpset_16
 /*
  * 16-bit fcmpset built on the containing 32-bit word.
  *
  * addr: half-word to update; old: in/out expected value; val: replacement
  * value.  Returns the result of _atomic_fcmpset_masked_word(); when that is
  * zero, *old is refreshed with the half-word extracted from the word it
  * reported.
  *
  * The operands are widened to uint32_t before shifting: uint16_t and 0xffff
  * would otherwise be shifted as signed int, and moving a set bit into bit 31
  * (shift of 16) is undefined behaviour.
  */
 static __inline int
 atomic_fcmpset_16(__volatile uint16_t *addr, uint16_t *old, uint16_t val)
 {
 	int ret, shift;
 	uint32_t wold;
 
 	shift = _ATOMIC_HWORD_SHIFT(addr);
 	wold = (uint32_t)*old << shift;
 	ret = _atomic_fcmpset_masked_word(_ATOMIC_WORD_ALIGNED(addr),
 	    &wold, (uint32_t)val << shift, (uint32_t)0xffff << shift);
 	if (ret == 0)
 		*old = (wold >> shift) & 0xffff;
 	return (ret);
 }
 #endif
 
 #ifndef atomic_load_acq_8
 /*
  * 8-bit acquire load: read the containing 32-bit word with
  * atomic_load_acq_32() and return the byte that p addresses within it.
  */
 static __inline uint8_t
 atomic_load_acq_8(volatile uint8_t *p)
 {
 	uint32_t word;
 
 	word = atomic_load_acq_32(_ATOMIC_WORD_ALIGNED(p));
 	word >>= _ATOMIC_BYTE_SHIFT(p);
 
 	return (word & 0xff);
 }
 #endif
 
 #ifndef atomic_load_acq_16
 /*
  * 16-bit acquire load: read the containing 32-bit word with
  * atomic_load_acq_32() and return the half-word that p addresses within it.
  */
 static __inline uint16_t
 atomic_load_acq_16(volatile uint16_t *p)
 {
 	uint32_t word;
 
 	word = atomic_load_acq_32(_ATOMIC_WORD_ALIGNED(p));
 	word >>= _ATOMIC_HWORD_SHIFT(p);
 
 	return (word & 0xffff);
 }
 #endif
 
 #undef _ATOMIC_WORD_ALIGNED
 #undef _ATOMIC_BYTE_SHIFT
 #undef _ATOMIC_HWORD_SHIFT
 
-/*
- * Provide generic testandset_long implementation based on fcmpset long
- * primitive.  It may not be ideal for any given arch, so machine/atomic.h
- * should define the macro atomic_testandset_long to override with an
- * MD-specific version.
- *
- * (Organizationally, this isn't really subword atomics.  But atomic_common is
- * included too early in machine/atomic.h, so it isn't a good place for derived
- * primitives like this.)
- */
-#ifndef atomic_testandset_acq_long
-static __inline int
-atomic_testandset_acq_long(volatile u_long *p, u_int v)
-{
-	u_long bit, old;
-	bool ret;
-
-	bit = (1ul << (v % (sizeof(*p) * NBBY)));
-
-	old = atomic_load_acq_long(p);
-	ret = false;
-	while (!ret && (old & bit) == 0)
-		ret = atomic_fcmpset_acq_long(p, &old, old | bit);
-
-	return (!ret);
-}
-#endif
-
-#ifndef atomic_testandset_long
-static __inline int
-atomic_testandset_long(volatile u_long *p, u_int v)
-{
-	u_long bit, old;
-	bool ret;
-
-	bit = (1ul << (v % (sizeof(*p) * NBBY)));
-
-	old = atomic_load_long(p);
-	ret = false;
-	while (!ret && (old & bit) == 0)
-		ret = atomic_fcmpset_long(p, &old, old | bit);
-
-	return (!ret);
-}
-#endif
-
-#ifndef atomic_testandclear_long
-static __inline int
-atomic_testandclear_long(volatile u_long *p, u_int v)
-{
-	u_long bit, old;
-	bool ret;
-
-	bit = (1ul << (v % (sizeof(*p) * NBBY)));
-
-	old = atomic_load_long(p);
-	ret = false;
-	while (!ret && (old & bit) != 0)
-		ret = atomic_fcmpset_long(p, &old, old & ~bit);
-
-	return (ret);
-}
-#endif
-
 #endif	/* _SYS__ATOMIC_SUBWORD_H_ */