Index: head/sys/mips/include/atomic.h
===================================================================
--- head/sys/mips/include/atomic.h	(revision 327096)
+++ head/sys/mips/include/atomic.h	(revision 327097)
@@ -1,776 +1,758 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 1998 Doug Rabson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: src/sys/alpha/include/atomic.h,v 1.21.2.3 2005/10/06 18:12:05 jhb
  * $FreeBSD$
  */
 
 #ifndef _MACHINE_ATOMIC_H_
 #define	_MACHINE_ATOMIC_H_
 
 #ifndef _SYS_CDEFS_H_
 #error this file needs sys/cdefs.h as a prerequisite
 #endif
 
 #include <sys/atomic_common.h>
 
 /*
  * Note: All the 64-bit atomic operations are only atomic when running
  * in 64-bit mode.  It is assumed that code compiled for n32 and n64
  * fits into this definition and no further safeties are needed.
  *
  * It is also assumed that the add, subtract and other arithmetic is
  * done on numbers, not pointers.  n32 pointers, which follow special
  * rules, do not have atomic operations defined for them, but generally
  * shouldn't need them.
  */
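 
 /*
  * For example (an illustrative sketch; "counter64"/"counter32" are
  * invented names): a caller that wants a 64-bit counter must itself
  * compile conditionally, since the 64-bit operations below exist only
  * on n32 and n64:
  *
  *	#if defined(__mips_n64) || defined(__mips_n32)
  *		atomic_add_64(&counter64, 1);
  *	#else
  *		atomic_add_32(&counter32, 1);
  *	#endif
  */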
 #ifndef __MIPS_PLATFORM_SYNC_NOPS
 #define __MIPS_PLATFORM_SYNC_NOPS ""
 #endif
 
 static __inline  void
 mips_sync(void)
 {
 	__asm __volatile (".set noreorder\n"
 			"\tsync\n"
 			__MIPS_PLATFORM_SYNC_NOPS
 			".set reorder\n"
 			: : : "memory");
 }
 
 #define mb()	mips_sync()
 #define wmb()	mips_sync()
 #define rmb()	mips_sync()
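 
 /*
  * Note that mb(), wmb() and rmb() all map to the same full "sync";
  * this implementation does not provide lighter-weight read-only or
  * write-only barriers.
  */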
 
 /*
  * Various simple arithmetic operations on memory, atomic in the
  * presence of interrupts and SMP safe.
  */
 
 void atomic_set_8(__volatile uint8_t *, uint8_t);
 void atomic_clear_8(__volatile uint8_t *, uint8_t);
 void atomic_add_8(__volatile uint8_t *, uint8_t);
 void atomic_subtract_8(__volatile uint8_t *, uint8_t);
 
 void atomic_set_16(__volatile uint16_t *, uint16_t);
 void atomic_clear_16(__volatile uint16_t *, uint16_t);
 void atomic_add_16(__volatile uint16_t *, uint16_t);
 void atomic_subtract_16(__volatile uint16_t *, uint16_t);
 
 static __inline void
 atomic_set_32(__volatile uint32_t *p, uint32_t v)
 {
 	uint32_t temp;
 
 	__asm __volatile (
 		"1:\tll	%0, %3\n\t"		/* load old value */
 		"or	%0, %2, %0\n\t"		/* calculate new value */
 		"sc	%0, %1\n\t"		/* attempt to store */
 		"beqz	%0, 1b\n\t"		/* spin if failed */
 		: "=&r" (temp), "=m" (*p)
 		: "r" (v), "m" (*p)
 		: "memory");
 
 }
 
 static __inline void
 atomic_clear_32(__volatile uint32_t *p, uint32_t v)
 {
 	uint32_t temp;
 	v = ~v;
 
 	__asm __volatile (
 		"1:\tll	%0, %3\n\t"		/* load old value */
 		"and	%0, %2, %0\n\t"		/* calculate new value */
 		"sc	%0, %1\n\t"		/* attempt to store */
 		"beqz	%0, 1b\n\t"		/* spin if failed */
 		: "=&r" (temp), "=m" (*p)
 		: "r" (v), "m" (*p)
 		: "memory");
 }
 
 static __inline void
 atomic_add_32(__volatile uint32_t *p, uint32_t v)
 {
 	uint32_t temp;
 
 	__asm __volatile (
 		"1:\tll	%0, %3\n\t"		/* load old value */
 		"addu	%0, %2, %0\n\t"		/* calculate new value */
 		"sc	%0, %1\n\t"		/* attempt to store */
 		"beqz	%0, 1b\n\t"		/* spin if failed */
 		: "=&r" (temp), "=m" (*p)
 		: "r" (v), "m" (*p)
 		: "memory");
 }
 
 static __inline void
 atomic_subtract_32(__volatile uint32_t *p, uint32_t v)
 {
 	uint32_t temp;
 
 	__asm __volatile (
 		"1:\tll	%0, %3\n\t"		/* load old value */
 		"subu	%0, %2\n\t"		/* calculate new value */
 		"sc	%0, %1\n\t"		/* attempt to store */
 		"beqz	%0, 1b\n\t"		/* spin if failed */
 		: "=&r" (temp), "=m" (*p)
 		: "r" (v), "m" (*p)
 		: "memory");
 }
 
 static __inline uint32_t
 atomic_readandclear_32(__volatile uint32_t *addr)
 {
 	uint32_t result,temp;
 
 	__asm __volatile (
 		"1:\tll	 %0,%3\n\t"	/* load current value, asserting lock */
 		"li	 %1,0\n\t"		/* value to store */
 		"sc	 %1,%2\n\t"	/* attempt to store */
 		"beqz	 %1, 1b\n\t"		/* if the store failed, spin */
 		: "=&r"(result), "=&r"(temp), "=m" (*addr)
 		: "m" (*addr)
 		: "memory");
 
 	return result;
 }
 
 static __inline uint32_t
 atomic_readandset_32(__volatile uint32_t *addr, uint32_t value)
 {
 	uint32_t result,temp;
 
 	__asm __volatile (
 		"1:\tll	 %0,%3\n\t"	/* load current value, asserting lock */
 		"or      %1,$0,%4\n\t"
 		"sc	 %1,%2\n\t"	/* attempt to store */
 		"beqz	 %1, 1b\n\t"		/* if the store failed, spin */
 		: "=&r"(result), "=&r"(temp), "=m" (*addr)
 		: "m" (*addr), "r" (value)
 		: "memory");
 
 	return result;
 }
 
 #if defined(__mips_n64) || defined(__mips_n32)
 static __inline void
 atomic_set_64(__volatile uint64_t *p, uint64_t v)
 {
 	uint64_t temp;
 
 	__asm __volatile (
 		"1:\n\t"
 		"lld	%0, %3\n\t"		/* load old value */
 		"or	%0, %2, %0\n\t"		/* calculate new value */
 		"scd	%0, %1\n\t"		/* attempt to store */
 		"beqz	%0, 1b\n\t"		/* spin if failed */
 		: "=&r" (temp), "=m" (*p)
 		: "r" (v), "m" (*p)
 		: "memory");
 
 }
 
 static __inline void
 atomic_clear_64(__volatile uint64_t *p, uint64_t v)
 {
 	uint64_t temp;
 	v = ~v;
 
 	__asm __volatile (
 		"1:\n\t"
 		"lld	%0, %3\n\t"		/* load old value */
 		"and	%0, %2, %0\n\t"		/* calculate new value */
 		"scd	%0, %1\n\t"		/* attempt to store */
 		"beqz	%0, 1b\n\t"		/* spin if failed */
 		: "=&r" (temp), "=m" (*p)
 		: "r" (v), "m" (*p)
 		: "memory");
 }
 
 static __inline void
 atomic_add_64(__volatile uint64_t *p, uint64_t v)
 {
 	uint64_t temp;
 
 	__asm __volatile (
 		"1:\n\t"
 		"lld	%0, %3\n\t"		/* load old value */
 		"daddu	%0, %2, %0\n\t"		/* calculate new value */
 		"scd	%0, %1\n\t"		/* attempt to store */
 		"beqz	%0, 1b\n\t"		/* spin if failed */
 		: "=&r" (temp), "=m" (*p)
 		: "r" (v), "m" (*p)
 		: "memory");
 }
 
 static __inline void
 atomic_subtract_64(__volatile uint64_t *p, uint64_t v)
 {
 	uint64_t temp;
 
 	__asm __volatile (
 		"1:\n\t"
 		"lld	%0, %3\n\t"		/* load old value */
 		"dsubu	%0, %2\n\t"		/* calculate new value */
 		"scd	%0, %1\n\t"		/* attempt to store */
 		"beqz	%0, 1b\n\t"		/* spin if failed */
 		: "=&r" (temp), "=m" (*p)
 		: "r" (v), "m" (*p)
 		: "memory");
 }
 
 static __inline uint64_t
 atomic_readandclear_64(__volatile uint64_t *addr)
 {
 	uint64_t result,temp;
 
 	__asm __volatile (
 		"1:\n\t"
 		"lld	 %0, %3\n\t"		/* load old value */
 		"li	 %1, 0\n\t"		/* value to store */
 		"scd	 %1, %2\n\t"		/* attempt to store */
 		"beqz	 %1, 1b\n\t"		/* if the store failed, spin */
 		: "=&r"(result), "=&r"(temp), "=m" (*addr)
 		: "m" (*addr)
 		: "memory");
 
 	return result;
 }
 
 static __inline uint64_t
 atomic_readandset_64(__volatile uint64_t *addr, uint64_t value)
 {
 	uint64_t result,temp;
 
 	__asm __volatile (
 		"1:\n\t"
 		"lld	 %0,%3\n\t"		/* Load old value*/
 		"or      %1,$0,%4\n\t"
 		"scd	 %1,%2\n\t"		/* attempt to store */
 		"beqz	 %1, 1b\n\t"		/* if the store failed, spin */
 		: "=&r"(result), "=&r"(temp), "=m" (*addr)
 		: "m" (*addr), "r" (value)
 		: "memory");
 
 	return result;
 }
 #endif
 
 #define	ATOMIC_ACQ_REL(NAME, WIDTH)					\
 static __inline  void							\
 atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
 {									\
 	atomic_##NAME##_##WIDTH(p, v);					\
 	mips_sync(); 							\
 }									\
 									\
 static __inline  void							\
 atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
 {									\
 	mips_sync();							\
 	atomic_##NAME##_##WIDTH(p, v);					\
 }
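 
 /*
  * For example, ATOMIC_ACQ_REL(set, 32) generates atomic_set_acq_32()
  * (the operation followed by a full sync) and atomic_set_rel_32()
  * (a full sync followed by the operation).
  */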
 
 /* Variants of simple arithmetic with memory barriers. */
 ATOMIC_ACQ_REL(set, 8)
 ATOMIC_ACQ_REL(clear, 8)
 ATOMIC_ACQ_REL(add, 8)
 ATOMIC_ACQ_REL(subtract, 8)
 ATOMIC_ACQ_REL(set, 16)
 ATOMIC_ACQ_REL(clear, 16)
 ATOMIC_ACQ_REL(add, 16)
 ATOMIC_ACQ_REL(subtract, 16)
 ATOMIC_ACQ_REL(set, 32)
 ATOMIC_ACQ_REL(clear, 32)
 ATOMIC_ACQ_REL(add, 32)
 ATOMIC_ACQ_REL(subtract, 32)
 #if defined(__mips_n64) || defined(__mips_n32)
 ATOMIC_ACQ_REL(set, 64)
 ATOMIC_ACQ_REL(clear, 64)
 ATOMIC_ACQ_REL(add, 64)
 ATOMIC_ACQ_REL(subtract, 64)
 #endif
 
 #undef ATOMIC_ACQ_REL
 
 /*
  * We assume that a = b will do atomic loads and stores.
  */
 #define	ATOMIC_STORE_LOAD(WIDTH)			\
 static __inline  uint##WIDTH##_t			\
 atomic_load_acq_##WIDTH(__volatile uint##WIDTH##_t *p)	\
 {							\
 	uint##WIDTH##_t v;				\
 							\
 	v = *p;						\
 	mips_sync();					\
 	return (v);					\
 }							\
 							\
 static __inline  void					\
 atomic_store_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
 {							\
 	mips_sync();					\
 	*p = v;						\
 }
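 
 /*
  * Illustrative pairing (a sketch with invented names, not part of this
  * file): a producer publishes data with a release store and a consumer
  * picks it up with an acquire load, so the data write cannot be
  * reordered past the flag store, nor the data read hoisted above the
  * flag load:
  *
  *	data = compute();			// producer
  *	atomic_store_rel_32(&flag, 1);
  *
  *	while (atomic_load_acq_32(&flag) == 0)	// consumer
  *		continue;
  *	use(data);
  */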
 
 ATOMIC_STORE_LOAD(32)
 ATOMIC_STORE_LOAD(64)
-#if !defined(__mips_n64) && !defined(__mips_n32)
-void atomic_store_64(__volatile uint64_t *, uint64_t);
-uint64_t atomic_load_64(__volatile uint64_t *);
-#elif defined (__mips_n32)
-static __inline void
-atomic_store_64(__volatile uint64_t *p, uint64_t v)
-{
-	*p = v;
-}
-
-static __inline uint64_t
-atomic_load_64(__volatile uint64_t *p)
-{
-	return (*p);
-}
-/* #else atomic_common.h definitions of atomic_load/store_64 are used */
-#endif
-
 #undef ATOMIC_STORE_LOAD
 
 /*
  * Atomically compare the value stored at *p with cmpval and if the
  * two values are equal, update the value of *p with newval. Returns
  * zero if the compare failed, nonzero otherwise.
  */
 static __inline uint32_t
 atomic_cmpset_32(__volatile uint32_t *p, uint32_t cmpval, uint32_t newval)
 {
 	uint32_t ret;
 
 	__asm __volatile (
 		"1:\tll	%0, %4\n\t"		/* load old value */
 		"bne %0, %2, 2f\n\t"		/* compare */
 		"move %0, %3\n\t"		/* value to store */
 		"sc %0, %1\n\t"			/* attempt to store */
 		"beqz %0, 1b\n\t"		/* if it failed, spin */
 		"j 3f\n\t"
 		"2:\n\t"
 		"li	%0, 0\n\t"
 		"3:\n"
 		: "=&r" (ret), "=m" (*p)
 		: "r" (cmpval), "r" (newval), "m" (*p)
 		: "memory");
 
 	return ret;
 }
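 
 /*
  * Illustrative use (a sketch with an invented lock word, not part of
  * this file): a minimal spin-acquire built on this primitive, where
  * the lock word is 0 when free and 1 when held:
  *
  *	while (atomic_cmpset_acq_32(&lock, 0, 1) == 0)
  *		continue;		// spin until the CAS succeeds
  */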
 
 /*
  * Atomically compare the value stored at *p with cmpval and if the
  * two values are equal, update the value of *p with newval. Returns
  * zero if the compare failed, nonzero otherwise.
  */
 static __inline uint32_t
 atomic_cmpset_acq_32(__volatile uint32_t *p, uint32_t cmpval, uint32_t newval)
 {
 	int retval;
 
 	retval = atomic_cmpset_32(p, cmpval, newval);
 	mips_sync();
 	return (retval);
 }
 
 static __inline uint32_t
 atomic_cmpset_rel_32(__volatile uint32_t *p, uint32_t cmpval, uint32_t newval)
 {
 	mips_sync();
 	return (atomic_cmpset_32(p, cmpval, newval));
 }
 
 static __inline uint32_t
 atomic_fcmpset_32(__volatile uint32_t *p, uint32_t *cmpval, uint32_t newval)
 {
 	uint32_t ret;
 
 	__asm __volatile (
 		"1:\n\t"
 		"ll	%0, %1\n\t"		/* load old value */
 		"bne	%0, %4, 2f\n\t"		/* compare */
 		"move	%0, %3\n\t"		/* value to store */
 		"sc	%0, %1\n\t"		/* attempt to store */
 		"beqz	%0, 1b\n\t"		/* if it failed, spin */
 		"j	3f\n\t"
 		"2:\n\t"
 		"sw	%0, %2\n\t"		/* save old value */
 		"li	%0, 0\n\t"
 		"3:\n"
 		: "=&r" (ret), "+m" (*p), "=m" (*cmpval)
 		: "r" (newval), "r" (*cmpval)
 		: "memory");
 	return ret;
 }
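 
 /*
  * Illustrative use (a sketch; "bit" is an invented name): unlike
  * cmpset, a failed fcmpset writes the value it observed back through
  * cmpval, so a read-modify-write loop need not reload *p by hand:
  *
  *	uint32_t old, new;
  *
  *	old = *p;
  *	do {
  *		new = old | bit;
  *	} while (atomic_fcmpset_32(p, &old, new) == 0);
  */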
 
 static __inline uint32_t
 atomic_fcmpset_acq_32(__volatile uint32_t *p, uint32_t *cmpval, uint32_t newval)
 {
 	int retval;
 
 	retval = atomic_fcmpset_32(p, cmpval, newval);
 	mips_sync();
 	return (retval);
 }
 
 static __inline uint32_t
 atomic_fcmpset_rel_32(__volatile uint32_t *p, uint32_t *cmpval, uint32_t newval)
 {
 	mips_sync();
 	return (atomic_fcmpset_32(p, cmpval, newval));
 }
 
 /*
  * Atomically add the value of v to the integer pointed to by p and return
  * the previous value of *p.
  */
 static __inline uint32_t
 atomic_fetchadd_32(__volatile uint32_t *p, uint32_t v)
 {
 	uint32_t value, temp;
 
 	__asm __volatile (
 		"1:\tll %0, %1\n\t"		/* load old value */
 		"addu %2, %3, %0\n\t"		/* calculate new value */
 		"sc %2, %1\n\t"			/* attempt to store */
 		"beqz %2, 1b\n\t"		/* spin if failed */
 		: "=&r" (value), "=m" (*p), "=&r" (temp)
 		: "r" (v), "m" (*p));
 	return (value);
 }
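 
 /*
  * Illustrative use (a sketch; "next_ticket" is an invented name):
  * because the pre-add value is returned, a caller can hand out
  * unique tickets:
  *
  *	uint32_t my_ticket = atomic_fetchadd_32(&next_ticket, 1);
  */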
 
 #if defined(__mips_n64) || defined(__mips_n32)
 /*
  * Atomically compare the value stored at *p with cmpval and if the
  * two values are equal, update the value of *p with newval. Returns
  * zero if the compare failed, nonzero otherwise.
  */
 static __inline uint64_t
 atomic_cmpset_64(__volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
 {
 	uint64_t ret;
 
 	__asm __volatile (
 		"1:\n\t"
 		"lld	%0, %4\n\t"		/* load old value */
 		"bne	%0, %2, 2f\n\t"		/* compare */
 		"move	%0, %3\n\t"		/* value to store */
 		"scd	%0, %1\n\t"		/* attempt to store */
 		"beqz	%0, 1b\n\t"		/* if it failed, spin */
 		"j	3f\n\t"
 		"2:\n\t"
 		"li	%0, 0\n\t"
 		"3:\n"
 		: "=&r" (ret), "=m" (*p)
 		: "r" (cmpval), "r" (newval), "m" (*p)
 		: "memory");
 
 	return ret;
 }
 
 /*
  * Atomically compare the value stored at *p with cmpval and if the
  * two values are equal, update the value of *p with newval. Returns
  * zero if the compare failed, nonzero otherwise.
  */
 static __inline uint64_t
 atomic_cmpset_acq_64(__volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
 {
 	int retval;
 
 	retval = atomic_cmpset_64(p, cmpval, newval);
 	mips_sync();
 	return (retval);
 }
 
 static __inline uint64_t
 atomic_cmpset_rel_64(__volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
 {
 	mips_sync();
 	return (atomic_cmpset_64(p, cmpval, newval));
 }
 
 static __inline uint32_t
 atomic_fcmpset_64(__volatile uint64_t *p, uint64_t *cmpval, uint64_t newval)
 {
 	uint32_t ret;
 
 	__asm __volatile (
 		"1:\n\t"
 		"lld	%0, %1\n\t"		/* load old value */
 		"bne	%0, %4, 2f\n\t"		/* compare */
 		"move	%0, %3\n\t"		/* value to store */
 		"scd	%0, %1\n\t"		/* attempt to store */
 		"beqz	%0, 1b\n\t"		/* if it failed, spin */
 		"j	3f\n\t"
 		"2:\n\t"
 		"sd	%0, %2\n\t"		/* save old value */
 		"li	%0, 0\n\t"
 		"3:\n"
 		: "=&r" (ret), "+m" (*p), "=m" (*cmpval)
 		: "r" (newval), "r" (*cmpval)
 		: "memory");
 
 	return ret;
 }
 
 static __inline uint64_t
 atomic_fcmpset_acq_64(__volatile uint64_t *p, uint64_t *cmpval, uint64_t newval)
 {
 	int retval;
 
 	retval = atomic_fcmpset_64(p, cmpval, newval);
 	mips_sync();
 	return (retval);
 }
 
 static __inline uint64_t
 atomic_fcmpset_rel_64(__volatile uint64_t *p, uint64_t *cmpval, uint64_t newval)
 {
 	mips_sync();
 	return (atomic_fcmpset_64(p, cmpval, newval));
 }
 
 /*
  * Atomically add the value of v to the integer pointed to by p and return
  * the previous value of *p.
  */
 static __inline uint64_t
 atomic_fetchadd_64(__volatile uint64_t *p, uint64_t v)
 {
 	uint64_t value, temp;
 
 	__asm __volatile (
 		"1:\n\t"
 		"lld	%0, %1\n\t"		/* load old value */
 		"daddu	%2, %3, %0\n\t"		/* calculate new value */
 		"scd	%2, %1\n\t"		/* attempt to store */
 		"beqz	%2, 1b\n\t"		/* spin if failed */
 		: "=&r" (value), "=m" (*p), "=&r" (temp)
 		: "r" (v), "m" (*p));
 	return (value);
 }
 #endif
 
 static __inline void
 atomic_thread_fence_acq(void)
 {
 
 	mips_sync();
 }
 
 static __inline void
 atomic_thread_fence_rel(void)
 {
 
 	mips_sync();
 }
 
 static __inline void
 atomic_thread_fence_acq_rel(void)
 {
 
 	mips_sync();
 }
 
 static __inline void
 atomic_thread_fence_seq_cst(void)
 {
 
 	mips_sync();
 }
 
 /* Operations on chars. */
 #define	atomic_set_char		atomic_set_8
 #define	atomic_set_acq_char	atomic_set_acq_8
 #define	atomic_set_rel_char	atomic_set_rel_8
 #define	atomic_clear_char	atomic_clear_8
 #define	atomic_clear_acq_char	atomic_clear_acq_8
 #define	atomic_clear_rel_char	atomic_clear_rel_8
 #define	atomic_add_char		atomic_add_8
 #define	atomic_add_acq_char	atomic_add_acq_8
 #define	atomic_add_rel_char	atomic_add_rel_8
 #define	atomic_subtract_char	atomic_subtract_8
 #define	atomic_subtract_acq_char	atomic_subtract_acq_8
 #define	atomic_subtract_rel_char	atomic_subtract_rel_8
 
 /* Operations on shorts. */
 #define	atomic_set_short	atomic_set_16
 #define	atomic_set_acq_short	atomic_set_acq_16
 #define	atomic_set_rel_short	atomic_set_rel_16
 #define	atomic_clear_short	atomic_clear_16
 #define	atomic_clear_acq_short	atomic_clear_acq_16
 #define	atomic_clear_rel_short	atomic_clear_rel_16
 #define	atomic_add_short	atomic_add_16
 #define	atomic_add_acq_short	atomic_add_acq_16
 #define	atomic_add_rel_short	atomic_add_rel_16
 #define	atomic_subtract_short	atomic_subtract_16
 #define	atomic_subtract_acq_short	atomic_subtract_acq_16
 #define	atomic_subtract_rel_short	atomic_subtract_rel_16
 
 /* Operations on ints. */
 #define	atomic_set_int		atomic_set_32
 #define	atomic_set_acq_int	atomic_set_acq_32
 #define	atomic_set_rel_int	atomic_set_rel_32
 #define	atomic_clear_int	atomic_clear_32
 #define	atomic_clear_acq_int	atomic_clear_acq_32
 #define	atomic_clear_rel_int	atomic_clear_rel_32
 #define	atomic_add_int		atomic_add_32
 #define	atomic_add_acq_int	atomic_add_acq_32
 #define	atomic_add_rel_int	atomic_add_rel_32
 #define	atomic_subtract_int	atomic_subtract_32
 #define	atomic_subtract_acq_int	atomic_subtract_acq_32
 #define	atomic_subtract_rel_int	atomic_subtract_rel_32
 #define	atomic_cmpset_int	atomic_cmpset_32
 #define	atomic_cmpset_acq_int	atomic_cmpset_acq_32
 #define	atomic_cmpset_rel_int	atomic_cmpset_rel_32
 #define	atomic_fcmpset_int	atomic_fcmpset_32
 #define	atomic_fcmpset_acq_int	atomic_fcmpset_acq_32
 #define	atomic_fcmpset_rel_int	atomic_fcmpset_rel_32
 #define	atomic_load_acq_int	atomic_load_acq_32
 #define	atomic_store_rel_int	atomic_store_rel_32
 #define	atomic_readandclear_int	atomic_readandclear_32
 #define	atomic_readandset_int	atomic_readandset_32
 #define	atomic_fetchadd_int	atomic_fetchadd_32
 
 /*
  * I think the following is right, even for n32.  For n32 the pointers
  * are still 32 bits, so we need to operate on them as 32-bit quantities,
  * even though they are sign extended in operation.  For longs, there's
  * no question because they are always 32 bits.
  */
 #ifdef __mips_n64
 /* Operations on longs. */
 #define	atomic_set_long		atomic_set_64
 #define	atomic_set_acq_long	atomic_set_acq_64
 #define	atomic_set_rel_long	atomic_set_rel_64
 #define	atomic_clear_long	atomic_clear_64
 #define	atomic_clear_acq_long	atomic_clear_acq_64
 #define	atomic_clear_rel_long	atomic_clear_rel_64
 #define	atomic_add_long		atomic_add_64
 #define	atomic_add_acq_long	atomic_add_acq_64
 #define	atomic_add_rel_long	atomic_add_rel_64
 #define	atomic_subtract_long	atomic_subtract_64
 #define	atomic_subtract_acq_long	atomic_subtract_acq_64
 #define	atomic_subtract_rel_long	atomic_subtract_rel_64
 #define	atomic_cmpset_long	atomic_cmpset_64
 #define	atomic_cmpset_acq_long	atomic_cmpset_acq_64
 #define	atomic_cmpset_rel_long	atomic_cmpset_rel_64
 #define	atomic_fcmpset_long	atomic_fcmpset_64
 #define	atomic_fcmpset_acq_long	atomic_fcmpset_acq_64
 #define	atomic_fcmpset_rel_long	atomic_fcmpset_rel_64
 #define	atomic_load_acq_long	atomic_load_acq_64
 #define	atomic_store_rel_long	atomic_store_rel_64
 #define	atomic_fetchadd_long	atomic_fetchadd_64
 #define	atomic_readandclear_long	atomic_readandclear_64
 
 #else /* !__mips_n64 */
 
 /* Operations on longs. */
 #define	atomic_set_long(p, v)						\
 	atomic_set_32((volatile u_int *)(p), (u_int)(v))
 #define	atomic_set_acq_long(p, v)					\
 	atomic_set_acq_32((volatile u_int *)(p), (u_int)(v))
 #define	atomic_set_rel_long(p, v)					\
 	atomic_set_rel_32((volatile u_int *)(p), (u_int)(v))
 #define	atomic_clear_long(p, v)						\
 	atomic_clear_32((volatile u_int *)(p), (u_int)(v))
 #define	atomic_clear_acq_long(p, v)					\
 	atomic_clear_acq_32((volatile u_int *)(p), (u_int)(v))
 #define	atomic_clear_rel_long(p, v)					\
 	atomic_clear_rel_32((volatile u_int *)(p), (u_int)(v))
 #define	atomic_add_long(p, v)						\
 	atomic_add_32((volatile u_int *)(p), (u_int)(v))
 #define	atomic_add_acq_long(p, v)					\
 	atomic_add_32((volatile u_int *)(p), (u_int)(v))
 #define	atomic_add_rel_long(p, v)					\
 	atomic_add_32((volatile u_int *)(p), (u_int)(v))
 #define	atomic_subtract_long(p, v)					\
 	atomic_subtract_32((volatile u_int *)(p), (u_int)(v))
 #define	atomic_subtract_acq_long(p, v)					\
 	atomic_subtract_acq_32((volatile u_int *)(p), (u_int)(v))
 #define	atomic_subtract_rel_long(p, v)					\
 	atomic_subtract_rel_32((volatile u_int *)(p), (u_int)(v))
 #define	atomic_cmpset_long(p, cmpval, newval)				\
 	atomic_cmpset_32((volatile u_int *)(p), (u_int)(cmpval),	\
 	    (u_int)(newval))
 #define	atomic_cmpset_acq_long(p, cmpval, newval)			\
 	atomic_cmpset_acq_32((volatile u_int *)(p), (u_int)(cmpval),	\
 	    (u_int)(newval))
 #define	atomic_cmpset_rel_long(p, cmpval, newval)			\
 	atomic_cmpset_rel_32((volatile u_int *)(p), (u_int)(cmpval),	\
 	    (u_int)(newval))
 #define	atomic_fcmpset_long(p, cmpval, newval)				\
 	atomic_fcmpset_32((volatile u_int *)(p), (u_int *)(cmpval),	\
 	    (u_int)(newval))
 #define	atomic_fcmpset_acq_long(p, cmpval, newval)			\
 	atomic_fcmpset_acq_32((volatile u_int *)(p), (u_int *)(cmpval),	\
 	    (u_int)(newval))
 #define	atomic_fcmpset_rel_long(p, cmpval, newval)			\
 	atomic_fcmpset_rel_32((volatile u_int *)(p), (u_int *)(cmpval),	\
 	    (u_int)(newval))
 #define	atomic_load_acq_long(p)						\
 	(u_long)atomic_load_acq_32((volatile u_int *)(p))
 #define	atomic_store_rel_long(p, v)					\
 	atomic_store_rel_32((volatile u_int *)(p), (u_int)(v))
 #define	atomic_fetchadd_long(p, v)					\
 	atomic_fetchadd_32((volatile u_int *)(p), (u_int)(v))
 #define	atomic_readandclear_long(p)					\
 	atomic_readandclear_32((volatile u_int *)(p))
 
 #endif /* __mips_n64 */
 
 /* Operations on pointers. */
 #define	atomic_set_ptr		atomic_set_long
 #define	atomic_set_acq_ptr	atomic_set_acq_long
 #define	atomic_set_rel_ptr	atomic_set_rel_long
 #define	atomic_clear_ptr	atomic_clear_long
 #define	atomic_clear_acq_ptr	atomic_clear_acq_long
 #define	atomic_clear_rel_ptr	atomic_clear_rel_long
 #define	atomic_add_ptr		atomic_add_long
 #define	atomic_add_acq_ptr	atomic_add_acq_long
 #define	atomic_add_rel_ptr	atomic_add_rel_long
 #define	atomic_subtract_ptr	atomic_subtract_long
 #define	atomic_subtract_acq_ptr	atomic_subtract_acq_long
 #define	atomic_subtract_rel_ptr	atomic_subtract_rel_long
 #define	atomic_cmpset_ptr	atomic_cmpset_long
 #define	atomic_cmpset_acq_ptr	atomic_cmpset_acq_long
 #define	atomic_cmpset_rel_ptr	atomic_cmpset_rel_long
 #define	atomic_fcmpset_ptr	atomic_fcmpset_long
 #define	atomic_fcmpset_acq_ptr	atomic_fcmpset_acq_long
 #define	atomic_fcmpset_rel_ptr	atomic_fcmpset_rel_long
 #define	atomic_load_acq_ptr	atomic_load_acq_long
 #define	atomic_store_rel_ptr	atomic_store_rel_long
 #define	atomic_readandclear_ptr	atomic_readandclear_long
 
 #endif /* ! _MACHINE_ATOMIC_H_ */
Index: head/sys/mips/mips/db_interface.c
===================================================================
--- head/sys/mips/mips/db_interface.c	(revision 327096)
+++ head/sys/mips/mips/db_interface.c	(revision 327097)
@@ -1,350 +1,350 @@
 /*	$OpenBSD: db_machdep.c,v 1.2 1998/09/15 10:50:13 pefo Exp $ */
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
  *
  * Copyright (c) 1998 Per Fogelstrom, Opsycon AB
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed under OpenBSD by
  *	Per Fogelstrom, Opsycon AB, Sweden.
  * 4. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	JNPR: db_interface.c,v 1.6.2.1 2007/08/29 12:24:49 girish
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/cons.h>
 #include <sys/lock.h>
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <sys/user.h>
 #include <sys/proc.h>
 #include <sys/reboot.h>
 
 #include <machine/cache.h>
 #include <machine/db_machdep.h>
 #include <machine/mips_opcode.h>
 #include <machine/vmparam.h>
 #include <machine/md_var.h>
 #include <machine/setjmp.h>
 
 #include <ddb/ddb.h>
 #include <ddb/db_sym.h>
 #include <ddb/db_access.h>
 #include <ddb/db_output.h>
 #include <ddb/db_variables.h>
 #include <sys/kdb.h>
 
 static db_varfcn_t db_frame;
 
 #define	DB_OFFSET(x)	(db_expr_t *)offsetof(struct trapframe, x)
 struct db_variable db_regs[] = {
 	{ "at",  DB_OFFSET(ast),	db_frame },
 	{ "v0",  DB_OFFSET(v0),		db_frame },
 	{ "v1",  DB_OFFSET(v1),		db_frame },
 	{ "a0",  DB_OFFSET(a0),		db_frame },
 	{ "a1",  DB_OFFSET(a1),		db_frame },
 	{ "a2",  DB_OFFSET(a2),		db_frame },
 	{ "a3",  DB_OFFSET(a3),		db_frame },
 #if defined(__mips_n32) || defined(__mips_n64)
 	{ "a4",  DB_OFFSET(a4),		db_frame },
 	{ "a5",  DB_OFFSET(a5),		db_frame },
 	{ "a6",  DB_OFFSET(a6),		db_frame },
 	{ "a7",  DB_OFFSET(a7),		db_frame },
 	{ "t0",  DB_OFFSET(t0),		db_frame },
 	{ "t1",  DB_OFFSET(t1),		db_frame },
 	{ "t2",  DB_OFFSET(t2),		db_frame },
 	{ "t3",  DB_OFFSET(t3),		db_frame },
 #else
 	{ "t0",  DB_OFFSET(t0),		db_frame },
 	{ "t1",  DB_OFFSET(t1),		db_frame },
 	{ "t2",  DB_OFFSET(t2),		db_frame },
 	{ "t3",  DB_OFFSET(t3),		db_frame },
 	{ "t4",  DB_OFFSET(t4),		db_frame },
 	{ "t5",  DB_OFFSET(t5),		db_frame },
 	{ "t6",  DB_OFFSET(t6),		db_frame },
 	{ "t7",  DB_OFFSET(t7),		db_frame },
 #endif
 	{ "s0",  DB_OFFSET(s0),		db_frame },
 	{ "s1",  DB_OFFSET(s1),		db_frame },
 	{ "s2",  DB_OFFSET(s2),		db_frame },
 	{ "s3",  DB_OFFSET(s3),		db_frame },
 	{ "s4",  DB_OFFSET(s4),		db_frame },
 	{ "s5",  DB_OFFSET(s5),		db_frame },
 	{ "s6",  DB_OFFSET(s6),		db_frame },
 	{ "s7",  DB_OFFSET(s7),		db_frame },
 	{ "t8",  DB_OFFSET(t8),		db_frame },
 	{ "t9",  DB_OFFSET(t9),		db_frame },
 	{ "k0",  DB_OFFSET(k0),		db_frame },
 	{ "k1",  DB_OFFSET(k1),		db_frame },
 	{ "gp",  DB_OFFSET(gp),		db_frame },
 	{ "sp",  DB_OFFSET(sp),		db_frame },
 	{ "s8",  DB_OFFSET(s8),		db_frame },
 	{ "ra",  DB_OFFSET(ra),		db_frame },
 	{ "sr",  DB_OFFSET(sr),		db_frame },
 	{ "lo",  DB_OFFSET(mullo),	db_frame },
 	{ "hi",  DB_OFFSET(mulhi),	db_frame },
 	{ "bad", DB_OFFSET(badvaddr),	db_frame },
 	{ "cs",  DB_OFFSET(cause),	db_frame },
 	{ "pc",  DB_OFFSET(pc),		db_frame },
 };
 struct db_variable *db_eregs = db_regs + nitems(db_regs);
 
 int (*do_db_log_stack_trace_cmd)(char *);
 
 static int
 db_frame(struct db_variable *vp, db_expr_t *valuep, int op)
 {
 	register_t *reg;
 
 	if (kdb_frame == NULL)
 		return (0);
 
 	reg = (register_t *)((uintptr_t)kdb_frame + (size_t)(intptr_t)vp->valuep);
 	if (op == DB_VAR_GET)
 		*valuep = *reg;
 	else
 		*reg = *valuep;
 	return (1);
 }
 
 int
 db_read_bytes(vm_offset_t addr, size_t size, char *data)
 {
 	jmp_buf jb;
 	void *prev_jb;
 	int ret;
 
 	prev_jb = kdb_jmpbuf(jb);
 	ret = setjmp(jb);
 	if (ret == 0) {
 		/*
 		 * 'addr' could be a memory-mapped I/O address.  Try to
 		 * do atomic load/store in unit of size requested.
+		 * size == 8 is atomic only on n64 or n32 kernels.
 		 */
 		if ((size == 2 || size == 4 || size == 8) &&
 		    ((addr & (size -1)) == 0) &&
 		    (((vm_offset_t)data & (size -1)) == 0)) {
 			switch (size) {
 			case 2:
 				*(uint16_t *)data = *(uint16_t *)addr;
 				break;
 			case 4:
 				*(uint32_t *)data = *(uint32_t *)addr;
 				break;
 			case 8:
-				*(uint64_t *)data = atomic_load_64(
-				    (void *)addr);
+				*(uint64_t *)data = *(uint64_t *)addr;
 				break;
 			}
 		} else {
 			char *src;
 
 			src = (char *)addr;
 			while (size-- > 0)
 				*data++ = *src++;
 		}
 	}
 
 	(void)kdb_jmpbuf(prev_jb);
 	return (ret);
 }
 
 int
 db_write_bytes(vm_offset_t addr, size_t size, char *data)
 {
 	int ret;
 	jmp_buf jb;
 	void *prev_jb;
 
 	prev_jb = kdb_jmpbuf(jb);
 	ret = setjmp(jb);
 
 	if (ret == 0) {
 		/*
 		 * 'addr' could be a memory-mapped I/O address.  Try to
 		 * do atomic load/store in unit of size requested.
+		 * size == 8 is atomic only on n64 or n32 kernels.
 		 */
 		if ((size == 2 || size == 4 || size == 8) &&
 		    ((addr & (size -1)) == 0) &&
 		    (((vm_offset_t)data & (size -1)) == 0)) {
 			switch (size) {
 			case 2:
 				*(uint16_t *)addr = *(uint16_t *)data;
 				break;
 			case 4:
 				*(uint32_t *)addr = *(uint32_t *)data;
 				break;
 			case 8:
-				atomic_store_64((uint64_t *)addr,
-				    *(uint64_t *)data);
+				*(uint64_t *)addr = *(uint64_t *)data;
 				break;
 			}
 		} else {
 			char *dst;
 			size_t len = size;
 
 			dst = (char *)addr;
 			while (len-- > 0)
 				*dst++ = *data++;
 		}
 
 		mips_icache_sync_range((db_addr_t) addr, size);
 		mips_dcache_wbinv_range((db_addr_t) addr, size);
 	}
 	(void)kdb_jmpbuf(prev_jb);
 	return (ret);
 }
 
 /*
  *	To single step, ddb needs to know the next address that we
  *	will get to.  Normally that would mean finding both the
  *	branch-taken and the branch-not-taken address, NOT! :-)
  *	MipsEmulateBranch will find out _exactly_ which address we
  *	will end up at, so the 'dual breakpoint' method is not
  *	required.
  */
 db_addr_t
 next_instr_address(db_addr_t pc, boolean_t bd)
 {
 	db_addr_t next;
 
 	next = (db_addr_t)MipsEmulateBranch(kdb_frame, pc, 0, 0);
 	return (next);
 }
 
 
 /*
  *	Decode instruction and figure out type.
  */
 int
 db_inst_type(int ins)
 {
 	InstFmt inst;
 	int	ityp = 0;
 
 	inst.word = ins;
 	switch ((int)inst.JType.op) {
 	case OP_SPECIAL:
 		switch ((int)inst.RType.func) {
 		case OP_JR:
 			ityp = IT_BRANCH;
 			break;
 		case OP_JALR:
 		case OP_SYSCALL:
 			ityp = IT_CALL;
 			break;
 		}
 		break;
 
 	case OP_BCOND:
 		switch ((int)inst.IType.rt) {
 		case OP_BLTZ:
 		case OP_BLTZL:
 		case OP_BGEZ:
 		case OP_BGEZL:
 			ityp = IT_BRANCH;
 			break;
 
 		case OP_BLTZAL:
 		case OP_BLTZALL:
 		case OP_BGEZAL:
 		case OP_BGEZALL:
 			ityp = IT_CALL;
 			break;
 		}
 		break;
 
 	case OP_JAL:
 		ityp = IT_CALL;
 		break;
 
 	case OP_J:
 	case OP_BEQ:
 	case OP_BEQL:
 	case OP_BNE:
 	case OP_BNEL:
 	case OP_BLEZ:
 	case OP_BLEZL:
 	case OP_BGTZ:
 	case OP_BGTZL:
 		ityp = IT_BRANCH;
 		break;
 
 	case OP_COP1:
 		switch (inst.RType.rs) {
 		case OP_BCx:
 		case OP_BCy:
 			ityp = IT_BRANCH;
 			break;
 		}
 		break;
 
 	case OP_LB:
 	case OP_LH:
 	case OP_LW:
 	case OP_LD:
 	case OP_LBU:
 	case OP_LHU:
 	case OP_LWU:
 	case OP_LWC1:
 		ityp = IT_LOAD;
 		break;
 
 	case OP_SB:
 	case OP_SH:
 	case OP_SW:
 	case OP_SD:  
 	case OP_SWC1:
 		ityp = IT_STORE;
 		break;
 	}
 	return (ityp);
 }
 
 /*
  * Return the next pc if the given branch is taken.
  * MachEmulateBranch() runs analysis for branch delay slot.
  */
 db_addr_t
 branch_taken(int inst, db_addr_t pc)
 {
 	db_addr_t ra;
 	register_t fpucsr;
 
 	/* TBD: when is fsr set */
 	fpucsr = (curthread) ? curthread->td_pcb->pcb_regs.fsr : 0;
 	ra = (db_addr_t)MipsEmulateBranch(kdb_frame, pc, fpucsr, 0);
 	return (ra);
 }
Index: head/sys/mips/mips/support.S
===================================================================
--- head/sys/mips/mips/support.S	(revision 327096)
+++ head/sys/mips/mips/support.S	(revision 327097)
@@ -1,1099 +1,1032 @@
 /*	$OpenBSD: locore.S,v 1.18 1998/09/15 10:58:53 pefo Exp $	*/
 /*-
  * Copyright (c) 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Digital Equipment Corporation and Ralph Campbell.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * Copyright (C) 1989 Digital Equipment Corporation.
  * Permission to use, copy, modify, and distribute this software and
  * its documentation for any purpose and without fee is hereby granted,
  * provided that the above copyright notice appears in all copies.
  * Digital Equipment Corporation makes no representations about the
  * suitability of this software for any purpose.  It is provided "as is"
  * without express or implied warranty.
  *
  * from: Header: /sprite/src/kernel/mach/ds3100.md/RCS/loMem.s,
  *	v 1.1 89/07/11 17:55:04 nelson Exp  SPRITE (DECWRL)
  * from: Header: /sprite/src/kernel/mach/ds3100.md/RCS/machAsm.s,
  *	v 9.2 90/01/29 18:00:39 shirriff Exp  SPRITE (DECWRL)
  * from: Header: /sprite/src/kernel/vm/ds3100.md/vmPmaxAsm.s,
  *	v 1.1 89/07/10 14:27:41 nelson Exp  SPRITE (DECWRL)
  *
  *	from: @(#)locore.s	8.5 (Berkeley) 1/4/94
  *	JNPR: support.S,v 1.5.2.2 2007/08/29 10:03:49 girish
  * $FreeBSD$
  */
 
 /*
  * Copyright (c) 1997 Jonathan Stone (hereinafter referred to as the author)
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by Jonathan R. Stone for
  *      the NetBSD Project.
  * 4. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  *	Contains assembly language support routines.
  */
 
 #include "opt_ddb.h"
 #include <sys/errno.h>
 #include <machine/asm.h>
 #include <machine/cpu.h>
 #include <machine/regnum.h>
 #include <machine/cpuregs.h>
 #include <machine/pcb.h>
 
 #include "assym.s"
 
 	.set	noreorder		# Noreorder is default style!
 
 /*
  * Primitives
  */
 
 	.text
 
 /*
  * See if access to addr with a len type instruction causes a machine check.
  * len is length of access (1=byte, 2=short, 4=int)
  *
  * badaddr(addr, len)
  *	char *addr;
  *	int len;
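  *
  * The probe relies on the pcb_onfault hook: U_PCB_ONFAULT is pointed
  * at baderr before the access, so a machine check unwinds to baderr
  * and returns 1; on success the hook is cleared and 0 is returned.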
  */
 LEAF(badaddr)
 	PTR_LA	v0, baderr
 	GET_CPU_PCPU(v1)
 	PTR_L	v1, PC_CURPCB(v1)
 	bne	a1, 1, 2f
 	PTR_S	v0, U_PCB_ONFAULT(v1)
 	b	5f
 	lbu	v0, (a0)
 2:
 	bne	a1, 2, 4f
 	nop
 	b	5f
 	lhu	v0, (a0)
 4:
 	lw	v0, (a0)
 5:
 	PTR_S	zero, U_PCB_ONFAULT(v1)
 	j	ra
 	move	v0, zero		# made it w/o errors
 baderr:
 	j	ra
 	li	v0, 1			# trap sends us here
 END(badaddr)
 
 /*
  * int copystr(void *kfaddr, void *kdaddr, size_t maxlen, size_t *lencopied)
  * Copy a NUL-terminated string, at most maxlen characters long.  Return the
  * number of characters copied (including the NUL) in *lencopied.  If the
  * string is too long, return ENAMETOOLONG; else return 0.
  */
 LEAF(copystr)
 	move		t0, a2
 	beq		a2, zero, 4f
 1:
 	lbu		v0, 0(a0)
 	PTR_SUBU	a2, a2, 1
 	beq		v0, zero, 2f
 	sb		v0, 0(a1)		# each byte until NUL
 	PTR_ADDU	a0, a0, 1
 	bne		a2, zero, 1b		# less than maxlen
 	PTR_ADDU	a1, a1, 1
 4:
 	li		v0, ENAMETOOLONG	# run out of space
 2:
 	beq		a3, zero, 3f		# return num. of copied bytes
 	PTR_SUBU	a2, t0, a2		# if the 4th arg was non-NULL
 	PTR_S		a2, 0(a3)
 3:
 	j		ra			# v0 is 0 or ENAMETOOLONG
 	nop
 END(copystr)
 
 
 /*
  * Copy a null terminated string from the user address space into
  * the kernel address space.
  *
  *	copyinstr(fromaddr, toaddr, maxlength, &lencopied)
  *		caddr_t fromaddr;
  *		caddr_t toaddr;
  *		u_int maxlength;
  *		u_int *lencopied;
  */
 NESTED(copyinstr, CALLFRAME_SIZ, ra)
 	PTR_SUBU	sp, sp, CALLFRAME_SIZ
 	.mask	0x80000000, (CALLFRAME_RA - CALLFRAME_SIZ)
 	PTR_LA	v0, copyerr
 	blt	a0, zero, _C_LABEL(copyerr)  # make sure address is in user space
 	REG_S	ra, CALLFRAME_RA(sp)
 	GET_CPU_PCPU(v1)
 	PTR_L	v1, PC_CURPCB(v1)
 	jal	_C_LABEL(copystr)
 	PTR_S	v0, U_PCB_ONFAULT(v1)
 	REG_L	ra, CALLFRAME_RA(sp)
 	GET_CPU_PCPU(v1)
 	PTR_L	v1, PC_CURPCB(v1)
 	PTR_S	zero, U_PCB_ONFAULT(v1)
 	j	ra
 	PTR_ADDU	sp, sp, CALLFRAME_SIZ
 END(copyinstr)
 
 /*
  * Copy a null terminated string from the kernel address space into
  * the user address space.
  *
  *	copyoutstr(fromaddr, toaddr, maxlength, &lencopied)
  *		caddr_t fromaddr;
  *		caddr_t toaddr;
  *		u_int maxlength;
  *		u_int *lencopied;
  */
 NESTED(copyoutstr, CALLFRAME_SIZ, ra)
 	PTR_SUBU	sp, sp, CALLFRAME_SIZ
 	.mask	0x80000000, (CALLFRAME_RA - CALLFRAME_SIZ)
 	PTR_LA	v0, copyerr
 	blt	a1, zero, _C_LABEL(copyerr)  # make sure address is in user space
 	REG_S	ra, CALLFRAME_RA(sp)
 	GET_CPU_PCPU(v1)
 	PTR_L	v1, PC_CURPCB(v1)
 	jal	_C_LABEL(copystr)
 	PTR_S	v0, U_PCB_ONFAULT(v1)
 	REG_L	ra, CALLFRAME_RA(sp)
 	GET_CPU_PCPU(v1)
 	PTR_L	v1, PC_CURPCB(v1)
 	PTR_S	zero, U_PCB_ONFAULT(v1)
 	j	ra
 	PTR_ADDU	sp, sp, CALLFRAME_SIZ
 END(copyoutstr)
 
 /*
  * Copy specified amount of data from user space into the kernel
  *	copyin(from, to, len)
  *		caddr_t *from;	(user source address)
  *		caddr_t *to;	(kernel destination address)
  *		unsigned len;
  */
 NESTED(copyin, CALLFRAME_SIZ, ra)
 	PTR_SUBU	sp, sp, CALLFRAME_SIZ
 	.mask	0x80000000, (CALLFRAME_RA - CALLFRAME_SIZ)
 	PTR_LA	v0, copyerr
 	blt	a0, zero, _C_LABEL(copyerr)  # make sure address is in user space
 	REG_S	ra, CALLFRAME_RA(sp)
 	GET_CPU_PCPU(v1)
 	PTR_L	v1, PC_CURPCB(v1)
 	jal	_C_LABEL(bcopy)
 	PTR_S	v0, U_PCB_ONFAULT(v1)
 	REG_L	ra, CALLFRAME_RA(sp)
 	GET_CPU_PCPU(v1)
 	PTR_L	v1, PC_CURPCB(v1)	 	# bcopy modified v1, so reload
 	PTR_S	zero, U_PCB_ONFAULT(v1)
 	PTR_ADDU	sp, sp, CALLFRAME_SIZ
 	j	ra
 	move	v0, zero
 END(copyin)
 
 /*
  * Copy specified amount of data from kernel to the user space
  *	copyout(from, to, len)
  *		caddr_t *from;	(kernel source address)
  *		caddr_t *to;	(user destination address)
  *		unsigned len;
  */
 NESTED(copyout, CALLFRAME_SIZ, ra)
 	PTR_SUBU	sp, sp, CALLFRAME_SIZ
 	.mask	0x80000000, (CALLFRAME_RA - CALLFRAME_SIZ)
 	PTR_LA	v0, copyerr
 	blt	a1, zero, _C_LABEL(copyerr) # make sure address is in user space
 	REG_S	ra, CALLFRAME_RA(sp)
 	GET_CPU_PCPU(v1)
 	PTR_L	v1, PC_CURPCB(v1)
 	jal	_C_LABEL(bcopy)
 	PTR_S	v0, U_PCB_ONFAULT(v1)
 	REG_L	ra, CALLFRAME_RA(sp)
 	GET_CPU_PCPU(v1)
 	PTR_L	v1, PC_CURPCB(v1)	 	# bcopy modified v1, so reload
 	PTR_S	zero, U_PCB_ONFAULT(v1)
 	PTR_ADDU	sp, sp, CALLFRAME_SIZ
 	j	ra
 	move	v0, zero
 END(copyout)
 
 LEAF(copyerr)
 	REG_L	ra, CALLFRAME_RA(sp)
 	PTR_ADDU	sp, sp, CALLFRAME_SIZ
 	j	ra
 	li	v0, EFAULT			# return error
 END(copyerr)
 
 /*
  * {fu,su},{ibyte,isword,iword}, fetch or store a byte, short or word to
  * user text space.
  * {fu,su},{byte,sword,word}, fetch or store a byte, short or word to
  * user data space.
  */
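 
 /*
  * Note: fuword()/suword() alias the 64-bit versions on n64 kernels
  * and the 32-bit versions otherwise, via the XLEAF entries below.
  */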
 #ifdef __mips_n64
 LEAF(fuword64)
 XLEAF(fuword)
 	PTR_LA	v0, fswberr
 	blt	a0, zero, fswberr	# make sure address is in user space
 	nop
 	GET_CPU_PCPU(v1)
 	PTR_L	v1, PC_CURPCB(v1)
 	PTR_S	v0, U_PCB_ONFAULT(v1)
 	ld	v0, 0(a0)		# fetch word
 	j	ra
 	PTR_S	zero, U_PCB_ONFAULT(v1)
 END(fuword64)
 #endif
 
 LEAF(fuword32)
 #ifndef __mips_n64
 XLEAF(fuword)
 #endif
 	PTR_LA	v0, fswberr
 	blt	a0, zero, fswberr	# make sure address is in user space
 	nop
 	GET_CPU_PCPU(v1)
 	PTR_L	v1, PC_CURPCB(v1)
 	PTR_S	v0, U_PCB_ONFAULT(v1)
 	lw	v0, 0(a0)		# fetch word
 	j	ra
 	PTR_S	zero, U_PCB_ONFAULT(v1)
 END(fuword32)
 
 LEAF(fusword)
 	PTR_LA	v0, fswberr
 	blt	a0, zero, fswberr	# make sure address is in user space
 	nop
 	GET_CPU_PCPU(v1)
 	PTR_L	v1, PC_CURPCB(v1)
 	PTR_S	v0, U_PCB_ONFAULT(v1)
 	lhu	v0, 0(a0)		# fetch short
 	j	ra
 	PTR_S	zero, U_PCB_ONFAULT(v1)
 END(fusword)
 
 LEAF(fubyte)
 	PTR_LA	v0, fswberr
 	blt	a0, zero, fswberr	# make sure address is in user space
 	nop
 	GET_CPU_PCPU(v1)
 	PTR_L	v1, PC_CURPCB(v1)
 	PTR_S	v0, U_PCB_ONFAULT(v1)
 	lbu	v0, 0(a0)		# fetch byte
 	j	ra
 	PTR_S	zero, U_PCB_ONFAULT(v1)
 END(fubyte)
 
 LEAF(suword32)
 #ifndef __mips_n64
 XLEAF(suword)
 #endif
 	PTR_LA	v0, fswberr
 	blt	a0, zero, fswberr	# make sure address is in user space
 	nop
 	GET_CPU_PCPU(v1)
 	PTR_L	v1, PC_CURPCB(v1)
 	PTR_S	v0, U_PCB_ONFAULT(v1)
 	sw	a1, 0(a0)		# store word
 	PTR_S	zero, U_PCB_ONFAULT(v1)
 	j	ra
 	move	v0, zero
 END(suword32)
 
 #ifdef __mips_n64
 LEAF(suword64)
 XLEAF(suword)
 	PTR_LA	v0, fswberr
 	blt	a0, zero, fswberr	# make sure address is in user space
 	nop
 	GET_CPU_PCPU(v1)
 	PTR_L	v1, PC_CURPCB(v1)
 	PTR_S	v0, U_PCB_ONFAULT(v1)
 	sd	a1, 0(a0)		# store word
 	PTR_S	zero, U_PCB_ONFAULT(v1)
 	j	ra
 	move	v0, zero
 END(suword64)
 #endif
 
 /*
  * casuword(9)
  * <v0>u_long casuword(<a0>u_long *p, <a1>u_long oldval, <a2>u_long newval)
  */
 /*
  * casuword32(9)
  * <v0>uint32_t casuword(<a0>uint32_t *p, <a1>uint32_t oldval, 
  *							<a2>uint32_t newval)
  */
 LEAF(casuword32)
 #ifndef __mips_n64
 XLEAF(casuword)
 #endif
 	PTR_LA	v0, fswberr
 	blt	a0, zero, fswberr	# make sure address is in user space
 	nop
 	GET_CPU_PCPU(v1)
 	PTR_L	v1, PC_CURPCB(v1)
 	PTR_S	v0, U_PCB_ONFAULT(v1)
 1:
 	move	t0, a2
 	ll	v0, 0(a0)
 	bne	a1, v0, 2f
 	nop
 	sc	t0, 0(a0)		# store word
 	beqz	t0, 1b
 	nop
 	j	3f
 	nop
 2:
 	li	v0, -1
 3:
 	PTR_S	zero, U_PCB_ONFAULT(v1)
 	jr	ra
 	nop
 END(casuword32)
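 
 /*
  * As the sequence above shows, v0 holds the loaded value on success
  * (necessarily equal to oldval) and -1 on a compare mismatch or a
  * fault, so a -1 result is ambiguous when *p can legitimately hold
  * (uint32_t)-1.
  */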
 
 #ifdef __mips_n64
 LEAF(casuword64)
 XLEAF(casuword)
 	PTR_LA	v0, fswberr
 	blt	a0, zero, fswberr	# make sure address is in user space
 	nop
 	GET_CPU_PCPU(v1)
 	PTR_L	v1, PC_CURPCB(v1)
 	PTR_S	v0, U_PCB_ONFAULT(v1)
 1:
 	move	t0, a2
 	lld	v0, 0(a0)
 	bne	a1, v0, 2f
 	nop
 	scd	t0, 0(a0)		# store double word
 	beqz	t0, 1b
 	nop
 	j	3f
 	nop
 2:
 	li	v0, -1
 3:
 	PTR_S	zero, U_PCB_ONFAULT(v1)
 	jr	ra
 	nop
 END(casuword64)
 #endif
 
 /*
  * Will have to flush the instruction cache if byte merging is done in hardware.
  */
 LEAF(susword)
 	PTR_LA	v0, fswberr
 	blt	a0, zero, fswberr	# make sure address is in user space
 	nop
 	GET_CPU_PCPU(v1)
 	PTR_L	v1, PC_CURPCB(v1)
 	PTR_S	v0, U_PCB_ONFAULT(v1)
 	sh	a1, 0(a0)		# store short
 	PTR_S	zero, U_PCB_ONFAULT(v1)
 	j	ra
 	move	v0, zero
 END(susword)
 
 LEAF(subyte)
 	PTR_LA	v0, fswberr
 	blt	a0, zero, fswberr	# make sure address is in user space
 	nop
 	GET_CPU_PCPU(v1)
 	PTR_L	v1, PC_CURPCB(v1)
 	PTR_S	v0, U_PCB_ONFAULT(v1)
 	sb	a1, 0(a0)		# store byte
 	PTR_S	zero, U_PCB_ONFAULT(v1)
 	j	ra
 	move	v0, zero
 END(subyte)
 
 LEAF(fswberr)
 	j	ra
 	li	v0, -1
 END(fswberr)
 
 /*
  * fuswintr and suswintr are just like fusword and susword except that if
  * the page is not in memory or would cause a trap, then we return an error.
  * The important thing is to prevent sleep() and switch().
  */
 LEAF(fuswintr)
 	PTR_LA	v0, fswintrberr
 	blt	a0, zero, fswintrberr	# make sure address is in user space
 	nop
 	GET_CPU_PCPU(v1)
 	PTR_L	v1, PC_CURPCB(v1)
 	PTR_S	v0, U_PCB_ONFAULT(v1)
 	lhu	v0, 0(a0)		# fetch short
 	j	ra
 	PTR_S	zero, U_PCB_ONFAULT(v1)
 END(fuswintr)
 
 LEAF(suswintr)
 	PTR_LA	v0, fswintrberr
 	blt	a0, zero, fswintrberr	# make sure address is in user space
 	nop
 	GET_CPU_PCPU(v1)
 	PTR_L	v1, PC_CURPCB(v1)
 	PTR_S	v0, U_PCB_ONFAULT(v1)
 	sh	a1, 0(a0)		# store short
 	PTR_S	zero, U_PCB_ONFAULT(v1)
 	j	ra
 	move	v0, zero
 END(suswintr)
 
 LEAF(fswintrberr)
 	j	ra
 	li	v0, -1
 END(fswintrberr)
 
 /*
  * memset(void *s1, int c, int len)
  * NetBSD: memset.S,v 1.3 2001/10/16 15:40:53 uch Exp
  */
 LEAF(memset)
 	.set noreorder
 	blt	a2, 12, memsetsmallclr	# small amount to clear?
 	move	v0, a0			# save s1 for result
 
 	sll	t1, a1, 8		# compute c << 8 in t1
 	or	t1, t1, a1		# compute c << 8 | c in t1
 	sll	t2, t1, 16		# shift that left 16
 	or	t1, t2, t1		# or together
 
 	PTR_SUBU	t0, zero, a0		# compute # bytes to word align address
 	and	t0, t0, 3
 	beq	t0, zero, 1f		# skip if word aligned
 	PTR_SUBU	a2, a2, t0		# subtract from remaining count
 	SWHI	t1, 0(a0)		# store 1, 2, or 3 bytes to align
 	PTR_ADDU	a0, a0, t0
 1:
 	and	v1, a2, 3		# compute number of whole words left
 	PTR_SUBU	t0, a2, v1
 	PTR_SUBU	a2, a2, t0
 	PTR_ADDU	t0, t0, a0		# compute ending address
 2:
 	PTR_ADDU	a0, a0, 4		# clear words
 	bne	a0, t0, 2b		#  unrolling loop does not help
 	sw	t1, -4(a0)		#  since we are limited by memory speed
 
 memsetsmallclr:
 	ble	a2, zero, 2f
 	PTR_ADDU	t0, a2, a0		# compute ending address
 1:
 	PTR_ADDU	a0, a0, 1		# clear bytes
 	bne	a0, t0, 1b
 	sb	a1, -1(a0)
 2:
 	j	ra
 	nop
 	.set reorder
 END(memset)
 
 /*
  * bzero(s1, n)
  */
 LEAF(bzero)
 XLEAF(blkclr)
 	.set	noreorder
 	blt	a1, 12, smallclr	# small amount to clear?
 	PTR_SUBU	a3, zero, a0		# compute # bytes to word align address
 	and	a3, a3, 3
 	beq	a3, zero, 1f		# skip if word aligned
 	PTR_SUBU	a1, a1, a3		# subtract from remaining count
 	SWHI	zero, 0(a0)		# clear 1, 2, or 3 bytes to align
 	PTR_ADDU	a0, a0, a3
 1:
 	and	v0, a1, 3		# compute number of words left
 	PTR_SUBU	a3, a1, v0
 	move	a1, v0
 	PTR_ADDU	a3, a3, a0		# compute ending address
 2:
 	PTR_ADDU	a0, a0, 4		# clear words
 	bne	a0, a3, 2b		#  unrolling loop does not help
 	sw	zero, -4(a0)		#  since we are limited by memory speed
 smallclr:
 	ble	a1, zero, 2f
 	PTR_ADDU	a3, a1, a0		# compute ending address
 1:
 	PTR_ADDU	a0, a0, 1		# clear bytes
 	bne	a0, a3, 1b
 	sb	zero, -1(a0)
 2:
 	j	ra
 	nop
 END(bzero)
 
 
 /*
  * bcmp(s1, s2, n)
  */
 LEAF(bcmp)
 	.set	noreorder
 	blt	a2, 16, smallcmp	# is it worth any trouble?
 	xor	v0, a0, a1		# compare low two bits of addresses
 	and	v0, v0, 3
 	PTR_SUBU	a3, zero, a1		# compute # bytes to word align address
 	bne	v0, zero, unalignedcmp	# not possible to align addresses
 	and	a3, a3, 3
 
 	beq	a3, zero, 1f
 	PTR_SUBU	a2, a2, a3		# subtract from remaining count
 	move	v0, v1			# init v0,v1 so unmodified bytes match
 	LWHI	v0, 0(a0)		# read 1, 2, or 3 bytes
 	LWHI	v1, 0(a1)
 	PTR_ADDU	a1, a1, a3
 	bne	v0, v1, nomatch
 	PTR_ADDU	a0, a0, a3
 1:
 	and	a3, a2, ~3		# compute number of whole words left
 	PTR_SUBU	a2, a2, a3		#   which has to be >= (16-3) & ~3
 	PTR_ADDU	a3, a3, a0		# compute ending address
 2:
 	lw	v0, 0(a0)		# compare words
 	lw	v1, 0(a1)
 	PTR_ADDU	a0, a0, 4
 	bne	v0, v1, nomatch
 	PTR_ADDU	a1, a1, 4
 	bne	a0, a3, 2b
 	nop
 	b	smallcmp		# finish remainder
 	nop
 unalignedcmp:
 	beq	a3, zero, 2f
 	PTR_SUBU	a2, a2, a3		# subtract from remaining count
 	PTR_ADDU	a3, a3, a0		# compute ending address
 1:
 	lbu	v0, 0(a0)		# compare bytes until a1 word aligned
 	lbu	v1, 0(a1)
 	PTR_ADDU	a0, a0, 1
 	bne	v0, v1, nomatch
 	PTR_ADDU	a1, a1, 1
 	bne	a0, a3, 1b
 	nop
 2:
 	and	a3, a2, ~3		# compute number of whole words left
 	PTR_SUBU	a2, a2, a3		#   which has to be >= (16-3) & ~3
 	PTR_ADDU	a3, a3, a0		# compute ending address
 3:
 	LWHI	v0, 0(a0)		# compare words a0 unaligned, a1 aligned
 	LWLO	v0, 3(a0)
 	lw	v1, 0(a1)
 	PTR_ADDU	a0, a0, 4
 	bne	v0, v1, nomatch
 	PTR_ADDU	a1, a1, 4
 	bne	a0, a3, 3b
 	nop
 smallcmp:
 	ble	a2, zero, match
 	PTR_ADDU	a3, a2, a0		# compute ending address
 1:
 	lbu	v0, 0(a0)
 	lbu	v1, 0(a1)
 	PTR_ADDU	a0, a0, 1
 	bne	v0, v1, nomatch
 	PTR_ADDU	a1, a1, 1
 	bne	a0, a3, 1b
 	nop
 match:
 	j	ra
 	 move	v0, zero
 nomatch:
 	j	ra
 	li	v0, 1
 END(bcmp)
 
 
 /*
  * bit = ffs(value)
  */
 LEAF(ffs)
 	.set	noreorder
 	beq	a0, zero, 2f
 	move	v0, zero
 1:
 	and	v1, a0, 1		# bit set?
 	addu	v0, v0, 1
 	beq	v1, zero, 1b		# no, continue
 	srl	a0, a0, 1
 2:
 	j	ra
 	nop
 END(ffs)
 
 /**
  * void
  * atomic_set_16(u_int16_t *a, u_int16_t b)
  * {
  *	*a |= b;
  * }
  */
 LEAF(atomic_set_16)
 	.set	noreorder
 	srl	a0, a0, 2	# round down address to be 32-bit aligned
 	sll	a0, a0, 2
 	andi	a1, a1, 0xffff
 1:
 	ll	t0, 0(a0)
 	or	t0, t0, a1
 	sc	t0, 0(a0)
 	beq	t0, zero, 1b
 	nop
 	j	ra
 	nop
 END(atomic_set_16)
 
 /**
  * void
  * atomic_clear_16(u_int16_t *a, u_int16_t b)
  * {
  *	*a &= ~b;
  * }
  */
 LEAF(atomic_clear_16)
 	.set	noreorder
 	srl	a0, a0, 2	# round down address to be 32-bit aligned
 	sll	a0, a0, 2
 	nor	a1, zero, a1
 1:
 	ll	t0, 0(a0)
 	move	t1, t0
 	andi	t1, t1, 0xffff	# t1 has the original lower 16 bits
 	and	t1, t1, a1	# t1 has the new lower 16 bits
 	srl	t0, t0, 16	# preserve original top 16 bits
 	sll	t0, t0, 16
 	or	t0, t0, t1
 	sc	t0, 0(a0)
 	beq	t0, zero, 1b
 	nop
 	j	ra
 	nop
 END(atomic_clear_16)
 
 
 /**
  * void
  * atomic_subtract_16(uint16_t *a, uint16_t b)
  * {
  *	*a -= b;
  * }
  */
 LEAF(atomic_subtract_16)
 	.set	noreorder
 	srl	a0, a0, 2	# round down address to be 32-bit aligned
 	sll	a0, a0, 2
 1:
 	ll	t0, 0(a0)
 	move	t1, t0
 	andi	t1, t1, 0xffff	# t1 has the original lower 16 bits
 	subu	t1, t1, a1
 	andi	t1, t1, 0xffff	# t1 has the new lower 16 bits
 	srl	t0, t0, 16	# preserve original top 16 bits
 	sll	t0, t0, 16
 	or	t0, t0, t1
 	sc	t0, 0(a0)
 	beq	t0, zero, 1b
 	nop
 	j	ra
 	nop
 END(atomic_subtract_16)
 
 /**
  * void
  * atomic_add_16(uint16_t *a, uint16_t b)
  * {
  *	*a += b;
  * }
  */
 LEAF(atomic_add_16)
 	.set	noreorder
 	srl	a0, a0, 2	# round down address to be 32-bit aligned
 	sll	a0, a0, 2
 1:
 	ll	t0, 0(a0)
 	move	t1, t0
 	andi	t1, t1, 0xffff	# t1 has the original lower 16 bits
 	addu	t1, t1, a1
 	andi	t1, t1, 0xffff	# t1 has the new lower 16 bits
 	srl	t0, t0, 16	# preserve original top 16 bits
 	sll	t0, t0, 16
 	or	t0, t0, t1
 	sc	t0, 0(a0)
 	beq	t0, zero, 1b
 	nop
 	j	ra
 	nop
 END(atomic_add_16)
 
 /**
  * void
  * atomic_add_8(uint8_t *a, uint8_t b)
  * {
  *	*a += b;
  * }
  */
 LEAF(atomic_add_8)
 	.set	noreorder
 	srl	a0, a0, 2	# round down address to be 32-bit aligned
 	sll	a0, a0, 2
 1:
 	ll	t0, 0(a0)
 	move	t1, t0
 	andi	t1, t1, 0xff	# t1 has the original lower 8 bits
 	addu	t1, t1, a1
 	andi	t1, t1, 0xff	# t1 has the new lower 8 bits
 	srl	t0, t0, 8	# preserve original top 24 bits
 	sll	t0, t0, 8
 	or	t0, t0, t1
 	sc	t0, 0(a0)
 	beq	t0, zero, 1b
 	nop
 	j	ra
 	nop
 END(atomic_add_8)
 
 
 /**
  * void
  * atomic_subtract_8(uint8_t *a, uint8_t b)
  * {
  *	*a -= b;
  * }
  */
 LEAF(atomic_subtract_8)
 	.set	noreorder
 	srl	a0, a0, 2	# round down address to be 32-bit aligned
 	sll	a0, a0, 2
 1:
 	ll	t0, 0(a0)
 	move	t1, t0
 	andi	t1, t1, 0xff	# t1 has the original lower 8 bits
 	subu	t1, t1, a1
 	andi	t1, t1, 0xff	# t1 has the new lower 8 bits
 	srl	t0, t0, 8	# preserve original top 24 bits
 	sll	t0, t0, 8
 	or	t0, t0, t1
 	sc	t0, 0(a0)
 	beq	t0, zero, 1b
 	nop
 	j	ra
 	nop
 END(atomic_subtract_8)
 
-/*
- *	atomic 64-bit register read/write assembly language support routines.
- */
-
 	.set	noreorder		# Noreorder is default style!
-
-#if !defined(__mips_n64) && !defined(__mips_n32)	
-	/*
-	 * I don't know if these routines have the right number of
-	 * NOPs in it for all processors.  XXX
-	 *
-	 * Maybe it would be better to just leave this undefined in that case.
-	 *
-	 * XXX These routines are not safe in the case of a TLB miss on a1 or
-	 *     a0 unless the trapframe is 64-bit, which it just isn't with O32.
-	 *     If we take any exception, not just an interrupt, the upper
-	 *     32-bits will be clobbered.  Use only N32 and N64 kernels if you
-	 *     want to use 64-bit registers while interrupts are enabled or
-	 *     with memory operations.  Since this isn't even using load-linked
-	 *     and store-conditional, perhaps it should just use two registers
-	 *     instead, as is right and good with the O32 ABI.
-	 */
-LEAF(atomic_store_64)
-	mfc0	t1, MIPS_COP_0_STATUS
-	and	t2, t1, ~MIPS_SR_INT_IE
-	mtc0	t2, MIPS_COP_0_STATUS
-	nop
-	nop
-	nop
-	nop
-	ld	t0, (a1)
-	nop
-	nop
-	sd	t0, (a0)
-	nop
-	nop
-	mtc0	t1,MIPS_COP_0_STATUS
-	nop
-	nop
-	nop
-	nop
-	j	ra
-	nop
-END(atomic_store_64)
-
-LEAF(atomic_load_64)
-	mfc0	t1, MIPS_COP_0_STATUS
-	and	t2, t1, ~MIPS_SR_INT_IE
-	mtc0	t2, MIPS_COP_0_STATUS
-	nop
-	nop
-	nop
-	nop
-	ld	t0, (a0)
-	nop
-	nop
-	sd	t0, (a1)
-	nop
-	nop
-	mtc0	t1,MIPS_COP_0_STATUS
-	nop
-	nop
-	nop
-	nop
-	j	ra
-	nop
-END(atomic_load_64)
-#endif
 
 #if defined(DDB) || defined(DEBUG)
 
 LEAF(kdbpeek)
 	PTR_LA	v1, ddberr
 	and	v0, a0, 3			# unaligned ?
 	GET_CPU_PCPU(t1)
 	PTR_L	t1, PC_CURPCB(t1)
 	bne	v0, zero, 1f
 	PTR_S	v1, U_PCB_ONFAULT(t1)
 
 	lw	v0, (a0)
 	jr	ra
 	PTR_S	zero, U_PCB_ONFAULT(t1)
 
 1:
 	LWHI	v0, 0(a0)
 	LWLO	v0, 3(a0)
 	jr	ra
 	PTR_S	zero, U_PCB_ONFAULT(t1)
 END(kdbpeek)
 
 LEAF(kdbpeekd)
 	PTR_LA	v1, ddberr
 	and	v0, a0, 3			# unaligned ?
 	GET_CPU_PCPU(t1)
 	PTR_L	t1, PC_CURPCB(t1)
 	bne	v0, zero, 1f
 	PTR_S	v1, U_PCB_ONFAULT(t1)
 
 	ld	v0, (a0)
 	jr	ra
 	PTR_S	zero, U_PCB_ONFAULT(t1)
 
 1:
 	REG_LHI	v0, 0(a0)
 	REG_LLO	v0, 7(a0)
 	jr	ra
 	PTR_S	zero, U_PCB_ONFAULT(t1)
 END(kdbpeekd)
 
 ddberr:
 	jr	ra
 	nop
 
 #if defined(DDB)
 LEAF(kdbpoke)
 	PTR_LA	v1, ddberr
 	and	v0, a0, 3			# unaligned ?
 	GET_CPU_PCPU(t1)
 	PTR_L	t1, PC_CURPCB(t1)
 	bne	v0, zero, 1f
 	PTR_S	v1, U_PCB_ONFAULT(t1)
 
 	sw	a1, (a0)
 	jr	ra
 	PTR_S	zero, U_PCB_ONFAULT(t1)
 
 1:
 	SWHI	a1, 0(a0)
 	SWLO	a1, 3(a0)
 	jr	ra
 	PTR_S	zero, U_PCB_ONFAULT(t1)
 END(kdbpoke)
 
 	.data
 	.globl	esym
 esym:	.word	0
 
 #endif /* DDB */
 #endif /* DDB || DEBUG */
 
 	.text
 LEAF(breakpoint)
 	break	MIPS_BREAK_SOVER_VAL
 	jr	ra
 	nop
 END(breakpoint)
 
 LEAF(setjmp)
 	mfc0	v0, MIPS_COP_0_STATUS	# Later the "real" spl value!
 	REG_S	s0, (SZREG * PCB_REG_S0)(a0)
 	REG_S	s1, (SZREG * PCB_REG_S1)(a0)
 	REG_S	s2, (SZREG * PCB_REG_S2)(a0)
 	REG_S	s3, (SZREG * PCB_REG_S3)(a0)
 	REG_S	s4, (SZREG * PCB_REG_S4)(a0)
 	REG_S	s5, (SZREG * PCB_REG_S5)(a0)
 	REG_S	s6, (SZREG * PCB_REG_S6)(a0)
 	REG_S	s7, (SZREG * PCB_REG_S7)(a0)
 	REG_S	s8, (SZREG * PCB_REG_S8)(a0)
 	REG_S	sp, (SZREG * PCB_REG_SP)(a0)
 	REG_S	ra, (SZREG * PCB_REG_RA)(a0)
 	REG_S	v0, (SZREG * PCB_REG_SR)(a0)
 	jr	ra
 	li	v0, 0			# setjmp return
 END(setjmp)
 
 LEAF(longjmp)
 	REG_L	v0, (SZREG * PCB_REG_SR)(a0)
 	REG_L	ra, (SZREG * PCB_REG_RA)(a0)
 	REG_L	s0, (SZREG * PCB_REG_S0)(a0)
 	REG_L	s1, (SZREG * PCB_REG_S1)(a0)
 	REG_L	s2, (SZREG * PCB_REG_S2)(a0)
 	REG_L	s3, (SZREG * PCB_REG_S3)(a0)
 	REG_L	s4, (SZREG * PCB_REG_S4)(a0)
 	REG_L	s5, (SZREG * PCB_REG_S5)(a0)
 	REG_L	s6, (SZREG * PCB_REG_S6)(a0)
 	REG_L	s7, (SZREG * PCB_REG_S7)(a0)
 	REG_L	s8, (SZREG * PCB_REG_S8)(a0)
 	REG_L	sp, (SZREG * PCB_REG_SP)(a0)
 	mtc0	v0, MIPS_COP_0_STATUS	# Later the "real" spl value!
 	ITLBNOPFIX
 	jr	ra
 	li	v0, 1			# longjmp return
 END(longjmp)
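 
 /*
  * mips3_ld()/mips3_sd(): 64-bit load/store that also work from an o32
  * kernel.  On o32 the 64-bit value is split across a register pair
  * (note the endian-dependent shifts below) and interrupts are masked
  * around the access so that an exception cannot clobber the upper
  * halves of the 64-bit registers.
  */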
 
 LEAF(mips3_ld)
 	.set push
 	.set noreorder
 	.set mips64
 #if defined(__mips_o32)
 	mfc0	t0, MIPS_COP_0_STATUS		# turn off interrupts
 	and	t1, t0, ~(MIPS_SR_INT_IE)
 	mtc0	t1, MIPS_COP_0_STATUS
 	COP0_SYNC
 	nop
 	nop
 	nop
 
 	ld	v0, 0(a0)
 #if _BYTE_ORDER == _BIG_ENDIAN
 	dsll	v1, v0, 32
 	dsra	v1, v1, 32			# low word in v1
 	dsra	v0, v0, 32			# high word in v0
 #else
 	dsra	v1, v0, 32			# high word in v1
 	dsll	v0, v0, 32
 	dsra	v0, v0, 32			# low word in v0
 #endif
 
 	mtc0	t0, MIPS_COP_0_STATUS		# restore intr status.
 	COP0_SYNC
 	nop
 #else /* !__mips_o32 */
 	ld	v0, 0(a0)
 #endif /* !__mips_o32 */
 
 	jr	ra
 	nop
 	.set pop
 END(mips3_ld)
 
 LEAF(mips3_sd)
 	.set push
 	.set mips64
 	.set noreorder
 #if defined(__mips_o32)
 	mfc0	t0, MIPS_COP_0_STATUS		# turn off interrupts
 	and	t1, t0, ~(MIPS_SR_INT_IE)
 	mtc0	t1, MIPS_COP_0_STATUS
 	COP0_SYNC
 	nop
 	nop
 	nop
 
 	# NOTE: a1 is padding!
 
 #if _BYTE_ORDER == _BIG_ENDIAN
 	dsll	a2, a2, 32			# high word in a2
 	dsll	a3, a3, 32			# low word in a3
 	dsrl	a3, a3, 32
 #else
 	dsll	a2, a2, 32			# low word in a2
 	dsrl	a2, a2, 32
 	dsll	a3, a3, 32			# high word in a3
 #endif
 	or	a1, a2, a3
 	sd	a1, 0(a0)
 
 	mtc0	t0, MIPS_COP_0_STATUS		# restore intr status.
 	COP0_SYNC
 	nop
 #else /* !__mips_o32 */
 	sd	a1, 0(a0)
 #endif /* !__mips_o32 */
 
 	jr	ra
 	nop
 	.set pop
 END(mips3_sd)