diff --git a/lib/libc/powerpcspe/gen/fpsetmask.c b/lib/libc/powerpcspe/gen/fpsetmask.c
index 9763b78a0919..e71b822d6e0b 100644
--- a/lib/libc/powerpcspe/gen/fpsetmask.c
+++ b/lib/libc/powerpcspe/gen/fpsetmask.c
@@ -1,53 +1,53 @@
 /*	$NetBSD: fpsetmask.c,v 1.3 2002/01/13 21:45:48 thorpej Exp $	*/
 
 /*
  * Copyright (c) 2016 Justin Hibbits
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Dan Winship.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/types.h>
 #include <ieeefp.h>
 #include <machine/spr.h>
 
 #ifndef _SOFT_FLOAT
 fp_except_t
 fpsetmask(fp_except_t mask)
 {
 	uint32_t fpscr;
 	fp_rnd_t old;
 
 	__asm__ __volatile("mfspr %0, %1" : "=r"(fpscr) : "K"(SPR_SPEFSCR));
 	old = (fp_rnd_t)((fpscr >> 2) & 0x1f);
 	fpscr = (fpscr & 0xffffff83) | (mask << 2);
-	__asm__ __volatile("mtspr %1,%0" :: "r"(fpscr), "K"(SPR_SPEFSCR));
+	__asm__ __volatile("mtspr %1,%0;isync" :: "r"(fpscr), "K"(SPR_SPEFSCR));
 	return (old);
 }
 #endif
diff --git a/lib/libc/powerpcspe/gen/fpsetround.c b/lib/libc/powerpcspe/gen/fpsetround.c
index b5340a6d9ea2..2e2469cdae5e 100644
--- a/lib/libc/powerpcspe/gen/fpsetround.c
+++ b/lib/libc/powerpcspe/gen/fpsetround.c
@@ -1,53 +1,53 @@
 /*	$NetBSD: fpsetround.c,v 1.3 2002/01/13 21:45:48 thorpej Exp $	*/
 
 /*
  * Copyright (c) 2016 Justin Hibbits
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Dan Winship.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/types.h>
 #include <ieeefp.h>
 #include <machine/spr.h>
 
 #ifndef _SOFT_FLOAT
 fp_rnd_t
 fpsetround(fp_rnd_t rnd_dir)
 {
 	uint32_t fpscr;
 	fp_rnd_t old;
 
 	__asm__ __volatile("mfspr %0, %1" : "=r"(fpscr) : "K"(SPR_SPEFSCR) );
 	old = (fp_rnd_t)(fpscr & 0x3);
 	fpscr = (fpscr & 0xfffffffc) | rnd_dir;
-	__asm__ __volatile("mtspr %1, %0" :: "r"(fpscr), "K"(SPR_SPEFSCR));
+	__asm__ __volatile("mtspr %1, %0;isync" :: "r"(fpscr), "K"(SPR_SPEFSCR));
 	return (old);
 }
 #endif
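Both libc changes append isync to the mtspr that writes SPEFSCR. A move to that SPR is not context-synchronizing, so without the isync, floating-point instructions already fetched could still execute under the old exception mask or rounding mode. A minimal user-space round-trip check, as a sketch only: it assumes a FreeBSD powerpcspe target, and fpgetround()/fpgetmask() are the matching <ieeefp.h> readbacks, not part of this diff.

	/* spe_ieeefp_check.c (hypothetical test, not in the tree) */
	#include <ieeefp.h>
	#include <stdio.h>

	int
	main(void)
	{
		fp_rnd_t oldrnd;
		fp_except_t oldmask;

		oldrnd = fpsetround(FP_RZ);	/* returns the previous mode */
		oldmask = fpsetmask(FP_X_DZ);	/* unmask divide-by-zero only */

		/* With the isync in place the new state is visible at once. */
		printf("rnd %d -> %d, mask %#x -> %#x\n", (int)oldrnd,
		    (int)fpgetround(), (unsigned)oldmask, (unsigned)fpgetmask());

		fpsetmask(oldmask);
		fpsetround(oldrnd);
		return (0);
	}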
diff --git a/lib/msun/powerpc/fenv.h b/lib/msun/powerpc/fenv.h
index af9eee999b70..3e2a11c48c50 100644
--- a/lib/msun/powerpc/fenv.h
+++ b/lib/msun/powerpc/fenv.h
@@ -1,286 +1,291 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef	_FENV_H_
 #define	_FENV_H_
 
 #include <sys/_types.h>
+#include <machine/endian.h>
 
 #ifndef	__fenv_static
 #define	__fenv_static	static
 #endif
 
 typedef	__uint32_t	fenv_t;
 typedef	__uint32_t	fexcept_t;
 
 /* Exception flags */
 #define	FE_INEXACT	0x02000000
 #define	FE_DIVBYZERO	0x04000000
 #define	FE_UNDERFLOW	0x08000000
 #define	FE_OVERFLOW	0x10000000
 #define	FE_INVALID	0x20000000	/* all types of invalid FP ops */
 
 /*
  * The PowerPC architecture has extra invalid flags that indicate the
  * specific type of invalid operation occurred. These flags may be
  * tested, set, and cleared---but not masked---separately. All of
  * these bits are cleared when FE_INVALID is cleared, but only
  * FE_VXSOFT is set when FE_INVALID is explicitly set in software.
  */
 #define	FE_VXCVI	0x00000100	/* invalid integer convert */
 #define	FE_VXSQRT	0x00000200	/* square root of a negative */
 #define	FE_VXSOFT	0x00000400	/* software-requested exception */
 #define	FE_VXVC		0x00080000	/* ordered comparison involving NaN */
 #define	FE_VXIMZ	0x00100000	/* inf * 0 */
 #define	FE_VXZDZ	0x00200000	/* 0 / 0 */
 #define	FE_VXIDI	0x00400000	/* inf / inf */
 #define	FE_VXISI	0x00800000	/* inf - inf */
 #define	FE_VXSNAN	0x01000000	/* operation on a signalling NaN */
 #define	FE_ALL_INVALID	(FE_VXCVI | FE_VXSQRT | FE_VXSOFT | FE_VXVC | \
 			 FE_VXIMZ | FE_VXZDZ | FE_VXIDI | FE_VXISI | \
 			 FE_VXSNAN | FE_INVALID)
 #define	FE_ALL_EXCEPT	(FE_DIVBYZERO | FE_INEXACT | \
 			 FE_ALL_INVALID | FE_OVERFLOW | FE_UNDERFLOW)
 
 /* Rounding modes */
 #define	FE_TONEAREST	0x0000
 #define	FE_TOWARDZERO	0x0001
 #define	FE_UPWARD	0x0002
 #define	FE_DOWNWARD	0x0003
 #define	_ROUND_MASK	(FE_TONEAREST | FE_DOWNWARD | \
 			 FE_UPWARD | FE_TOWARDZERO)
 
 __BEGIN_DECLS
 
 /* Default floating-point environment */
 extern const fenv_t	__fe_dfl_env;
 #define	FE_DFL_ENV	(&__fe_dfl_env)
 
 /* We need to be able to map status flag positions to mask flag positions */
 #define	_FPUSW_SHIFT	22
 #define	_ENABLE_MASK	((FE_DIVBYZERO | FE_INEXACT | FE_INVALID | \
 			 FE_OVERFLOW | FE_UNDERFLOW) >> _FPUSW_SHIFT)
 
 #ifndef _SOFT_FLOAT
 #ifdef __SPE__
-#define	__mffs(__env)	__asm __volatile("mfspr %0, 512" : "=r" (*(__env)))
-#define	__mtfsf(__env)	__asm __volatile("mtspr 512,%0" : : "r" (__env))
+#define	__mffs(__env) \
+	__asm __volatile("mfspr %0, 512" : "=r" ((__env)->__bits.__reg))
+#define	__mtfsf(__env) \
+	__asm __volatile("mtspr 512,%0;isync" :: "r" ((__env).__bits.__reg))
 #else
-#define	__mffs(__env)	__asm __volatile("mffs %0" : "=f" (*(__env)))
-#define	__mtfsf(__env)	__asm __volatile("mtfsf 255,%0" : : "f" (__env))
+#define	__mffs(__env) \
+	__asm __volatile("mffs %0" : "=f" ((__env)->__d))
+#define	__mtfsf(__env) \
+	__asm __volatile("mtfsf 255,%0" :: "f" ((__env).__d))
 #endif
 #else
 #define	__mffs(__env)
 #define	__mtfsf(__env)
 #endif
 
 union __fpscr {
 	double __d;
 	struct {
 #if _BYTE_ORDER == _LITTLE_ENDIAN
 		fenv_t __reg;
 		__uint32_t __junk;
 #else
 		__uint32_t __junk;
 		fenv_t __reg;
 #endif
 	} __bits;
 };
 
 __fenv_static inline int
 feclearexcept(int __excepts)
 {
 	union __fpscr __r;
 
 	if (__excepts & FE_INVALID)
 		__excepts |= FE_ALL_INVALID;
-	__mffs(&__r.__d);
+	__mffs(&__r);
 	__r.__bits.__reg &= ~__excepts;
-	__mtfsf(__r.__d);
+	__mtfsf(__r);
 	return (0);
 }
 
 __fenv_static inline int
 fegetexceptflag(fexcept_t *__flagp, int __excepts)
 {
 	union __fpscr __r;
 
-	__mffs(&__r.__d);
+	__mffs(&__r);
 	*__flagp = __r.__bits.__reg & __excepts;
 	return (0);
 }
 
 __fenv_static inline int
 fesetexceptflag(const fexcept_t *__flagp, int __excepts)
 {
 	union __fpscr __r;
 
 	if (__excepts & FE_INVALID)
 		__excepts |= FE_ALL_EXCEPT;
-	__mffs(&__r.__d);
+	__mffs(&__r);
 	__r.__bits.__reg &= ~__excepts;
 	__r.__bits.__reg |= *__flagp & __excepts;
-	__mtfsf(__r.__d);
+	__mtfsf(__r);
 	return (0);
 }
 
 __fenv_static inline int
 feraiseexcept(int __excepts)
 {
 	union __fpscr __r;
 
 	if (__excepts & FE_INVALID)
 		__excepts |= FE_VXSOFT;
-	__mffs(&__r.__d);
+	__mffs(&__r);
 	__r.__bits.__reg |= __excepts;
-	__mtfsf(__r.__d);
+	__mtfsf(__r);
 	return (0);
 }
 
 __fenv_static inline int
 fetestexcept(int __excepts)
 {
 	union __fpscr __r;
 
-	__mffs(&__r.__d);
+	__mffs(&__r);
 	return (__r.__bits.__reg & __excepts);
 }
 
 __fenv_static inline int
 fegetround(void)
 {
 	union __fpscr __r;
 
-	__mffs(&__r.__d);
+	__mffs(&__r);
 	return (__r.__bits.__reg & _ROUND_MASK);
 }
 
 __fenv_static inline int
 fesetround(int __round)
 {
 	union __fpscr __r;
 
 	if (__round & ~_ROUND_MASK)
 		return (-1);
-	__mffs(&__r.__d);
+	__mffs(&__r);
 	__r.__bits.__reg &= ~_ROUND_MASK;
 	__r.__bits.__reg |= __round;
-	__mtfsf(__r.__d);
+	__mtfsf(__r);
 	return (0);
 }
 
 __fenv_static inline int
 fegetenv(fenv_t *__envp)
 {
 	union __fpscr __r;
 
-	__mffs(&__r.__d);
+	__mffs(&__r);
 	*__envp = __r.__bits.__reg;
 	return (0);
 }
 
 __fenv_static inline int
 feholdexcept(fenv_t *__envp)
 {
 	union __fpscr __r;
 
-	__mffs(&__r.__d);
+	__mffs(&__r);
 	*__envp = __r.__d;
 	__r.__bits.__reg &= ~(FE_ALL_EXCEPT | _ENABLE_MASK);
-	__mtfsf(__r.__d);
+	__mtfsf(__r);
 	return (0);
 }
 
 __fenv_static inline int
 fesetenv(const fenv_t *__envp)
 {
 	union __fpscr __r;
 
 	__r.__bits.__reg = *__envp;
-	__mtfsf(__r.__d);
+	__mtfsf(__r);
 	return (0);
 }
 
 __fenv_static inline int
 feupdateenv(const fenv_t *__envp)
 {
 	union __fpscr __r;
 
-	__mffs(&__r.__d);
+	__mffs(&__r);
 	__r.__bits.__reg &= FE_ALL_EXCEPT;
 	__r.__bits.__reg |= *__envp;
-	__mtfsf(__r.__d);
+	__mtfsf(__r);
 	return (0);
 }
 
 #if __BSD_VISIBLE
 
 /* We currently provide no external definitions of the functions below. */
 
 static inline int
 feenableexcept(int __mask)
 {
 	union __fpscr __r;
 	fenv_t __oldmask;
 
-	__mffs(&__r.__d);
+	__mffs(&__r);
 	__oldmask = __r.__bits.__reg;
 	__r.__bits.__reg |= (__mask & FE_ALL_EXCEPT) >> _FPUSW_SHIFT;
-	__mtfsf(__r.__d);
+	__mtfsf(__r);
 	return ((__oldmask & _ENABLE_MASK) << _FPUSW_SHIFT);
 }
 
 static inline int
 fedisableexcept(int __mask)
 {
 	union __fpscr __r;
 	fenv_t __oldmask;
 
-	__mffs(&__r.__d);
+	__mffs(&__r);
 	__oldmask = __r.__bits.__reg;
 	__r.__bits.__reg &= ~((__mask & FE_ALL_EXCEPT) >> _FPUSW_SHIFT);
-	__mtfsf(__r.__d);
+	__mtfsf(__r);
 	return ((__oldmask & _ENABLE_MASK) << _FPUSW_SHIFT);
 }
 
 static inline int
 fegetexcept(void)
 {
 	union __fpscr __r;
 
-	__mffs(&__r.__d);
+	__mffs(&__r);
 	return ((__r.__bits.__reg & _ENABLE_MASK) << _FPUSW_SHIFT);
 }
 
 #endif /* __BSD_VISIBLE */
 
 __END_DECLS
 
 #endif	/* !_FENV_H_ */
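The fenv.h macros now take the union __fpscr itself instead of its double member: the SPE variant moves the 32-bit SPEFSCR through the integer __bits.__reg field, with the same trailing isync as the libc fix, while the classic-FPU variant keeps going through the 64-bit __d with mffs/mtfsf, and the inline functions are adjusted to match. A short exercise of the C99 interface those inlines implement; a sketch only, assuming a FreeBSD powerpc target and using no names beyond the header above plus <stdio.h>:

	/* fenv_check.c (hypothetical test, not in the tree) */
	#include <fenv.h>
	#include <stdio.h>

	int
	main(void)
	{
		fenv_t env;

		fegetenv(&env);			/* __mffs() fills the union */
		fesetround(FE_TOWARDZERO);	/* rewrites the two mode bits */
		feraiseexcept(FE_DIVBYZERO);	/* sets the sticky status bit */
		printf("round %#x, dz %d\n", (unsigned)fegetround(),
		    fetestexcept(FE_DIVBYZERO) != 0);
		fesetenv(&env);			/* __mtfsf() restores it all */
		return (0);
	}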
diff --git a/sys/powerpc/booke/spe.c b/sys/powerpc/booke/spe.c
index 19436de666f4..d189b15730a3 100644
--- a/sys/powerpc/booke/spe.c
+++ b/sys/powerpc/booke/spe.c
@@ -1,692 +1,692 @@
 /*-
  * Copyright (C) 1996 Wolfgang Solfrank.
  * Copyright (C) 1996 TooLs GmbH.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by TooLs GmbH.
  * 4. The name of TooLs GmbH may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  *	$NetBSD: fpu.c,v 1.5 2001/07/22 11:29:46 wiz Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 
 void	spe_handle_fpdata(struct trapframe *);
 void	spe_handle_fpround(struct trapframe *);
 static int spe_emu_instr(uint32_t, struct fpemu *, struct fpn **, uint32_t *);
 
 static void
 save_vec_int(struct thread *td)
 {
 	int msr;
 	struct pcb *pcb;
 
 	pcb = td->td_pcb;
 
 	/*
 	 * Temporarily re-enable the vector unit during the save
 	 */
 	msr = mfmsr();
 	mtmsr(msr | PSL_VEC);
 
 	/*
 	 * Save the vector registers and SPEFSCR to the PCB
 	 */
 #define EVSTDW(n)	__asm ("evstdw %1,0(%0)" \
 		:: "b"(pcb->pcb_vec.vr[n]), "n"(n));
 	EVSTDW(0);	EVSTDW(1);	EVSTDW(2);	EVSTDW(3);
 	EVSTDW(4);	EVSTDW(5);	EVSTDW(6);	EVSTDW(7);
 	EVSTDW(8);	EVSTDW(9);	EVSTDW(10);	EVSTDW(11);
 	EVSTDW(12);	EVSTDW(13);	EVSTDW(14);	EVSTDW(15);
 	EVSTDW(16);	EVSTDW(17);	EVSTDW(18);	EVSTDW(19);
 	EVSTDW(20);	EVSTDW(21);	EVSTDW(22);	EVSTDW(23);
 	EVSTDW(24);	EVSTDW(25);	EVSTDW(26);	EVSTDW(27);
 	EVSTDW(28);	EVSTDW(29);	EVSTDW(30);	EVSTDW(31);
 #undef EVSTDW
 
 	__asm ( "evxor 0,0,0\n"
 		"evmwumiaa 0,0,0\n"
 		"evstdd 0,0(%0)" :: "b"(&pcb->pcb_vec.spare[0]));
 	pcb->pcb_vec.vscr = mfspr(SPR_SPEFSCR);
 
 	/*
 	 * Disable vector unit again
 	 */
 	isync();
 	mtmsr(msr);
 }
 
 void
 enable_vec(struct thread *td)
 {
 	int msr;
 	struct pcb *pcb;
 	struct trapframe *tf;
 
 	pcb = td->td_pcb;
 	tf = trapframe(td);
 
 	/*
 	 * Save the thread's SPE CPU number, and set the CPU's current
 	 * vector thread
 	 */
 	td->td_pcb->pcb_veccpu = PCPU_GET(cpuid);
 	PCPU_SET(vecthread, td);
 
 	/*
 	 * Enable the vector unit for when the thread returns from the
 	 * exception. If this is the first time the unit has been used by
 	 * the thread, initialise the vector registers and VSCR to 0, and
 	 * set the flag to indicate that the vector unit is in use.
 	 */
 	tf->srr1 |= PSL_VEC;
 	if (!(pcb->pcb_flags & PCB_VEC)) {
 		memset(&pcb->pcb_vec, 0, sizeof pcb->pcb_vec);
 		pcb->pcb_flags |= PCB_VEC;
 		pcb->pcb_vec.vscr = mfspr(SPR_SPEFSCR);
 	}
 
 	/*
 	 * Temporarily enable the vector unit so the registers
 	 * can be restored.
 	 */
 	msr = mfmsr();
 	mtmsr(msr | PSL_VEC);
 
 	/* Restore SPEFSCR and ACC. Use %r0 as the scratch for ACC. */
 	mtspr(SPR_SPEFSCR, pcb->pcb_vec.vscr);
-	__asm __volatile("evldd 0, 0(%0); evmra 0,0\n"
+	__asm __volatile("isync;evldd 0, 0(%0); evmra 0,0\n"
 	    :: "b"(&pcb->pcb_vec.spare[0]));
 
 	/*
 	 * The lower half of each register will be restored on trap return. Use
 	 * %r0 as a scratch register, and restore it last.
 	 */
 #define EVLDW(n)	__asm __volatile("evldw 0, 0(%0); evmergehilo "#n",0,"#n \
 	    :: "b"(&pcb->pcb_vec.vr[n]));
 	EVLDW(1);	EVLDW(2);	EVLDW(3);	EVLDW(4);
 	EVLDW(5);	EVLDW(6);	EVLDW(7);	EVLDW(8);
 	EVLDW(9);	EVLDW(10);	EVLDW(11);	EVLDW(12);
 	EVLDW(13);	EVLDW(14);	EVLDW(15);	EVLDW(16);
 	EVLDW(17);	EVLDW(18);	EVLDW(19);	EVLDW(20);
 	EVLDW(21);	EVLDW(22);	EVLDW(23);	EVLDW(24);
 	EVLDW(25);	EVLDW(26);	EVLDW(27);	EVLDW(28);
 	EVLDW(29);	EVLDW(30);	EVLDW(31);	EVLDW(0);
 #undef EVLDW
 
 	isync();
 	mtmsr(msr);
 }
 
 void
 save_vec(struct thread *td)
 {
 	struct pcb *pcb;
 
 	pcb = td->td_pcb;
 
 	save_vec_int(td);
 
 	/*
 	 * Clear the current vec thread and pcb's CPU id
 	 * XXX should this be left clear to allow lazy save/restore ?
 	 */
 	pcb->pcb_veccpu = INT_MAX;
 	PCPU_SET(vecthread, NULL);
 }
 
 /*
  * Save SPE state without dropping ownership. This will only save state if
  * the current vector-thread is `td'. This is used for taking core dumps, so
  * don't leak kernel information; overwrite the low words of each vector with
  * their real value, taken from the thread's trap frame, unconditionally.
  */
 void
 save_vec_nodrop(struct thread *td)
 {
 	struct thread *vtd;
 	struct pcb *pcb;
 	int i;
 
 	vtd = PCPU_GET(vecthread);
 	if (td == vtd) {
 		save_vec_int(td);
 	}
 
 	pcb = td->td_pcb;
 
 	for (i = 0; i < 32; i++) {
 		pcb->pcb_vec.vr[i][1] =
 		    td->td_frame ? td->td_frame->fixreg[i] : 0;
 	}
 }
 
 #define	SPE_INST_MASK	0x31f
 #define	EADD	0x200
 #define	ESUB	0x201
 #define	EABS	0x204
 #define	ENABS	0x205
 #define	ENEG	0x206
 #define	EMUL	0x208
 #define	EDIV	0x209
 #define	ECMPGT	0x20c
 #define	ECMPLT	0x20d
 #define	ECMPEQ	0x20e
 #define	ECFUI	0x210
 #define	ECFSI	0x211
 #define	ECTUI	0x214
 #define	ECTSI	0x215
 #define	ECTUF	0x216
 #define	ECTSF	0x217
 #define	ECTUIZ	0x218
 #define	ECTSIZ	0x21a
 
 #define	SPE	0x4
 #define	SPFP	0x6
 #define	DPFP	0x7
 
 #define	SPE_OPC	4
 #define	OPC_SHIFT	26
 
 #define	EVFSADD	0x280
 #define	EVFSSUB	0x281
 #define	EVFSABS	0x284
 #define	EVFSNABS	0x285
 #define	EVFSNEG	0x286
 #define	EVFSMUL	0x288
 #define	EVFSDIV	0x289
 #define	EVFSCMPGT	0x28c
 #define	EVFSCMPLT	0x28d
 #define	EVFSCMPEQ	0x28e
 #define	EVFSCFUI	0x290
 #define	EVFSCFSI	0x291
 #define	EVFSCTUI	0x294
 #define	EVFSCTSI	0x295
 #define	EVFSCTUF	0x296
 #define	EVFSCTSF	0x297
 #define	EVFSCTUIZ	0x298
 #define	EVFSCTSIZ	0x29a
 
 #define	EFSADD	0x2c0
 #define	EFSSUB	0x2c1
 #define	EFSABS	0x2c4
 #define	EFSNABS	0x2c5
 #define	EFSNEG	0x2c6
 #define	EFSMUL	0x2c8
 #define	EFSDIV	0x2c9
 #define	EFSCMPGT	0x2cc
 #define	EFSCMPLT	0x2cd
 #define	EFSCMPEQ	0x2ce
 #define	EFSCFD	0x2cf
 #define	EFSCFUI	0x2d0
 #define	EFSCFSI	0x2d1
 #define	EFSCTUI	0x2d4
 #define	EFSCTSI	0x2d5
 #define	EFSCTUF	0x2d6
 #define	EFSCTSF	0x2d7
 #define	EFSCTUIZ	0x2d8
 #define	EFSCTSIZ	0x2da
 
 #define	EFDADD	0x2e0
 #define	EFDSUB	0x2e1
 #define	EFDABS	0x2e4
 #define	EFDNABS	0x2e5
 #define	EFDNEG	0x2e6
 #define	EFDMUL	0x2e8
 #define	EFDDIV	0x2e9
 #define	EFDCMPGT	0x2ec
 #define	EFDCMPLT	0x2ed
 #define	EFDCMPEQ	0x2ee
 #define	EFDCFS	0x2ef
 #define	EFDCFUI	0x2f0
 #define	EFDCFSI	0x2f1
 #define	EFDCTUI	0x2f4
 #define	EFDCTSI	0x2f5
 #define	EFDCTUF	0x2f6
 #define	EFDCTSF	0x2f7
 #define	EFDCTUIZ	0x2f8
 #define	EFDCTSIZ	0x2fa
 
 enum {
 	NONE,
 	SINGLE,
 	DOUBLE,
 	VECTOR,
 };
 
 static uint32_t
 fpscr_to_spefscr(uint32_t fpscr)
 {
 	uint32_t spefscr;
 
 	spefscr = 0;
 	if (fpscr & FPSCR_VX)
 		spefscr |= SPEFSCR_FINV;
 	if (fpscr & FPSCR_OX)
 		spefscr |= SPEFSCR_FOVF;
 	if (fpscr & FPSCR_UX)
 		spefscr |= SPEFSCR_FUNF;
 	if (fpscr & FPSCR_ZX)
 		spefscr |= SPEFSCR_FDBZ;
 	if (fpscr & FPSCR_XX)
 		spefscr |= SPEFSCR_FX;
 
 	return (spefscr);
 }
 
 /*
  * Sign is 0 for unsigned, 1 for signed.
  */
 static int
 spe_to_int(struct fpemu *fpemu, struct fpn *fpn, uint32_t *val, int sign)
 {
 	uint32_t res[2];
 
 	res[0] = fpu_ftox(fpemu, fpn, res);
 	if (res[0] != UINT_MAX && res[0] != 0)
 		fpemu->fe_cx |= FPSCR_OX;
 	else if (sign == 0 && res[0] != 0)
 		fpemu->fe_cx |= FPSCR_UX;
 	else
 		*val = res[1];
 
 	return (0);
 }
 
 /* Masked instruction */
 /*
  * For compare instructions, returns 1 if success, 0 if not. For all others,
  * returns -1, or -2 if no result needs recorded.
  */
 static int
 spe_emu_instr(uint32_t instr, struct fpemu *fpemu,
     struct fpn **result, uint32_t *iresult)
 {
 	switch (instr & SPE_INST_MASK) {
 	case EABS:
 	case ENABS:
 	case ENEG:
 		/* Taken care of elsewhere. */
 		break;
 	case ECTUIZ:
 		fpemu->fe_cx &= ~FPSCR_RN;
 		fpemu->fe_cx |= FP_RZ;
 	case ECTUI:
 		spe_to_int(fpemu, &fpemu->fe_f2, iresult, 0);
 		return (-1);
 	case ECTSIZ:
 		fpemu->fe_cx &= ~FPSCR_RN;
 		fpemu->fe_cx |= FP_RZ;
 	case ECTSI:
 		spe_to_int(fpemu, &fpemu->fe_f2, iresult, 1);
 		return (-1);
 	case EADD:
 		*result = fpu_add(fpemu);
 		break;
 	case ESUB:
 		*result = fpu_sub(fpemu);
 		break;
 	case EMUL:
 		*result = fpu_mul(fpemu);
 		break;
 	case EDIV:
 		*result = fpu_div(fpemu);
 		break;
 	case ECMPGT:
 		fpu_compare(fpemu, 0);
 		if (fpemu->fe_cx & FPSCR_FG)
 			return (1);
 		return (0);
 	case ECMPLT:
 		fpu_compare(fpemu, 0);
 		if (fpemu->fe_cx & FPSCR_FL)
 			return (1);
 		return (0);
 	case ECMPEQ:
 		fpu_compare(fpemu, 0);
 		if (fpemu->fe_cx & FPSCR_FE)
 			return (1);
 		return (0);
 	default:
 		printf("Unknown instruction %x\n", instr);
 	}
 
 	return (-1);
 }
 
 static int
 spe_explode(struct fpemu *fe, struct fpn *fp, uint32_t type,
     uint32_t hi, uint32_t lo)
 {
 	uint32_t s;
 
 	fp->fp_sign = hi >> 31;
 	fp->fp_sticky = 0;
 	switch (type) {
 	case SINGLE:
 		s = fpu_stof(fp, hi);
 		break;
 	case DOUBLE:
 		s = fpu_dtof(fp, hi, lo);
 		break;
 	}
 
 	if (s == FPC_QNAN && (fp->fp_mant[0] & FP_QUIETBIT) == 0) {
 		/*
 		 * Input is a signalling NaN. All operations that return
 		 * an input NaN operand put it through a ``NaN conversion'',
 		 * which basically just means ``turn on the quiet bit''.
 		 * We do this here so that all NaNs internally look quiet
 		 * (we can tell signalling ones by their class).
 		 */
 		fp->fp_mant[0] |= FP_QUIETBIT;
 		fe->fe_cx = FPSCR_VXSNAN;	/* assert invalid operand */
 		s = FPC_SNAN;
 	}
 
 	fp->fp_class = s;
 
 	return (0);
 }
 
 /*
  * Save the high word of a 64-bit GPR for manipulation in the exception
  * handler.
  */
 static uint32_t
 spe_save_reg_high(int reg)
 {
 	uint32_t vec[2];
 #define EVSTDW(n)	case n: __asm __volatile ("evstdw %1,0(%0)" \
 			:: "b"(vec), "n"(n) : "memory"); break;
 
 	switch (reg) {
 	EVSTDW(0);	EVSTDW(1);	EVSTDW(2);	EVSTDW(3);
 	EVSTDW(4);	EVSTDW(5);	EVSTDW(6);	EVSTDW(7);
 	EVSTDW(8);	EVSTDW(9);	EVSTDW(10);	EVSTDW(11);
 	EVSTDW(12);	EVSTDW(13);	EVSTDW(14);	EVSTDW(15);
 	EVSTDW(16);	EVSTDW(17);	EVSTDW(18);	EVSTDW(19);
 	EVSTDW(20);	EVSTDW(21);	EVSTDW(22);	EVSTDW(23);
 	EVSTDW(24);	EVSTDW(25);	EVSTDW(26);	EVSTDW(27);
 	EVSTDW(28);	EVSTDW(29);	EVSTDW(30);	EVSTDW(31);
 	}
 #undef EVSTDW
 
 	return (vec[0]);
 }
 
 /*
  * Load the given value into the high word of the requested register.
  */
 static void
 spe_load_reg_high(int reg, uint32_t val)
 {
 #define EVLDW(n)	case n: __asm __volatile("evmergelo "#n",%0,"#n \
 			:: "r"(val)); break;
 	switch (reg) {
 	EVLDW(1);	EVLDW(2);	EVLDW(3);	EVLDW(4);
 	EVLDW(5);	EVLDW(6);	EVLDW(7);	EVLDW(8);
 	EVLDW(9);	EVLDW(10);	EVLDW(11);	EVLDW(12);
 	EVLDW(13);	EVLDW(14);	EVLDW(15);	EVLDW(16);
 	EVLDW(17);	EVLDW(18);	EVLDW(19);	EVLDW(20);
 	EVLDW(21);	EVLDW(22);	EVLDW(23);	EVLDW(24);
 	EVLDW(25);	EVLDW(26);	EVLDW(27);	EVLDW(28);
 	EVLDW(29);	EVLDW(30);	EVLDW(31);	EVLDW(0);
 	}
 #undef EVLDW
 }
 
 void
 spe_handle_fpdata(struct trapframe *frame)
 {
 	struct fpemu fpemu;
 	struct fpn *result;
 	uint32_t instr, instr_sec_op;
 	uint32_t cr_shift, ra, rb, rd, src;
 	uint32_t high, low, res, tmp;	/* For vector operations. */
 	uint32_t spefscr = 0;
 	uint32_t ftod_res[2];
 	int width;	/* Single, Double, Vector, Integer */
 	int err;
 	uint32_t msr;
 
 	err = fueword32((void *)frame->srr0, &instr);
 
 	if (err != 0)
 		return;		/* Fault. */
 
 	if ((instr >> OPC_SHIFT) != SPE_OPC)
 		return;
 
 	msr = mfmsr();
 
 	/*
 	 * 'cr' field is the upper 3 bits of rd. Magically, since a) rd is 5
 	 * bits, b) each 'cr' field is 4 bits, and c) Only the 'GT' bit is
 	 * modified for most compare operations, the full value of rd can be
 	 * used as a shift value.
 	 */
 	rd = (instr >> 21) & 0x1f;
 	ra = (instr >> 16) & 0x1f;
 	rb = (instr >> 11) & 0x1f;
 	src = (instr >> 5) & 0x7;
 	cr_shift = 28 - (rd & 0x1f);
 
 	instr_sec_op = (instr & 0x7ff);
 
 	memset(&fpemu, 0, sizeof(fpemu));
 
 	width = NONE;
 	switch (src) {
 	case SPE:
 		mtmsr(msr | PSL_VEC);
 		switch (instr_sec_op) {
 		case EVFSABS:
 			high = spe_save_reg_high(ra) & ~(1U << 31);
 			frame->fixreg[rd] = frame->fixreg[ra] & ~(1U << 31);
 			spe_load_reg_high(rd, high);
 			break;
 		case EVFSNABS:
 			high = spe_save_reg_high(ra) | (1U << 31);
 			frame->fixreg[rd] = frame->fixreg[ra] | (1U << 31);
 			spe_load_reg_high(rd, high);
 			break;
 		case EVFSNEG:
 			high = spe_save_reg_high(ra) ^ (1U << 31);
 			frame->fixreg[rd] = frame->fixreg[ra] ^ (1U << 31);
 			spe_load_reg_high(rd, high);
 			break;
 		default:
 			/* High word */
 			spe_explode(&fpemu, &fpemu.fe_f1, SINGLE,
 			    spe_save_reg_high(ra), 0);
 			spe_explode(&fpemu, &fpemu.fe_f2, SINGLE,
 			    spe_save_reg_high(rb), 0);
 			high = spe_emu_instr(instr_sec_op, &fpemu, &result,
 			    &tmp);
 			if (high < 0)
 				spe_load_reg_high(rd, tmp);
 
 			spefscr = fpscr_to_spefscr(fpemu.fe_cx) << 16;
 			/*
 			 * Clear the fpemu to start over on the lower bits.
 			 */
 			memset(&fpemu, 0, sizeof(fpemu));
 
 			/* Now low word */
 			spe_explode(&fpemu, &fpemu.fe_f1, SINGLE,
 			    frame->fixreg[ra], 0);
 			spe_explode(&fpemu, &fpemu.fe_f2, SINGLE,
 			    frame->fixreg[rb], 0);
 			spefscr |= fpscr_to_spefscr(fpemu.fe_cx);
 			low = spe_emu_instr(instr_sec_op, &fpemu, &result,
 			    &frame->fixreg[rd]);
 			if (instr_sec_op == EVFSCMPEQ ||
 			    instr_sec_op == EVFSCMPGT ||
 			    instr_sec_op == EVFSCMPLT) {
 				res = (high << 3) | (low << 2) |
 				    ((high | low) << 1) | (high & low);
 				width = NONE;
 			} else
 				width = VECTOR;
 			break;
 		}
 		goto end;
 
 	case SPFP:
 		switch (instr_sec_op) {
 		case EFSABS:
 			frame->fixreg[rd] = frame->fixreg[ra] & ~(1U << 31);
 			break;
 		case EFSNABS:
 			frame->fixreg[rd] = frame->fixreg[ra] | (1U << 31);
 			break;
 		case EFSNEG:
 			frame->fixreg[rd] = frame->fixreg[ra] ^ (1U << 31);
 			break;
 		case EFSCFD:
 			mtmsr(msr | PSL_VEC);
 			spe_explode(&fpemu, &fpemu.fe_f3, DOUBLE,
 			    spe_save_reg_high(rb), frame->fixreg[rb]);
 			result = &fpemu.fe_f3;
 			width = SINGLE;
 			break;
 		default:
 			spe_explode(&fpemu, &fpemu.fe_f1, SINGLE,
 			    frame->fixreg[ra], 0);
 			spe_explode(&fpemu, &fpemu.fe_f2, SINGLE,
 			    frame->fixreg[rb], 0);
 			width = SINGLE;
 		}
 		break;
 	case DPFP:
 		mtmsr(msr | PSL_VEC);
 		switch (instr_sec_op) {
 		case EFDABS:
 			high = spe_save_reg_high(ra) & ~(1U << 31);
 			frame->fixreg[rd] = frame->fixreg[ra];
 			spe_load_reg_high(rd, high);
 			break;
 		case EFDNABS:
 			high = spe_save_reg_high(ra) | (1U << 31);
 			frame->fixreg[rd] = frame->fixreg[ra];
 			spe_load_reg_high(rd, high);
 			break;
 		case EFDNEG:
 			high = spe_save_reg_high(ra) ^ (1U << 31);
 			frame->fixreg[rd] = frame->fixreg[ra];
 			spe_load_reg_high(rd, high);
 			break;
 		case EFDCFS:
 			spe_explode(&fpemu, &fpemu.fe_f3, SINGLE,
 			    frame->fixreg[rb], 0);
 			result = &fpemu.fe_f3;
 			width = DOUBLE;
 			break;
 		default:
 			spe_explode(&fpemu, &fpemu.fe_f1, DOUBLE,
 			    spe_save_reg_high(ra), frame->fixreg[ra]);
 			spe_explode(&fpemu, &fpemu.fe_f2, DOUBLE,
 			    spe_save_reg_high(rb), frame->fixreg[rb]);
 			width = DOUBLE;
 		}
 		break;
 	}
 
 	switch (instr_sec_op) {
 	case EFDCFS:
 	case EFSCFD:
 		/* Already handled. */
 		break;
 	default:
 		res = spe_emu_instr(instr_sec_op, &fpemu, &result,
 		    &frame->fixreg[rd]);
 		if (res != -1)
 			res <<= 2;
 		break;
 	}
 
 	switch (instr_sec_op & SPE_INST_MASK) {
 	case ECMPEQ:
 	case ECMPGT:
 	case ECMPLT:
 		frame->cr &= ~(0xf << cr_shift);
 		frame->cr |= (res << cr_shift);
 		break;
 	case ECTUI:
 	case ECTUIZ:
 	case ECTSI:
 	case ECTSIZ:
 		break;
 	default:
 		switch (width) {
 		case NONE:
 		case VECTOR:
 			break;
 		case SINGLE:
 			frame->fixreg[rd] = fpu_ftos(&fpemu, result);
 			break;
 		case DOUBLE:
 			spe_load_reg_high(rd, fpu_ftod(&fpemu, result, ftod_res));
 			frame->fixreg[rd] = ftod_res[1];
 			break;
 		default:
 			panic("Unknown storage width %d", width);
 			break;
 		}
 	}
 
 end:
 	spefscr |= (mfspr(SPR_SPEFSCR) & ~SPEFSCR_FINVS);
 	mtspr(SPR_SPEFSCR, spefscr);
 
 	frame->srr0 += 4;
 	mtmsr(msr);
 
 	return;
 }
 
 void
 spe_handle_fpround(struct trapframe *frame)
 {
 
 	/*
 	 * Punt fpround exceptions for now. This leaves the truncated result in
 	 * the register. We'll deal with overflow/underflow later.
 	 */
 	return;
 }
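The kernel change in enable_vec() is the same fix: the SPEFSCR value restored from the PCB must be synchronized before the evldd/evmra pair reloads the accumulator, or the reload could run under the previous thread's rounding and trap-enable state. The pattern shared by all four files reduces to the helpers below, a sketch only; SPR 512 is SPR_SPEFSCR, as in the fenv.h macros, and neither helper exists in the tree.

	#include <stdint.h>

	static inline uint32_t
	spefscr_read(void)
	{
		uint32_t v;

		/* A read needs no synchronization; only writes change context. */
		__asm __volatile("mfspr %0, 512" : "=r"(v));
		return (v);
	}

	static inline void
	spefscr_write(uint32_t v)
	{
		/*
		 * mtspr to SPEFSCR is not context-synchronizing; the isync
		 * keeps later FP instructions from executing under the old
		 * rounding mode or exception mask.
		 */
		__asm __volatile("mtspr 512, %0; isync" :: "r"(v));
	}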