Index: head/share/man/man9/fpu_kern.9 =================================================================== --- head/share/man/man9/fpu_kern.9 (revision 307666) +++ head/share/man/man9/fpu_kern.9 (revision 307667) @@ -1,210 +1,212 @@ .\" Copyright (c) 2014 .\" Konstantin Belousov . All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR .\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES .\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. .\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, .\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT .\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, .\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY .\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd October 23, 2014 +.Dd October 20, 2016 .Dt FPU_KERN 9 .Os .Sh NAME .Nm fpu_kern .Nd "facility to use the FPU in the kernel" .Sh SYNOPSIS .Ft struct fpu_kern_ctx * .Fn fpu_kern_alloc_ctx "u_int flags" .Ft void .Fn fpu_kern_free_ctx "struct fpu_kern_ctx *ctx" .Ft int .Fn fpu_kern_enter "struct thread *td" "struct fpu_kern_ctx *ctx" "u_int flags" .Ft int .Fn fpu_kern_leave "struct thread *td" "struct fpu_kern_ctx *ctx" .Ft int .Fn fpu_kern_thread "u_int flags" .Ft int .Fn is_fpu_kern_thread "u_int flags" .Sh DESCRIPTION The .Nm family of functions allows the use of FPU hardware in kernel code. Modern FPUs are not limited to providing a hardware implementation of floating point arithmetic; they offer advanced accelerators for cryptography and other computationally intensive algorithms. These facilities share registers with the FPU hardware. .Pp Typical kernel code does not need access to the FPU. Saving a large register file on each entry to the kernel would waste time. When kernel code uses the FPU, the current FPU state must be saved to avoid corrupting the user-mode state, and vice versa. .Pp The management of the save and restore is automatic. The processor catches accesses to the FPU registers when the non-current context tries to access them. Explicit calls are required for the allocation of the save area and the notification of the start and end of the code using the FPU. .Pp The .Fn fpu_kern_alloc_ctx function allocates the memory used by .Nm to track the use of the FPU hardware state and the related software state. The .Fn fpu_kern_alloc_ctx function requires the .Fa flags argument, which currently accepts the following flags: .Bl -tag -width ".Dv FPU_KERN_NOWAIT" -offset indent .It Dv FPU_KERN_NOWAIT Do not sleep waiting for memory if the request cannot be satisfied immediately. .It 0 No special handling is required. .El .Pp The function returns the allocated context area, or .Va NULL if the allocation failed. 
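.Pp
The following fragment is a minimal sketch of allocating a context with the flag described above; it is an illustration only, and the error handling shown is an assumption about the caller:
.Bd -literal -offset indent
struct fpu_kern_ctx *ctx;

ctx = fpu_kern_alloc_ctx(FPU_KERN_NOWAIT);
if (ctx == NULL)
	return (ENOMEM);	/* could not allocate without sleeping */
.Ed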
.Pp The .Fn fpu_kern_free_ctx function frees the context previously allocated by .Fn fpu_kern_alloc_ctx . .Pp The .Fn fpu_kern_enter function designates the start of the region of kernel code where the use of the FPU is allowed. Its arguments are: .Bl -tag -width ".Fa ctx" -offset indent .It Fa td Currently must be .Va curthread . .It Fa ctx The context save area previously allocated by .Fn fpu_kern_alloc_ctx and not currently in use by another call to .Fn fpu_kern_enter . .It Fa flags This argument currently accepts the following flags: .Bl -tag -width ".Dv FPU_KERN_NORMAL" -offset indent .It Dv FPU_KERN_NORMAL Indicates that the caller intends to access the full FPU state. Must be specified currently. .It Dv FPU_KERN_KTHR Indicates that no saving of the current FPU state should be performed if the thread has called the .Xr fpu_kern_thread 9 function. This is intended to minimize code duplication in callers which could be used from both kernel thread and syscall contexts. The .Fn fpu_kern_leave function correctly handles such contexts. .It Dv FPU_KERN_NOCTX Avoid nesting the save area. If the flag is specified, the .Fa ctx must be passed as .Va NULL . The flag should only be used for very short code blocks which can be executed in a critical section. It avoids the need to allocate the FPU context at the cost of increased system latency. .El .El .Pp The function does not sleep or block. -It could cause the +It could cause an FPU trap during execution, and on the first FPU access +after the function returns, as well as after each context switch. +On i386 and amd64 this will be the .Nm Device Not Available -exception during execution, and on the first FPU access after the -function returns, as well as after each context switch -(see Intel Software Developer Manual for the reference). +exception (see Intel Software Developer Manual for the reference). Currently, no errors are defined which can be returned by .Fn fpu_kern_enter to the caller. .Pp The .Fn fpu_kern_leave function ends the region started by .Fn fpu_kern_enter . Any use of the FPU in the kernel after the call to .Fn fpu_kern_leave is erroneous until the next call to .Fn fpu_kern_enter is performed. The function takes the .Fa td thread argument, which currently must be .Va curthread , and the .Fa ctx context pointer, previously passed to .Fn fpu_kern_enter . After the function returns, the context may be freed or reused by another invocation of .Fn fpu_kern_enter . There are no errors defined for the function; it always returns 0. .Pp The .Fn fpu_kern_thread function enables an optimization for threads which never return to usermode. The current thread will reuse the usermode save area for the kernel FPU state instead of requiring an explicitly allocated context. There are no flags defined for the function, and it returns no errors. Once this function has been called, neither .Fn fpu_kern_enter nor .Fn fpu_kern_leave is required to be called and the FPU is available for use in the calling thread. .Pp The .Fn is_fpu_kern_thread function returns a boolean indicating whether the current thread entered the mode enabled by .Fn fpu_kern_thread . There are currently no flags defined for the function; the return value is true if the current thread has a permanent FPU save area, and false otherwise. .Sh NOTES The .Nm -is currently implemented only for the i386 and amd64 architectures. +is currently implemented only for the i386, amd64, and arm64 architectures. 
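.Pp
As a minimal usage sketch assembled from the interfaces described above (the work performed inside the region and the enclosing function are hypothetical), a guarded FPU region might look like:
.Bd -literal -offset indent
struct fpu_kern_ctx *ctx;

ctx = fpu_kern_alloc_ctx(0);	/* may sleep for memory */
fpu_kern_enter(curthread, ctx, FPU_KERN_NORMAL);
/* FPU and SIMD instructions may be used here. */
fpu_kern_leave(curthread, ctx);
fpu_kern_free_ctx(ctx);
.Ed
.Pp
A kernel-only thread that has called
.Fn fpu_kern_thread
may additionally pass the
.Dv FPU_KERN_KTHR
flag to
.Fn fpu_kern_enter ,
in which case the save of the current state is skipped.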
.Pp There is no way to handle floating point exceptions raised from kernel mode. .Pp The unused .Fa flags arguments to the .Nm functions are intended to be extended to allow specifying the subset of the FPU hardware state used by the code region. This would allow the saving and restoring of the state to be optimized. .Sh AUTHORS The .Nm facility and this manual page were written by .An Konstantin Belousov Aq Mt kib@FreeBSD.org . +The arm64 support was added by +.An Andrew Turner Aq Mt andrew@FreeBSD.org . Index: head/sys/arm64/arm64/trap.c =================================================================== --- head/sys/arm64/arm64/trap.c (revision 307666) +++ head/sys/arm64/arm64/trap.c (revision 307667) @@ -1,423 +1,434 @@ /*- * Copyright (c) 2014 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #ifdef KDB #include #endif #include #include #include #include #include #include #include #include #include #ifdef KDTRACE_HOOKS #include #endif #ifdef VFP #include #endif #ifdef KDB #include #endif #ifdef DDB #include #endif extern register_t fsu_intr_fault; /* Called from exception.S */ void do_el1h_sync(struct trapframe *); void do_el0_sync(struct trapframe *); void do_el0_error(struct trapframe *); static void print_registers(struct trapframe *frame); int (*dtrace_invop_jump_addr)(struct trapframe *); static __inline void call_trapsignal(struct thread *td, int sig, int code, void *addr) { ksiginfo_t ksi; ksiginfo_init_trap(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = code; ksi.ksi_addr = addr; trapsignal(td, &ksi); } int cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa) { struct proc *p; register_t *ap; int nap; nap = 8; p = td->td_proc; ap = td->td_frame->tf_x; sa->code = td->td_frame->tf_x[8]; if (sa->code == SYS_syscall || sa->code == SYS___syscall) { sa->code = *ap++; nap--; } if (p->p_sysent->sv_mask) sa->code &= p->p_sysent->sv_mask; if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; memcpy(sa->args, ap, nap * sizeof(register_t)); if (sa->narg > nap) panic("ARM64TODO: Could we have more than 8 args?"); td->td_retval[0] = 0; td->td_retval[1] = 0; return (0); } #include "../../kern/subr_syscall.c" static void svc_handler(struct trapframe *frame) { struct syscall_args sa; struct thread *td; int error; td = curthread; error = syscallenter(td, &sa); syscallret(td, error, &sa); } static void data_abort(struct trapframe *frame, uint64_t esr, uint64_t far, int lower) { struct vm_map *map; struct thread *td; struct proc *p; struct pcb *pcb; vm_prot_t ftype; vm_offset_t va; int error, sig, ucode; /* * According to the ARMv8-A rev. A.g, B2.10.5 "Load-Exclusive * and Store-Exclusive instruction usage restrictions", state * of the exclusive monitors after data abort exception is unknown. */ clrex(); #ifdef KDB if (kdb_active) { kdb_reenter(); return; } #endif td = curthread; pcb = td->td_pcb; /* * Special case for fuswintr and suswintr. These can't sleep so * handle them early on in the trap handler. */ if (__predict_false(pcb->pcb_onfault == (vm_offset_t)&fsu_intr_fault)) { frame->tf_elr = pcb->pcb_onfault; return; } p = td->td_proc; if (lower) map = &p->p_vmspace->vm_map; else { /* The top bit tells us which range to use */ if ((far >> 63) == 1) { map = kernel_map; } else { map = &p->p_vmspace->vm_map; if (map == NULL) map = kernel_map; } } if (pmap_fault(map->pmap, esr, far) == KERN_SUCCESS) return; KASSERT(td->td_md.md_spinlock_count == 0, ("data abort with spinlock held")); if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) { print_registers(frame); printf(" far: %16lx\n", far); printf(" esr: %.8lx\n", esr); panic("data abort in critical section or under mutex"); } va = trunc_page(far); ftype = ((esr >> 6) & 1) ? VM_PROT_READ | VM_PROT_WRITE : VM_PROT_READ; /* Fault in the page. 
*/ error = vm_fault(map, va, ftype, VM_FAULT_NORMAL); if (error != KERN_SUCCESS) { if (lower) { sig = SIGSEGV; if (error == KERN_PROTECTION_FAILURE) ucode = SEGV_ACCERR; else ucode = SEGV_MAPERR; call_trapsignal(td, sig, ucode, (void *)far); } else { if (td->td_intr_nesting_level == 0 && pcb->pcb_onfault != 0) { frame->tf_x[0] = error; frame->tf_elr = pcb->pcb_onfault; return; } printf("Fatal data abort:\n"); print_registers(frame); printf(" far: %16lx\n", far); printf(" esr: %.8lx\n", esr); #ifdef KDB if (debugger_on_panic || kdb_active) if (kdb_trap(ESR_ELx_EXCEPTION(esr), 0, frame)) return; #endif panic("vm_fault failed: %lx", frame->tf_elr); } } if (lower) userret(td, frame); } static void print_registers(struct trapframe *frame) { u_int reg; for (reg = 0; reg < 31; reg++) { printf(" %sx%d: %16lx\n", (reg < 10) ? " " : "", reg, frame->tf_x[reg]); } printf(" sp: %16lx\n", frame->tf_sp); printf(" lr: %16lx\n", frame->tf_lr); printf(" elr: %16lx\n", frame->tf_elr); printf("spsr: %16lx\n", frame->tf_spsr); } void do_el1h_sync(struct trapframe *frame) { uint32_t exception; uint64_t esr, far; /* Read the esr register to get the exception details */ esr = READ_SPECIALREG(esr_el1); exception = ESR_ELx_EXCEPTION(esr); #ifdef KDTRACE_HOOKS if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, exception)) return; #endif CTR4(KTR_TRAP, "do_el1_sync: curthread: %p, esr %lx, elr: %lx, frame: %p", curthread, esr, frame->tf_elr, frame); switch(exception) { case EXCP_FP_SIMD: case EXCP_TRAP_FP: - print_registers(frame); - printf(" esr: %.8lx\n", esr); - panic("VFP exception in the kernel"); +#ifdef VFP + if ((curthread->td_pcb->pcb_fpflags & PCB_FP_KERN) != 0) { + vfp_restore_state(); + } else +#endif + { + print_registers(frame); + printf(" esr: %.8lx\n", esr); + panic("VFP exception in the kernel"); + } + break; case EXCP_INSN_ABORT: case EXCP_DATA_ABORT: far = READ_SPECIALREG(far_el1); intr_enable(); data_abort(frame, esr, far, 0); break; case EXCP_BRK: #ifdef KDTRACE_HOOKS if ((esr & ESR_ELx_ISS_MASK) == 0x40d && \ dtrace_invop_jump_addr != 0) { dtrace_invop_jump_addr(frame); break; } #endif /* FALLTHROUGH */ case EXCP_WATCHPT_EL1: case EXCP_SOFTSTP_EL1: #ifdef KDB kdb_trap(exception, 0, frame); #else panic("No debugger in kernel.\n"); #endif break; default: print_registers(frame); panic("Unknown kernel exception %x esr_el1 %lx\n", exception, esr); } } /* * The attempted execution of an instruction bit pattern that has no allocated * instruction results in an exception with an unknown reason. 
*/ static void el0_excp_unknown(struct trapframe *frame, uint64_t far) { struct thread *td; td = curthread; call_trapsignal(td, SIGILL, ILL_ILLTRP, (void *)far); userret(td, frame); } void do_el0_sync(struct trapframe *frame) { struct thread *td; uint32_t exception; uint64_t esr, far; /* Check we have a sane environment when entering from userland */ KASSERT((uintptr_t)get_pcpu() >= VM_MIN_KERNEL_ADDRESS, ("Invalid pcpu address from userland: %p (tpidr %lx)", get_pcpu(), READ_SPECIALREG(tpidr_el1))); td = curthread; td->td_frame = frame; esr = READ_SPECIALREG(esr_el1); exception = ESR_ELx_EXCEPTION(esr); switch (exception) { case EXCP_UNKNOWN: case EXCP_INSN_ABORT_L: case EXCP_DATA_ABORT_L: case EXCP_DATA_ABORT: far = READ_SPECIALREG(far_el1); } intr_enable(); CTR4(KTR_TRAP, "do_el0_sync: curthread: %p, esr %lx, elr: %lx, frame: %p", curthread, esr, frame->tf_elr, frame); switch(exception) { case EXCP_FP_SIMD: case EXCP_TRAP_FP: #ifdef VFP vfp_restore_state(); #else panic("VFP exception in userland"); #endif break; case EXCP_SVC: svc_handler(frame); break; case EXCP_INSN_ABORT_L: case EXCP_DATA_ABORT_L: case EXCP_DATA_ABORT: data_abort(frame, esr, far, 1); break; case EXCP_UNKNOWN: el0_excp_unknown(frame, far); break; case EXCP_SP_ALIGN: call_trapsignal(td, SIGBUS, BUS_ADRALN, (void *)frame->tf_sp); userret(td, frame); break; case EXCP_PC_ALIGN: call_trapsignal(td, SIGBUS, BUS_ADRALN, (void *)frame->tf_elr); userret(td, frame); break; case EXCP_BRK: call_trapsignal(td, SIGTRAP, TRAP_BRKPT, (void *)frame->tf_elr); userret(td, frame); break; case EXCP_MSR: call_trapsignal(td, SIGILL, ILL_PRVOPC, (void *)frame->tf_elr); userret(td, frame); break; case EXCP_SOFTSTP_EL0: td->td_frame->tf_spsr &= ~PSR_SS; td->td_pcb->pcb_flags &= ~PCB_SINGLE_STEP; WRITE_SPECIALREG(MDSCR_EL1, READ_SPECIALREG(MDSCR_EL1) & ~DBG_MDSCR_SS); call_trapsignal(td, SIGTRAP, TRAP_TRACE, (void *)frame->tf_elr); userret(td, frame); break; default: call_trapsignal(td, SIGBUS, BUS_OBJERR, (void *)frame->tf_elr); userret(td, frame); break; } KASSERT((curthread->td_pcb->pcb_fpflags & ~PCB_FP_USERMASK) == 0, ("Kernel VFP flags set while entering userspace")); + KASSERT( + curthread->td_pcb->pcb_fpusaved == &curthread->td_pcb->pcb_fpustate, + ("Kernel VFP state in use when entering userspace")); } void do_el0_error(struct trapframe *frame) { panic("ARM64TODO: do_el0_error"); } Index: head/sys/arm64/arm64/vfp.c =================================================================== --- head/sys/arm64/arm64/vfp.c (revision 307666) +++ head/sys/arm64/arm64/vfp.c (revision 307667) @@ -1,229 +1,345 @@ /*- - * Copyright (c) 2015 The FreeBSD Foundation + * Copyright (c) 2015-2016 The FreeBSD Foundation * All rights reserved. * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #ifdef VFP #include #include #include #include #include #include #include #include #include /* Sanity check we can store all the VFP registers */ CTASSERT(sizeof(((struct pcb *)0)->pcb_fpustate.vfp_regs) == 16 * 32); static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx", "Kernel contexts for VFP state"); +struct fpu_kern_ctx { + struct vfpstate *prev; +#define FPU_KERN_CTX_DUMMY 0x01 /* avoided save for the kern thread */ +#define FPU_KERN_CTX_INUSE 0x02 + uint32_t flags; + struct vfpstate state; +}; + static void vfp_enable(void) { uint32_t cpacr; cpacr = READ_SPECIALREG(cpacr_el1); cpacr = (cpacr & ~CPACR_FPEN_MASK) | CPACR_FPEN_TRAP_NONE; WRITE_SPECIALREG(cpacr_el1, cpacr); isb(); } static void vfp_disable(void) { uint32_t cpacr; cpacr = READ_SPECIALREG(cpacr_el1); cpacr = (cpacr & ~CPACR_FPEN_MASK) | CPACR_FPEN_TRAP_ALL1; WRITE_SPECIALREG(cpacr_el1, cpacr); isb(); } /* - * Called when the thread is dying. If the thread was the last to use the - * VFP unit mark it as unused to tell the kernel the fp state is unowned. - * Ensure the VFP unit is off so we get an exception on the next access. + * Called when the thread is dying or when discarding the kernel VFP state. + * If the thread was the last to use the VFP unit mark it as unused to tell + * the kernel the fp state is unowned. Ensure the VFP unit is off so we get + * an exception on the next access. 
*/ void vfp_discard(struct thread *td) { #ifdef INVARIANTS if (td != NULL) CRITICAL_ASSERT(td); #endif if (PCPU_GET(fpcurthread) == td) PCPU_SET(fpcurthread, NULL); vfp_disable(); } static void vfp_store(struct vfpstate *state) { __int128_t *vfp_state; uint64_t fpcr, fpsr; vfp_state = state->vfp_regs; __asm __volatile( "mrs %0, fpcr \n" "mrs %1, fpsr \n" "stp q0, q1, [%2, #16 * 0]\n" "stp q2, q3, [%2, #16 * 2]\n" "stp q4, q5, [%2, #16 * 4]\n" "stp q6, q7, [%2, #16 * 6]\n" "stp q8, q9, [%2, #16 * 8]\n" "stp q10, q11, [%2, #16 * 10]\n" "stp q12, q13, [%2, #16 * 12]\n" "stp q14, q15, [%2, #16 * 14]\n" "stp q16, q17, [%2, #16 * 16]\n" "stp q18, q19, [%2, #16 * 18]\n" "stp q20, q21, [%2, #16 * 20]\n" "stp q22, q23, [%2, #16 * 22]\n" "stp q24, q25, [%2, #16 * 24]\n" "stp q26, q27, [%2, #16 * 26]\n" "stp q28, q29, [%2, #16 * 28]\n" "stp q30, q31, [%2, #16 * 30]\n" : "=&r"(fpcr), "=&r"(fpsr) : "r"(vfp_state)); state->vfp_fpcr = fpcr; state->vfp_fpsr = fpsr; } static void vfp_restore(struct vfpstate *state) { __int128_t *vfp_state; uint64_t fpcr, fpsr; vfp_state = state->vfp_regs; fpcr = state->vfp_fpcr; fpsr = state->vfp_fpsr; __asm __volatile( "ldp q0, q1, [%2, #16 * 0]\n" "ldp q2, q3, [%2, #16 * 2]\n" "ldp q4, q5, [%2, #16 * 4]\n" "ldp q6, q7, [%2, #16 * 6]\n" "ldp q8, q9, [%2, #16 * 8]\n" "ldp q10, q11, [%2, #16 * 10]\n" "ldp q12, q13, [%2, #16 * 12]\n" "ldp q14, q15, [%2, #16 * 14]\n" "ldp q16, q17, [%2, #16 * 16]\n" "ldp q18, q19, [%2, #16 * 18]\n" "ldp q20, q21, [%2, #16 * 20]\n" "ldp q22, q23, [%2, #16 * 22]\n" "ldp q24, q25, [%2, #16 * 24]\n" "ldp q26, q27, [%2, #16 * 26]\n" "ldp q28, q29, [%2, #16 * 28]\n" "ldp q30, q31, [%2, #16 * 30]\n" "msr fpcr, %0 \n" "msr fpsr, %1 \n" : : "r"(fpcr), "r"(fpsr), "r"(vfp_state)); } void vfp_save_state(struct thread *td, struct pcb *pcb) { uint32_t cpacr; KASSERT(pcb != NULL, ("NULL vfp pcb")); KASSERT(td == NULL || td->td_pcb == pcb, ("Invalid vfp pcb")); if (td == NULL) td = curthread; critical_enter(); /* * Only store the registers if the VFP is enabled, * i.e. return if we are trapping on FP access. */ cpacr = READ_SPECIALREG(cpacr_el1); if ((cpacr & CPACR_FPEN_MASK) == CPACR_FPEN_TRAP_NONE) { KASSERT(PCPU_GET(fpcurthread) == td, ("Storing an invalid VFP state")); vfp_store(pcb->pcb_fpusaved); dsb(ish); vfp_disable(); } critical_exit(); } void vfp_restore_state(void) { struct pcb *curpcb; u_int cpu; critical_enter(); cpu = PCPU_GET(cpuid); curpcb = curthread->td_pcb; curpcb->pcb_fpflags |= PCB_FP_STARTED; vfp_enable(); /* * If the previous thread on this cpu to use the VFP was not the * current thread, or the current thread last used it on a different * cpu, we need to restore the old state. */ if (PCPU_GET(fpcurthread) != curthread || cpu != curpcb->pcb_vfpcpu) { vfp_restore(curthread->td_pcb->pcb_fpusaved); PCPU_SET(fpcurthread, curthread); curpcb->pcb_vfpcpu = cpu; } critical_exit(); } void vfp_init(void) { uint64_t pfr; /* Check if there is a vfp unit present */ pfr = READ_SPECIALREG(id_aa64pfr0_el1); if ((pfr & ID_AA64PFR0_FP_MASK) == ID_AA64PFR0_FP_NONE) return; /* Disable to be enabled when it's used */ vfp_disable(); } SYSINIT(vfp, SI_SUB_CPU, SI_ORDER_ANY, vfp_init, NULL); +struct fpu_kern_ctx * +fpu_kern_alloc_ctx(u_int flags) +{ + struct fpu_kern_ctx *res; + size_t sz; + + sz = sizeof(struct fpu_kern_ctx); + res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ? 
+ M_NOWAIT : M_WAITOK) | M_ZERO); + return (res); +} + +void +fpu_kern_free_ctx(struct fpu_kern_ctx *ctx) +{ + + KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("free'ing inuse ctx")); + /* XXXAndrew clear the memory ? */ + free(ctx, M_FPUKERN_CTX); +} + +int +fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) +{ + struct pcb *pcb; + + pcb = td->td_pcb; + KASSERT(ctx == NULL || (ctx->flags & FPU_KERN_CTX_INUSE) == 0, + ("using inuse ctx")); + + if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) { + ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE; + return (0); + } + /* + * Check that either we are already using the VFP in the kernel, or + * the saved state points to the default user space. + */ + KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) != 0 || + pcb->pcb_fpusaved == &pcb->pcb_fpustate, + ("Mangled pcb_fpusaved %x %p %p", pcb->pcb_fpflags, pcb->pcb_fpusaved, &pcb->pcb_fpustate)); + ctx->flags = FPU_KERN_CTX_INUSE; + vfp_save_state(curthread, pcb); + ctx->prev = pcb->pcb_fpusaved; + pcb->pcb_fpusaved = &ctx->state; + pcb->pcb_fpflags |= PCB_FP_KERN; + pcb->pcb_fpflags &= ~PCB_FP_STARTED; + + return (0); +} + +int +fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) +{ + struct pcb *pcb; + + pcb = td->td_pcb; + + KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0, + ("FPU context not inuse")); + ctx->flags &= ~FPU_KERN_CTX_INUSE; + + if (is_fpu_kern_thread(0) && + (ctx->flags & FPU_KERN_CTX_DUMMY) != 0) + return (0); + KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0, ("dummy ctx")); + critical_enter(); + vfp_discard(td); + critical_exit(); + pcb->pcb_fpflags &= ~PCB_FP_STARTED; + pcb->pcb_fpusaved = ctx->prev; + + if (pcb->pcb_fpusaved == &pcb->pcb_fpustate) { + pcb->pcb_fpflags &= ~PCB_FP_KERN; + } else { + KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) != 0, + ("unpaired fpu_kern_leave")); + } + + return (0); +} + +int +fpu_kern_thread(u_int flags) +{ + struct pcb *pcb = curthread->td_pcb; + + KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, + ("Only kthread may use fpu_kern_thread")); + KASSERT(pcb->pcb_fpusaved == &pcb->pcb_fpustate, + ("Mangled pcb_fpusaved")); + KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) == 0, + ("Thread already setup for the VFP")); + pcb->pcb_fpflags |= PCB_FP_KERN; + return (0); +} + +int +is_fpu_kern_thread(u_int flags) +{ + struct pcb *curpcb; + + if ((curthread->td_pflags & TDP_KTHREAD) == 0) + return (0); + curpcb = curthread->td_pcb; + return ((curpcb->pcb_fpflags & PCB_FP_KERN) != 0); +} #endif Index: head/sys/arm64/include/pcb.h =================================================================== --- head/sys/arm64/include/pcb.h (revision 307666) +++ head/sys/arm64/include/pcb.h (revision 307667) @@ -1,76 +1,77 @@ /*- * Copyright (c) 2001 Jake Burkholder. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_PCB_H_ #define _MACHINE_PCB_H_ #ifndef LOCORE #include struct trapframe; #define PCB_LR 30 struct pcb { uint64_t pcb_x[31]; uint64_t pcb_pc; /* These two need to be in order as we access them together */ uint64_t pcb_sp; uint64_t pcb_tpidr_el0; vm_offset_t pcb_l0addr; /* Fault handler, the error value is passed in x0 */ vm_offset_t pcb_onfault; u_int pcb_flags; #define PCB_SINGLE_STEP_SHIFT 0 #define PCB_SINGLE_STEP (1 << PCB_SINGLE_STEP_SHIFT) struct vfpstate *pcb_fpusaved; int pcb_fpflags; #define PCB_FP_STARTED 0x01 +#define PCB_FP_KERN 0x02 /* The bits passed to userspace in get_fpcontext */ #define PCB_FP_USERMASK (PCB_FP_STARTED) u_int pcb_vfpcpu; /* Last cpu this thread ran VFP code */ /* * The userspace VFP state. The pcb_fpusaved pointer will point to * this unless the kernel has allocated a VFP context. * Place last to simplify the asm to access the rest of the struct. */ struct vfpstate pcb_fpustate; }; #ifdef _KERNEL void makectx(struct trapframe *tf, struct pcb *pcb); int savectx(struct pcb *pcb) __returns_twice; #endif #endif /* !LOCORE */ #endif /* !_MACHINE_PCB_H_ */ Index: head/sys/arm64/include/vfp.h =================================================================== --- head/sys/arm64/include/vfp.h (revision 307666) +++ head/sys/arm64/include/vfp.h (revision 307667) @@ -1,52 +1,69 @@ /*- * Copyright (c) 2015 The FreeBSD Foundation * All rights reserved. * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _MACHINE_VFP_H_ #define _MACHINE_VFP_H_ #ifndef LOCORE struct vfpstate { __uint128_t vfp_regs[32]; uint32_t vfp_fpcr; uint32_t vfp_fpsr; }; #ifdef _KERNEL void vfp_init(void); void vfp_discard(struct thread *); void vfp_restore_state(void); void vfp_save_state(struct thread *, struct pcb *); + +struct fpu_kern_ctx; + +/* + * Flags for fpu_kern_alloc_ctx(), fpu_kern_enter() and fpu_kern_thread(). + */ +#define FPU_KERN_NORMAL 0x0000 +#define FPU_KERN_NOWAIT 0x0001 +#define FPU_KERN_KTHR 0x0002 + +struct fpu_kern_ctx *fpu_kern_alloc_ctx(u_int); +void fpu_kern_free_ctx(struct fpu_kern_ctx *); +int fpu_kern_enter(struct thread *, struct fpu_kern_ctx *, u_int); +int fpu_kern_leave(struct thread *, struct fpu_kern_ctx *); +int fpu_kern_thread(u_int); +int is_fpu_kern_thread(u_int); + #endif #endif #endif /* !_MACHINE_VFP_H_ */ Index: head/sys/opencrypto/crypto.c =================================================================== --- head/sys/opencrypto/crypto.c (revision 307666) +++ head/sys/opencrypto/crypto.c (revision 307667) @@ -1,1577 +1,1577 @@ /*- * Copyright (c) 2002-2006 Sam Leffler. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Cryptographic Subsystem. * * This code is derived from the Openbsd Cryptographic Framework (OCF) * that has the copyright shown below. Very little of the original * code remains. */ /*- * The author of this code is Angelos D. Keromytis (angelos@cis.upenn.edu) * * This code was written by Angelos D. Keromytis in Athens, Greece, in * February 2000. Network Security Technologies Inc. (NSTI) kindly * supported the development of this code. * * Copyright (c) 2000, 2001 Angelos D. Keromytis * * Permission to use, copy, and modify this software with or without fee * is hereby granted, provided that this entire notice is included in * all source code copies of any software which is or includes a copy or * modification of this software. * * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR * PURPOSE. 
*/ #define CRYPTO_TIMING /* enable timing support */ #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* XXX for M_XDATA */ #include #include #include "cryptodev_if.h" -#if defined(__i386__) || defined(__amd64__) +#if defined(__i386__) || defined(__amd64__) || defined(__aarch64__) #include #endif SDT_PROVIDER_DEFINE(opencrypto); /* * Crypto drivers register themselves by allocating a slot in the * crypto_drivers table with crypto_get_driverid() and then registering * each algorithm they support with crypto_register() and crypto_kregister(). */ static struct mtx crypto_drivers_mtx; /* lock on driver table */ #define CRYPTO_DRIVER_LOCK() mtx_lock(&crypto_drivers_mtx) #define CRYPTO_DRIVER_UNLOCK() mtx_unlock(&crypto_drivers_mtx) #define CRYPTO_DRIVER_ASSERT() mtx_assert(&crypto_drivers_mtx, MA_OWNED) /* * Crypto device/driver capabilities structure. * * Synchronization: * (d) - protected by CRYPTO_DRIVER_LOCK() * (q) - protected by CRYPTO_Q_LOCK() * Not tagged fields are read-only. */ struct cryptocap { device_t cc_dev; /* (d) device/driver */ u_int32_t cc_sessions; /* (d) # of sessions */ u_int32_t cc_koperations; /* (d) # of asym operations */ /* * Largest possible operator length (in bits) for each type of * encryption algorithm. XXX not used */ u_int16_t cc_max_op_len[CRYPTO_ALGORITHM_MAX + 1]; u_int8_t cc_alg[CRYPTO_ALGORITHM_MAX + 1]; u_int8_t cc_kalg[CRK_ALGORITHM_MAX + 1]; int cc_flags; /* (d) flags */ #define CRYPTOCAP_F_CLEANUP 0x80000000 /* needs resource cleanup */ int cc_qblocked; /* (q) symmetric q blocked */ int cc_kqblocked; /* (q) asymmetric q blocked */ }; static struct cryptocap *crypto_drivers = NULL; static int crypto_drivers_num = 0; /* * There are two queues for crypto requests; one for symmetric (e.g. * cipher) operations and one for asymmetric (e.g. MOD) operations. * A single mutex is used to lock access to both queues. We could * have one per-queue but having one simplifies handling of block/unblock * operations. */ static int crp_sleep = 0; static TAILQ_HEAD(,cryptop) crp_q; /* request queues */ static TAILQ_HEAD(,cryptkop) crp_kq; static struct mtx crypto_q_mtx; #define CRYPTO_Q_LOCK() mtx_lock(&crypto_q_mtx) #define CRYPTO_Q_UNLOCK() mtx_unlock(&crypto_q_mtx) /* * There are two queues for processing completed crypto requests; one * for the symmetric and one for the asymmetric ops. We only need one * but have two to avoid type futzing (cryptop vs. cryptkop). A single * mutex is used to lock access to both queues. Note that this lock * must be separate from the lock on request queues to insure driver * callbacks don't generate lock order reversals. 
*/ static TAILQ_HEAD(,cryptop) crp_ret_q; /* callback queues */ static TAILQ_HEAD(,cryptkop) crp_ret_kq; static struct mtx crypto_ret_q_mtx; #define CRYPTO_RETQ_LOCK() mtx_lock(&crypto_ret_q_mtx) #define CRYPTO_RETQ_UNLOCK() mtx_unlock(&crypto_ret_q_mtx) #define CRYPTO_RETQ_EMPTY() (TAILQ_EMPTY(&crp_ret_q) && TAILQ_EMPTY(&crp_ret_kq)) static uma_zone_t cryptop_zone; static uma_zone_t cryptodesc_zone; int crypto_userasymcrypto = 1; /* userland may do asym crypto reqs */ SYSCTL_INT(_kern, OID_AUTO, userasymcrypto, CTLFLAG_RW, &crypto_userasymcrypto, 0, "Enable/disable user-mode access to asymmetric crypto support"); int crypto_devallowsoft = 0; /* only use hardware crypto */ SYSCTL_INT(_kern, OID_AUTO, cryptodevallowsoft, CTLFLAG_RW, &crypto_devallowsoft, 0, "Enable/disable use of software crypto by /dev/crypto"); MALLOC_DEFINE(M_CRYPTO_DATA, "crypto", "crypto session records"); static void crypto_proc(void); static struct proc *cryptoproc; static void crypto_ret_proc(void); static struct proc *cryptoretproc; static void crypto_destroy(void); static int crypto_invoke(struct cryptocap *cap, struct cryptop *crp, int hint); static int crypto_kinvoke(struct cryptkop *krp, int flags); static struct cryptostats cryptostats; SYSCTL_STRUCT(_kern, OID_AUTO, crypto_stats, CTLFLAG_RW, &cryptostats, cryptostats, "Crypto system statistics"); #ifdef CRYPTO_TIMING static int crypto_timing = 0; SYSCTL_INT(_debug, OID_AUTO, crypto_timing, CTLFLAG_RW, &crypto_timing, 0, "Enable/disable crypto timing support"); #endif static int crypto_init(void) { int error; mtx_init(&crypto_drivers_mtx, "crypto", "crypto driver table", MTX_DEF|MTX_QUIET); TAILQ_INIT(&crp_q); TAILQ_INIT(&crp_kq); mtx_init(&crypto_q_mtx, "crypto", "crypto op queues", MTX_DEF); TAILQ_INIT(&crp_ret_q); TAILQ_INIT(&crp_ret_kq); mtx_init(&crypto_ret_q_mtx, "crypto", "crypto return queues", MTX_DEF); cryptop_zone = uma_zcreate("cryptop", sizeof (struct cryptop), 0, 0, 0, 0, UMA_ALIGN_PTR, UMA_ZONE_ZINIT); cryptodesc_zone = uma_zcreate("cryptodesc", sizeof (struct cryptodesc), 0, 0, 0, 0, UMA_ALIGN_PTR, UMA_ZONE_ZINIT); if (cryptodesc_zone == NULL || cryptop_zone == NULL) { printf("crypto_init: cannot setup crypto zones\n"); error = ENOMEM; goto bad; } crypto_drivers_num = CRYPTO_DRIVERS_INITIAL; crypto_drivers = malloc(crypto_drivers_num * sizeof(struct cryptocap), M_CRYPTO_DATA, M_NOWAIT | M_ZERO); if (crypto_drivers == NULL) { printf("crypto_init: cannot setup crypto drivers\n"); error = ENOMEM; goto bad; } error = kproc_create((void (*)(void *)) crypto_proc, NULL, &cryptoproc, 0, 0, "crypto"); if (error) { printf("crypto_init: cannot start crypto thread; error %d", error); goto bad; } error = kproc_create((void (*)(void *)) crypto_ret_proc, NULL, &cryptoretproc, 0, 0, "crypto returns"); if (error) { printf("crypto_init: cannot start cryptoret thread; error %d", error); goto bad; } return 0; bad: crypto_destroy(); return error; } /* * Signal a crypto thread to terminate. We use the driver * table lock to synchronize the sleep/wakeups so that we * are sure the threads have terminated before we release * the data structures they use. See crypto_finis below * for the other half of this song-and-dance. 
*/ static void crypto_terminate(struct proc **pp, void *q) { struct proc *p; mtx_assert(&crypto_drivers_mtx, MA_OWNED); p = *pp; *pp = NULL; if (p) { wakeup_one(q); PROC_LOCK(p); /* NB: insure we don't miss wakeup */ CRYPTO_DRIVER_UNLOCK(); /* let crypto_finis progress */ msleep(p, &p->p_mtx, PWAIT, "crypto_destroy", 0); PROC_UNLOCK(p); CRYPTO_DRIVER_LOCK(); } } static void crypto_destroy(void) { /* * Terminate any crypto threads. */ CRYPTO_DRIVER_LOCK(); crypto_terminate(&cryptoproc, &crp_q); crypto_terminate(&cryptoretproc, &crp_ret_q); CRYPTO_DRIVER_UNLOCK(); /* XXX flush queues??? */ /* * Reclaim dynamically allocated resources. */ if (crypto_drivers != NULL) free(crypto_drivers, M_CRYPTO_DATA); if (cryptodesc_zone != NULL) uma_zdestroy(cryptodesc_zone); if (cryptop_zone != NULL) uma_zdestroy(cryptop_zone); mtx_destroy(&crypto_q_mtx); mtx_destroy(&crypto_ret_q_mtx); mtx_destroy(&crypto_drivers_mtx); } static struct cryptocap * crypto_checkdriver(u_int32_t hid) { if (crypto_drivers == NULL) return NULL; return (hid >= crypto_drivers_num ? NULL : &crypto_drivers[hid]); } /* * Compare a driver's list of supported algorithms against another * list; return non-zero if all algorithms are supported. */ static int driver_suitable(const struct cryptocap *cap, const struct cryptoini *cri) { const struct cryptoini *cr; /* See if all the algorithms are supported. */ for (cr = cri; cr; cr = cr->cri_next) if (cap->cc_alg[cr->cri_alg] == 0) return 0; return 1; } /* * Select a driver for a new session that supports the specified * algorithms and, optionally, is constrained according to the flags. * The algorithm we use here is pretty stupid; just use the * first driver that supports all the algorithms we need. If there * are multiple drivers we choose the driver with the fewest active * sessions. We prefer hardware-backed drivers to software ones. * * XXX We need more smarts here (in real life too, but that's * XXX another story altogether). */ static struct cryptocap * crypto_select_driver(const struct cryptoini *cri, int flags) { struct cryptocap *cap, *best; int match, hid; CRYPTO_DRIVER_ASSERT(); /* * Look first for hardware crypto devices if permitted. */ if (flags & CRYPTOCAP_F_HARDWARE) match = CRYPTOCAP_F_HARDWARE; else match = CRYPTOCAP_F_SOFTWARE; best = NULL; again: for (hid = 0; hid < crypto_drivers_num; hid++) { cap = &crypto_drivers[hid]; /* * If it's not initialized, is in the process of * going away, or is not appropriate (hardware * or software based on match), then skip. */ if (cap->cc_dev == NULL || (cap->cc_flags & CRYPTOCAP_F_CLEANUP) || (cap->cc_flags & match) == 0) continue; /* verify all the algorithms are supported. */ if (driver_suitable(cap, cri)) { if (best == NULL || cap->cc_sessions < best->cc_sessions) best = cap; } } if (best == NULL && match == CRYPTOCAP_F_HARDWARE && (flags & CRYPTOCAP_F_SOFTWARE)) { /* sort of an Algol 68-style for loop */ match = CRYPTOCAP_F_SOFTWARE; goto again; } return best; } /* * Create a new session. The crid argument specifies a crypto * driver to use or constraints on a driver to select (hardware * only, software only, either). Whatever driver is selected * must be capable of the requested crypto algorithms. */ int crypto_newsession(u_int64_t *sid, struct cryptoini *cri, int crid) { struct cryptocap *cap; u_int32_t hid, lid; int err; CRYPTO_DRIVER_LOCK(); if ((crid & (CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE)) == 0) { /* * Use specified driver; verify it is capable. 
*/ cap = crypto_checkdriver(crid); if (cap != NULL && !driver_suitable(cap, cri)) cap = NULL; } else { /* * No requested driver; select based on crid flags. */ cap = crypto_select_driver(cri, crid); /* * if NULL then can't do everything in one session. * XXX Fix this. We need to inject a "virtual" session * XXX layer right about here. */ } if (cap != NULL) { /* Call the driver initialization routine. */ hid = cap - crypto_drivers; lid = hid; /* Pass the driver ID. */ err = CRYPTODEV_NEWSESSION(cap->cc_dev, &lid, cri); if (err == 0) { (*sid) = (cap->cc_flags & 0xff000000) | (hid & 0x00ffffff); (*sid) <<= 32; (*sid) |= (lid & 0xffffffff); cap->cc_sessions++; } else CRYPTDEB("dev newsession failed"); } else { CRYPTDEB("no driver"); err = EINVAL; } CRYPTO_DRIVER_UNLOCK(); return err; } static void crypto_remove(struct cryptocap *cap) { mtx_assert(&crypto_drivers_mtx, MA_OWNED); if (cap->cc_sessions == 0 && cap->cc_koperations == 0) bzero(cap, sizeof(*cap)); } /* * Delete an existing session (or a reserved session on an unregistered * driver). */ int crypto_freesession(u_int64_t sid) { struct cryptocap *cap; u_int32_t hid; int err; CRYPTO_DRIVER_LOCK(); if (crypto_drivers == NULL) { err = EINVAL; goto done; } /* Determine two IDs. */ hid = CRYPTO_SESID2HID(sid); if (hid >= crypto_drivers_num) { err = ENOENT; goto done; } cap = &crypto_drivers[hid]; if (cap->cc_sessions) cap->cc_sessions--; /* Call the driver cleanup routine, if available. */ err = CRYPTODEV_FREESESSION(cap->cc_dev, sid); if (cap->cc_flags & CRYPTOCAP_F_CLEANUP) crypto_remove(cap); done: CRYPTO_DRIVER_UNLOCK(); return err; } /* * Return an unused driver id. Used by drivers prior to registering * support for the algorithms they handle. */ int32_t crypto_get_driverid(device_t dev, int flags) { struct cryptocap *newdrv; int i; if ((flags & (CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE)) == 0) { printf("%s: no flags specified when registering driver\n", device_get_nameunit(dev)); return -1; } CRYPTO_DRIVER_LOCK(); for (i = 0; i < crypto_drivers_num; i++) { if (crypto_drivers[i].cc_dev == NULL && (crypto_drivers[i].cc_flags & CRYPTOCAP_F_CLEANUP) == 0) { break; } } /* Out of entries, allocate some more. */ if (i == crypto_drivers_num) { /* Be careful about wrap-around. */ if (2 * crypto_drivers_num <= crypto_drivers_num) { CRYPTO_DRIVER_UNLOCK(); printf("crypto: driver count wraparound!\n"); return -1; } newdrv = malloc(2 * crypto_drivers_num * sizeof(struct cryptocap), M_CRYPTO_DATA, M_NOWAIT|M_ZERO); if (newdrv == NULL) { CRYPTO_DRIVER_UNLOCK(); printf("crypto: no space to expand driver table!\n"); return -1; } bcopy(crypto_drivers, newdrv, crypto_drivers_num * sizeof(struct cryptocap)); crypto_drivers_num *= 2; free(crypto_drivers, M_CRYPTO_DATA); crypto_drivers = newdrv; } /* NB: state is zero'd on free */ crypto_drivers[i].cc_sessions = 1; /* Mark */ crypto_drivers[i].cc_dev = dev; crypto_drivers[i].cc_flags = flags; if (bootverbose) printf("crypto: assign %s driver id %u, flags %u\n", device_get_nameunit(dev), i, flags); CRYPTO_DRIVER_UNLOCK(); return i; } /* * Lookup a driver by name. We match against the full device * name and unit, and against just the name. The latter gives * us a simple wildcarding by device name. On success return the * driver/hardware identifier; otherwise return -1. 
*/ int crypto_find_driver(const char *match) { int i, len = strlen(match); CRYPTO_DRIVER_LOCK(); for (i = 0; i < crypto_drivers_num; i++) { device_t dev = crypto_drivers[i].cc_dev; if (dev == NULL || (crypto_drivers[i].cc_flags & CRYPTOCAP_F_CLEANUP)) continue; if (strncmp(match, device_get_nameunit(dev), len) == 0 || strncmp(match, device_get_name(dev), len) == 0) break; } CRYPTO_DRIVER_UNLOCK(); return i < crypto_drivers_num ? i : -1; } /* * Return the device_t for the specified driver or NULL * if the driver identifier is invalid. */ device_t crypto_find_device_byhid(int hid) { struct cryptocap *cap = crypto_checkdriver(hid); return cap != NULL ? cap->cc_dev : NULL; } /* * Return the device/driver capabilities. */ int crypto_getcaps(int hid) { struct cryptocap *cap = crypto_checkdriver(hid); return cap != NULL ? cap->cc_flags : 0; } /* * Register support for a key-related algorithm. This routine * is called once for each algorithm supported by a driver. */ int crypto_kregister(u_int32_t driverid, int kalg, u_int32_t flags) { struct cryptocap *cap; int err; CRYPTO_DRIVER_LOCK(); cap = crypto_checkdriver(driverid); if (cap != NULL && (CRK_ALGORITM_MIN <= kalg && kalg <= CRK_ALGORITHM_MAX)) { /* * XXX Do some performance testing to determine placing. * XXX We probably need an auxiliary data structure that * XXX describes relative performances. */ cap->cc_kalg[kalg] = flags | CRYPTO_ALG_FLAG_SUPPORTED; if (bootverbose) printf("crypto: %s registers key alg %u flags %u\n" , device_get_nameunit(cap->cc_dev) , kalg , flags ); err = 0; } else err = EINVAL; CRYPTO_DRIVER_UNLOCK(); return err; } /* * Register support for a non-key-related algorithm. This routine * is called once for each such algorithm supported by a driver. */ int crypto_register(u_int32_t driverid, int alg, u_int16_t maxoplen, u_int32_t flags) { struct cryptocap *cap; int err; CRYPTO_DRIVER_LOCK(); cap = crypto_checkdriver(driverid); /* NB: algorithms are in the range [1..max] */ if (cap != NULL && (CRYPTO_ALGORITHM_MIN <= alg && alg <= CRYPTO_ALGORITHM_MAX)) { /* * XXX Do some performance testing to determine placing. * XXX We probably need an auxiliary data structure that * XXX describes relative performances. */ cap->cc_alg[alg] = flags | CRYPTO_ALG_FLAG_SUPPORTED; cap->cc_max_op_len[alg] = maxoplen; if (bootverbose) printf("crypto: %s registers alg %u flags %u maxoplen %u\n" , device_get_nameunit(cap->cc_dev) , alg , flags , maxoplen ); cap->cc_sessions = 0; /* Unmark */ err = 0; } else err = EINVAL; CRYPTO_DRIVER_UNLOCK(); return err; } static void driver_finis(struct cryptocap *cap) { u_int32_t ses, kops; CRYPTO_DRIVER_ASSERT(); ses = cap->cc_sessions; kops = cap->cc_koperations; bzero(cap, sizeof(*cap)); if (ses != 0 || kops != 0) { /* * If there are pending sessions, * just mark as invalid. */ cap->cc_flags |= CRYPTOCAP_F_CLEANUP; cap->cc_sessions = ses; cap->cc_koperations = kops; } } /* * Unregister a crypto driver. If there are pending sessions using it, * leave enough information around so that subsequent calls using those * sessions will correctly detect the driver has been unregistered and * reroute requests. */ int crypto_unregister(u_int32_t driverid, int alg) { struct cryptocap *cap; int i, err; CRYPTO_DRIVER_LOCK(); cap = crypto_checkdriver(driverid); if (cap != NULL && (CRYPTO_ALGORITHM_MIN <= alg && alg <= CRYPTO_ALGORITHM_MAX) && cap->cc_alg[alg] != 0) { cap->cc_alg[alg] = 0; cap->cc_max_op_len[alg] = 0; /* Was this the last algorithm ? 
*/ for (i = 1; i <= CRYPTO_ALGORITHM_MAX; i++) if (cap->cc_alg[i] != 0) break; if (i == CRYPTO_ALGORITHM_MAX + 1) driver_finis(cap); err = 0; } else err = EINVAL; CRYPTO_DRIVER_UNLOCK(); return err; } /* * Unregister all algorithms associated with a crypto driver. * If there are pending sessions using it, leave enough information * around so that subsequent calls using those sessions will * correctly detect the driver has been unregistered and reroute * requests. */ int crypto_unregister_all(u_int32_t driverid) { struct cryptocap *cap; int err; CRYPTO_DRIVER_LOCK(); cap = crypto_checkdriver(driverid); if (cap != NULL) { driver_finis(cap); err = 0; } else err = EINVAL; CRYPTO_DRIVER_UNLOCK(); return err; } /* * Clear blockage on a driver. The what parameter indicates whether * the driver is now ready for cryptop's and/or cryptokop's. */ int crypto_unblock(u_int32_t driverid, int what) { struct cryptocap *cap; int err; CRYPTO_Q_LOCK(); cap = crypto_checkdriver(driverid); if (cap != NULL) { if (what & CRYPTO_SYMQ) cap->cc_qblocked = 0; if (what & CRYPTO_ASYMQ) cap->cc_kqblocked = 0; if (crp_sleep) wakeup_one(&crp_q); err = 0; } else err = EINVAL; CRYPTO_Q_UNLOCK(); return err; } /* * Add a crypto request to a queue, to be processed by the kernel thread. */ int crypto_dispatch(struct cryptop *crp) { struct cryptocap *cap; u_int32_t hid; int result; cryptostats.cs_ops++; #ifdef CRYPTO_TIMING if (crypto_timing) binuptime(&crp->crp_tstamp); #endif hid = CRYPTO_SESID2HID(crp->crp_sid); if ((crp->crp_flags & CRYPTO_F_BATCH) == 0) { /* * Caller marked the request to be processed * immediately; dispatch it directly to the * driver unless the driver is currently blocked. */ cap = crypto_checkdriver(hid); /* Driver cannot disappear when there is an active session. */ KASSERT(cap != NULL, ("%s: Driver disappeared.", __func__)); if (!cap->cc_qblocked) { result = crypto_invoke(cap, crp, 0); if (result != ERESTART) return (result); /* * The driver ran out of resources, put the request on * the queue. */ } } CRYPTO_Q_LOCK(); TAILQ_INSERT_TAIL(&crp_q, crp, crp_next); if (crp_sleep) wakeup_one(&crp_q); CRYPTO_Q_UNLOCK(); return 0; } /* * Add an asymmetric crypto request to a queue, * to be processed by the kernel thread. */ int crypto_kdispatch(struct cryptkop *krp) { int error; cryptostats.cs_kops++; error = crypto_kinvoke(krp, krp->krp_crid); if (error == ERESTART) { CRYPTO_Q_LOCK(); TAILQ_INSERT_TAIL(&crp_kq, krp, krp_next); if (crp_sleep) wakeup_one(&crp_q); CRYPTO_Q_UNLOCK(); error = 0; } return error; } /* * Verify a driver is suitable for the specified operation. */ static __inline int kdriver_suitable(const struct cryptocap *cap, const struct cryptkop *krp) { return (cap->cc_kalg[krp->krp_op] & CRYPTO_ALG_FLAG_SUPPORTED) != 0; } /* * Select a driver for an asym operation. The driver must * support the necessary algorithm. The caller can constrain * which device is selected with the flags parameter. The * algorithm we use here is pretty stupid; just use the first * driver that supports the algorithms we need. If there are * multiple suitable drivers we choose the driver with the * fewest active operations. We prefer hardware-backed * drivers to software ones when either may be used. */ static struct cryptocap * crypto_select_kdriver(const struct cryptkop *krp, int flags) { struct cryptocap *cap, *best, *blocked; int match, hid; CRYPTO_DRIVER_ASSERT(); /* * Look first for hardware crypto devices if permitted. 
*/ if (flags & CRYPTOCAP_F_HARDWARE) match = CRYPTOCAP_F_HARDWARE; else match = CRYPTOCAP_F_SOFTWARE; best = NULL; blocked = NULL; again: for (hid = 0; hid < crypto_drivers_num; hid++) { cap = &crypto_drivers[hid]; /* * If it's not initialized, is in the process of * going away, or is not appropriate (hardware * or software based on match), then skip. */ if (cap->cc_dev == NULL || (cap->cc_flags & CRYPTOCAP_F_CLEANUP) || (cap->cc_flags & match) == 0) continue; /* verify all the algorithms are supported. */ if (kdriver_suitable(cap, krp)) { if (best == NULL || cap->cc_koperations < best->cc_koperations) best = cap; } } if (best != NULL) return best; if (match == CRYPTOCAP_F_HARDWARE && (flags & CRYPTOCAP_F_SOFTWARE)) { /* sort of an Algol 68-style for loop */ match = CRYPTOCAP_F_SOFTWARE; goto again; } return best; } /* * Dispatch an asymmetric crypto request. */ static int crypto_kinvoke(struct cryptkop *krp, int crid) { struct cryptocap *cap = NULL; int error; KASSERT(krp != NULL, ("%s: krp == NULL", __func__)); KASSERT(krp->krp_callback != NULL, ("%s: krp->crp_callback == NULL", __func__)); CRYPTO_DRIVER_LOCK(); if ((crid & (CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE)) == 0) { cap = crypto_checkdriver(crid); if (cap != NULL) { /* * Driver present, it must support the necessary * algorithm and, if s/w drivers are excluded, * it must be registered as hardware-backed. */ if (!kdriver_suitable(cap, krp) || (!crypto_devallowsoft && (cap->cc_flags & CRYPTOCAP_F_HARDWARE) == 0)) cap = NULL; } } else { /* * No requested driver; select based on crid flags. */ if (!crypto_devallowsoft) /* NB: disallow s/w drivers */ crid &= ~CRYPTOCAP_F_SOFTWARE; cap = crypto_select_kdriver(krp, crid); } if (cap != NULL && !cap->cc_kqblocked) { krp->krp_hid = cap - crypto_drivers; cap->cc_koperations++; CRYPTO_DRIVER_UNLOCK(); error = CRYPTODEV_KPROCESS(cap->cc_dev, krp, 0); CRYPTO_DRIVER_LOCK(); if (error == ERESTART) { cap->cc_koperations--; CRYPTO_DRIVER_UNLOCK(); return (error); } } else { /* * NB: cap is !NULL if device is blocked; in * that case return ERESTART so the operation * is resubmitted if possible. */ error = (cap == NULL) ? ENODEV : ERESTART; } CRYPTO_DRIVER_UNLOCK(); if (error) { krp->krp_status = error; crypto_kdone(krp); } return 0; } #ifdef CRYPTO_TIMING static void crypto_tstat(struct cryptotstat *ts, struct bintime *bt) { struct bintime now, delta; struct timespec t; uint64_t u; binuptime(&now); u = now.frac; delta.frac = now.frac - bt->frac; delta.sec = now.sec - bt->sec; if (u < delta.frac) delta.sec--; bintime2timespec(&delta, &t); timespecadd(&ts->acc, &t); if (timespeccmp(&t, &ts->min, <)) ts->min = t; if (timespeccmp(&t, &ts->max, >)) ts->max = t; ts->count++; *bt = now; } #endif /* * Dispatch a crypto request to the appropriate crypto devices. */ static int crypto_invoke(struct cryptocap *cap, struct cryptop *crp, int hint) { KASSERT(crp != NULL, ("%s: crp == NULL", __func__)); KASSERT(crp->crp_callback != NULL, ("%s: crp->crp_callback == NULL", __func__)); KASSERT(crp->crp_desc != NULL, ("%s: crp->crp_desc == NULL", __func__)); #ifdef CRYPTO_TIMING if (crypto_timing) crypto_tstat(&cryptostats.cs_invoke, &crp->crp_tstamp); #endif if (cap->cc_flags & CRYPTOCAP_F_CLEANUP) { struct cryptodesc *crd; u_int64_t nid; /* * Driver has unregistered; migrate the session and return * an error to the caller so they'll resubmit the op. * * XXX: What if there are more already queued requests for this * session? 
#ifdef CRYPTO_TIMING
static void
crypto_tstat(struct cryptotstat *ts, struct bintime *bt)
{
	struct bintime now, delta;
	struct timespec t;
	uint64_t u;

	binuptime(&now);
	u = now.frac;
	delta.frac = now.frac - bt->frac;
	delta.sec = now.sec - bt->sec;
	if (u < delta.frac)
		delta.sec--;
	bintime2timespec(&delta, &t);
	timespecadd(&ts->acc, &t);
	if (timespeccmp(&t, &ts->min, <))
		ts->min = t;
	if (timespeccmp(&t, &ts->max, >))
		ts->max = t;
	ts->count++;

	*bt = now;
}
#endif

/*
 * Dispatch a crypto request to the appropriate crypto devices.
 */
static int
crypto_invoke(struct cryptocap *cap, struct cryptop *crp, int hint)
{

	KASSERT(crp != NULL, ("%s: crp == NULL", __func__));
	KASSERT(crp->crp_callback != NULL,
	    ("%s: crp->crp_callback == NULL", __func__));
	KASSERT(crp->crp_desc != NULL, ("%s: crp->crp_desc == NULL", __func__));

#ifdef CRYPTO_TIMING
	if (crypto_timing)
		crypto_tstat(&cryptostats.cs_invoke, &crp->crp_tstamp);
#endif
	if (cap->cc_flags & CRYPTOCAP_F_CLEANUP) {
		struct cryptodesc *crd;
		u_int64_t nid;

		/*
		 * Driver has unregistered; migrate the session and return
		 * an error to the caller so they'll resubmit the op.
		 *
		 * XXX: What if there are more already queued requests for this
		 *      session?
		 */
		crypto_freesession(crp->crp_sid);

		for (crd = crp->crp_desc; crd->crd_next; crd = crd->crd_next)
			crd->CRD_INI.cri_next = &(crd->crd_next->CRD_INI);

		/* XXX propagate flags from initial session? */
		if (crypto_newsession(&nid, &(crp->crp_desc->CRD_INI),
		    CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE) == 0)
			crp->crp_sid = nid;

		crp->crp_etype = EAGAIN;
		crypto_done(crp);
		return 0;
	} else {
		/*
		 * Invoke the driver to process the request.
		 */
		return CRYPTODEV_PROCESS(cap->cc_dev, crp, hint);
	}
}

/*
 * Release a set of crypto descriptors.
 */
void
crypto_freereq(struct cryptop *crp)
{
	struct cryptodesc *crd;

	if (crp == NULL)
		return;

#ifdef DIAGNOSTIC
	{
		struct cryptop *crp2;

		CRYPTO_Q_LOCK();
		TAILQ_FOREACH(crp2, &crp_q, crp_next) {
			KASSERT(crp2 != crp,
			    ("Freeing cryptop from the crypto queue (%p).",
			    crp));
		}
		CRYPTO_Q_UNLOCK();
		CRYPTO_RETQ_LOCK();
		TAILQ_FOREACH(crp2, &crp_ret_q, crp_next) {
			KASSERT(crp2 != crp,
			    ("Freeing cryptop from the return queue (%p).",
			    crp));
		}
		CRYPTO_RETQ_UNLOCK();
	}
#endif

	while ((crd = crp->crp_desc) != NULL) {
		crp->crp_desc = crd->crd_next;
		uma_zfree(cryptodesc_zone, crd);
	}
	uma_zfree(cryptop_zone, crp);
}

/*
 * Acquire a set of crypto descriptors.
 */
struct cryptop *
crypto_getreq(int num)
{
	struct cryptodesc *crd;
	struct cryptop *crp;

	crp = uma_zalloc(cryptop_zone, M_NOWAIT|M_ZERO);
	if (crp != NULL) {
		while (num--) {
			crd = uma_zalloc(cryptodesc_zone, M_NOWAIT|M_ZERO);
			if (crd == NULL) {
				crypto_freereq(crp);
				return NULL;
			}
			crd->crd_next = crp->crp_desc;
			crp->crp_desc = crd;
		}
	}
	return crp;
}

/*
 * Invoke the callback on behalf of the driver.
 */
void
crypto_done(struct cryptop *crp)
{
	KASSERT((crp->crp_flags & CRYPTO_F_DONE) == 0,
	    ("crypto_done: op already done, flags 0x%x", crp->crp_flags));
	crp->crp_flags |= CRYPTO_F_DONE;
	if (crp->crp_etype != 0)
		cryptostats.cs_errs++;
#ifdef CRYPTO_TIMING
	if (crypto_timing)
		crypto_tstat(&cryptostats.cs_done, &crp->crp_tstamp);
#endif
	/*
	 * CBIMM means unconditionally do the callback immediately;
	 * CBIFSYNC means do the callback immediately only if the
	 * operation was done synchronously.  Both are used to avoid
	 * doing extraneous context switches; the latter is mostly
	 * used with the software crypto driver.
	 */
	if ((crp->crp_flags & CRYPTO_F_CBIMM) ||
	    ((crp->crp_flags & CRYPTO_F_CBIFSYNC) &&
	    (CRYPTO_SESID2CAPS(crp->crp_sid) & CRYPTOCAP_F_SYNC))) {
		/*
		 * Do the callback directly.  This is ok when the
		 * callback routine does very little (e.g. the
		 * /dev/crypto callback method just does a wakeup).
		 */
#ifdef CRYPTO_TIMING
		if (crypto_timing) {
			/*
			 * NB: We must copy the timestamp before
			 * doing the callback as the cryptop is
			 * likely to be reclaimed.
			 */
			struct bintime t = crp->crp_tstamp;
			crypto_tstat(&cryptostats.cs_cb, &t);
			crp->crp_callback(crp);
			crypto_tstat(&cryptostats.cs_finis, &t);
		} else
#endif
			crp->crp_callback(crp);
	} else {
		/*
		 * Normal case; queue the callback for the thread.
		 */
		CRYPTO_RETQ_LOCK();
		if (CRYPTO_RETQ_EMPTY())
			wakeup_one(&crp_ret_q);	/* shared wait channel */
		TAILQ_INSERT_TAIL(&crp_ret_q, crp, crp_next);
		CRYPTO_RETQ_UNLOCK();
	}
}
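/*
 * Illustrative sketch of the callback-flag choice crypto_done() honors:
 * a consumer whose completion handler is as cheap as a wakeup can avoid
 * the trip through the return thread entirely by setting
 *
 *	crp->crp_flags |= CRYPTO_F_CBIMM;
 *
 * before dispatch, while CRYPTO_F_CBIFSYNC only short-circuits when the
 * backing driver is synchronous (CRYPTOCAP_F_SYNC), e.g. the software
 * driver.
 */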
/*
 * Invoke the callback on behalf of the driver.
 */
void
crypto_kdone(struct cryptkop *krp)
{
	struct cryptocap *cap;

	if (krp->krp_status != 0)
		cryptostats.cs_kerrs++;
	CRYPTO_DRIVER_LOCK();
	/* XXX: What if driver is loaded in the meantime? */
	if (krp->krp_hid < crypto_drivers_num) {
		cap = &crypto_drivers[krp->krp_hid];
		KASSERT(cap->cc_koperations > 0, ("cc_koperations == 0"));
		cap->cc_koperations--;
		if (cap->cc_flags & CRYPTOCAP_F_CLEANUP)
			crypto_remove(cap);
	}
	CRYPTO_DRIVER_UNLOCK();
	CRYPTO_RETQ_LOCK();
	if (CRYPTO_RETQ_EMPTY())
		wakeup_one(&crp_ret_q);		/* shared wait channel */
	TAILQ_INSERT_TAIL(&crp_ret_kq, krp, krp_next);
	CRYPTO_RETQ_UNLOCK();
}

int
crypto_getfeat(int *featp)
{
	int hid, kalg, feat = 0;

	CRYPTO_DRIVER_LOCK();
	for (hid = 0; hid < crypto_drivers_num; hid++) {
		const struct cryptocap *cap = &crypto_drivers[hid];

		if ((cap->cc_flags & CRYPTOCAP_F_SOFTWARE) &&
		    !crypto_devallowsoft) {
			continue;
		}
		for (kalg = 0; kalg < CRK_ALGORITHM_MAX; kalg++)
			if (cap->cc_kalg[kalg] & CRYPTO_ALG_FLAG_SUPPORTED)
				feat |= 1 << kalg;
	}
	CRYPTO_DRIVER_UNLOCK();
	*featp = feat;
	return (0);
}

/*
 * Terminate a thread at module unload.  The process that
 * initiated this is waiting for us to signal that we're gone;
 * wake it up and exit.  We use the driver table lock to ensure
 * we don't do the wakeup before they're waiting.  There is no
 * race here because the waiter sleeps on the proc lock for the
 * thread so it gets notified at the right time because of an
 * extra wakeup that's done in exit1().
 */
static void
crypto_finis(void *chan)
{
	CRYPTO_DRIVER_LOCK();
	wakeup_one(chan);
	CRYPTO_DRIVER_UNLOCK();
	kproc_exit(0);
}
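/*
 * Illustrative sketch of a crypto_getfeat() consumer (this mirrors the
 * asymmetric-feature probe done on behalf of /dev/crypto; the ioctl
 * plumbing itself is elided here):
 *
 *	int feat, have_modexp;
 *
 *	crypto_getfeat(&feat);
 *	have_modexp = (feat & (1 << CRK_MOD_EXP)) != 0;
 */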
/*
 * Crypto thread, dispatches crypto requests.
 */
static void
crypto_proc(void)
{
	struct cryptop *crp, *submit;
	struct cryptkop *krp;
	struct cryptocap *cap;
	u_int32_t hid;
	int result, hint;

-#if defined(__i386__) || defined(__amd64__)
+#if defined(__i386__) || defined(__amd64__) || defined(__aarch64__)
	fpu_kern_thread(FPU_KERN_NORMAL);
#endif

	CRYPTO_Q_LOCK();
	for (;;) {
		/*
		 * Find the first element in the queue that can be
		 * processed and look-ahead to see if multiple ops
		 * are ready for the same driver.
		 */
		submit = NULL;
		hint = 0;
		TAILQ_FOREACH(crp, &crp_q, crp_next) {
			hid = CRYPTO_SESID2HID(crp->crp_sid);
			cap = crypto_checkdriver(hid);
			/*
			 * The driver cannot disappear while there is
			 * an active session.
			 */
			KASSERT(cap != NULL, ("%s:%u Driver disappeared.",
			    __func__, __LINE__));
			if (cap == NULL || cap->cc_dev == NULL) {
				/* Op needs to be migrated, process it. */
				if (submit == NULL)
					submit = crp;
				break;
			}
			if (!cap->cc_qblocked) {
				if (submit != NULL) {
					/*
					 * We stop on finding another op,
					 * regardless of whether it's for
					 * the same driver or not.  We could
					 * keep searching the queue but it
					 * might be better to just use a
					 * per-driver queue instead.
					 */
					if (CRYPTO_SESID2HID(submit->crp_sid) == hid)
						hint = CRYPTO_HINT_MORE;
					break;
				} else {
					submit = crp;
					if ((submit->crp_flags & CRYPTO_F_BATCH) == 0)
						break;
					/* keep scanning for more that are queued */
				}
			}
		}
		if (submit != NULL) {
			TAILQ_REMOVE(&crp_q, submit, crp_next);
			hid = CRYPTO_SESID2HID(submit->crp_sid);
			cap = crypto_checkdriver(hid);
			KASSERT(cap != NULL, ("%s:%u Driver disappeared.",
			    __func__, __LINE__));
			result = crypto_invoke(cap, submit, hint);
			if (result == ERESTART) {
				/*
				 * The driver ran out of resources, mark the
				 * driver ``blocked'' for cryptop's and put
				 * the request back in the queue.  It would
				 * be best to put the request back where we
				 * got it but that's hard so for now we put
				 * it at the front.  This should be ok;
				 * putting it at the end does not work.
				 */
				/* XXX validate sid again? */
				crypto_drivers[CRYPTO_SESID2HID(submit->crp_sid)].cc_qblocked = 1;
				TAILQ_INSERT_HEAD(&crp_q, submit, crp_next);
				cryptostats.cs_blocks++;
			}
		}

		/* As above, but for key ops */
		TAILQ_FOREACH(krp, &crp_kq, krp_next) {
			cap = crypto_checkdriver(krp->krp_hid);
			if (cap == NULL || cap->cc_dev == NULL) {
				/*
				 * Operation needs to be migrated, invalidate
				 * the assigned device so it will reselect a
				 * new one below.  Propagate the original
				 * crid selection flags if supplied.
				 */
				krp->krp_hid = krp->krp_crid &
				    (CRYPTOCAP_F_SOFTWARE|CRYPTOCAP_F_HARDWARE);
				if (krp->krp_hid == 0)
					krp->krp_hid =
					    CRYPTOCAP_F_SOFTWARE|CRYPTOCAP_F_HARDWARE;
				break;
			}
			if (!cap->cc_kqblocked)
				break;
		}
		if (krp != NULL) {
			TAILQ_REMOVE(&crp_kq, krp, krp_next);
			result = crypto_kinvoke(krp, krp->krp_hid);
			if (result == ERESTART) {
				/*
				 * The driver ran out of resources, mark the
				 * driver ``blocked'' for cryptkop's and put
				 * the request back in the queue.  It would
				 * be best to put the request back where we
				 * got it but that's hard so for now we put
				 * it at the front.  This should be ok;
				 * putting it at the end does not work.
				 */
				/* XXX validate sid again? */
				crypto_drivers[krp->krp_hid].cc_kqblocked = 1;
				TAILQ_INSERT_HEAD(&crp_kq, krp, krp_next);
				cryptostats.cs_kblocks++;
			}
		}

		if (submit == NULL && krp == NULL) {
			/*
			 * Nothing more to be processed.  Sleep until we're
			 * woken because there are more ops to process.
			 * This happens either by submission or by a driver
			 * becoming unblocked and notifying us through
			 * crypto_unblock.  Note that when we wakeup we
			 * start processing each queue again from the
			 * front.  It's not clear that it's important to
			 * preserve this ordering since ops may finish
			 * out of order if dispatched to different devices
			 * and some become blocked while others do not.
			 */
			crp_sleep = 1;
			msleep(&crp_q, &crypto_q_mtx, PWAIT,
			    "crypto_wait", 0);
			crp_sleep = 0;
			if (cryptoproc == NULL)
				break;
			cryptostats.cs_intrs++;
		}
	}
	CRYPTO_Q_UNLOCK();

	crypto_finis(&crp_q);
}
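/*
 * Illustrative sketch of the other half of the ERESTART protocol used by
 * crypto_proc() above: a driver that returned ERESTART re-enables itself
 * once resources free up, typically from its completion path ("sc->sc_cid"
 * is a hypothetical driver-id field obtained at registration):
 *
 *	crypto_unblock(sc->sc_cid, CRYPTO_SYMQ | CRYPTO_ASYMQ);
 *
 * This clears cc_qblocked/cc_kqblocked and wakes the dispatch thread.
 */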
/*
 * Crypto returns thread, does callbacks for processed crypto requests.
 * Callbacks are done here, rather than in the crypto drivers, because
 * callbacks typically are expensive and would slow interrupt handling.
 */
static void
crypto_ret_proc(void)
{
	struct cryptop *crpt;
	struct cryptkop *krpt;

	CRYPTO_RETQ_LOCK();
	for (;;) {
		/* Harvest return q's for completed ops */
		crpt = TAILQ_FIRST(&crp_ret_q);
		if (crpt != NULL)
			TAILQ_REMOVE(&crp_ret_q, crpt, crp_next);

		krpt = TAILQ_FIRST(&crp_ret_kq);
		if (krpt != NULL)
			TAILQ_REMOVE(&crp_ret_kq, krpt, krp_next);

		if (crpt != NULL || krpt != NULL) {
			CRYPTO_RETQ_UNLOCK();
			/*
			 * Run callbacks unlocked.
			 */
			if (crpt != NULL) {
#ifdef CRYPTO_TIMING
				if (crypto_timing) {
					/*
					 * NB: We must copy the timestamp before
					 * doing the callback as the cryptop is
					 * likely to be reclaimed.
					 */
					struct bintime t = crpt->crp_tstamp;
					crypto_tstat(&cryptostats.cs_cb, &t);
					crpt->crp_callback(crpt);
					crypto_tstat(&cryptostats.cs_finis, &t);
				} else
#endif
					crpt->crp_callback(crpt);
			}
			if (krpt != NULL)
				krpt->krp_callback(krpt);
			CRYPTO_RETQ_LOCK();
		} else {
			/*
			 * Nothing more to be processed.  Sleep until we're
			 * woken because there are more returns to process.
			 */
			msleep(&crp_ret_q, &crypto_ret_q_mtx, PWAIT,
			    "crypto_ret_wait", 0);
			if (cryptoretproc == NULL)
				break;
			cryptostats.cs_rets++;
		}
	}
	CRYPTO_RETQ_UNLOCK();

	crypto_finis(&crp_ret_q);
}

#ifdef DDB
static void
db_show_drivers(void)
{
	int hid;

	db_printf("%12s %4s %4s %8s %2s %2s\n"
	    , "Device"
	    , "Ses"
	    , "Kops"
	    , "Flags"
	    , "QB"
	    , "KB"
	);
	for (hid = 0; hid < crypto_drivers_num; hid++) {
		const struct cryptocap *cap = &crypto_drivers[hid];
		if (cap->cc_dev == NULL)
			continue;
		db_printf("%-12s %4u %4u %08x %2u %2u\n"
		    , device_get_nameunit(cap->cc_dev)
		    , cap->cc_sessions
		    , cap->cc_koperations
		    , cap->cc_flags
		    , cap->cc_qblocked
		    , cap->cc_kqblocked
		);
	}
}

DB_SHOW_COMMAND(crypto, db_show_crypto)
{
	struct cryptop *crp;

	db_show_drivers();
	db_printf("\n");

	db_printf("%4s %8s %4s %4s %4s %4s %8s %8s\n",
	    "HID", "Caps", "Ilen", "Olen", "Etype", "Flags",
	    "Desc", "Callback");
	TAILQ_FOREACH(crp, &crp_q, crp_next) {
		db_printf("%4u %08x %4u %4u %4u %04x %8p %8p\n"
		    , (int) CRYPTO_SESID2HID(crp->crp_sid)
		    , (int) CRYPTO_SESID2CAPS(crp->crp_sid)
		    , crp->crp_ilen, crp->crp_olen
		    , crp->crp_etype
		    , crp->crp_flags
		    , crp->crp_desc
		    , crp->crp_callback
		);
	}
	if (!TAILQ_EMPTY(&crp_ret_q)) {
		db_printf("\n%4s %4s %4s %8s\n",
		    "HID", "Etype", "Flags", "Callback");
		TAILQ_FOREACH(crp, &crp_ret_q, crp_next) {
			db_printf("%4u %4u %04x %8p\n"
			    , (int) CRYPTO_SESID2HID(crp->crp_sid)
			    , crp->crp_etype
			    , crp->crp_flags
			    , crp->crp_callback
			);
		}
	}
}

DB_SHOW_COMMAND(kcrypto, db_show_kcrypto)
{
	struct cryptkop *krp;

	db_show_drivers();
	db_printf("\n");

	db_printf("%4s %5s %4s %4s %8s %4s %8s\n",
	    "Op", "Status", "#IP", "#OP", "CRID", "HID", "Callback");
	TAILQ_FOREACH(krp, &crp_kq, krp_next) {
		db_printf("%4u %5u %4u %4u %08x %4u %8p\n"
		    , krp->krp_op
		    , krp->krp_status
		    , krp->krp_iparams, krp->krp_oparams
		    , krp->krp_crid, krp->krp_hid
		    , krp->krp_callback
		);
	}
	if (!TAILQ_EMPTY(&crp_ret_kq)) {
		db_printf("%4s %5s %8s %4s %8s\n",
		    "Op", "Status", "CRID", "HID", "Callback");
		TAILQ_FOREACH(krp, &crp_ret_kq, krp_next) {
			db_printf("%4u %5u %08x %4u %8p\n"
			    , krp->krp_op
			    , krp->krp_status
			    , krp->krp_crid, krp->krp_hid
			    , krp->krp_callback
			);
		}
	}
}
#endif

int crypto_modevent(module_t mod, int type, void *unused);

/*
 * Initialization code, both for static and dynamic loading.
 * Note this is not invoked with the usual MODULE_DECLARE
 * mechanism but instead is listed as a dependency by the
 * cryptosoft driver.  This guarantees proper ordering of
 * calls on module load/unload.
 */
int
crypto_modevent(module_t mod, int type, void *unused)
{
	int error = EINVAL;

	switch (type) {
	case MOD_LOAD:
		error = crypto_init();
		if (error == 0 && bootverbose)
			printf("crypto: <crypto core>\n");
		break;
	case MOD_UNLOAD:
		/*XXX disallow if active sessions */
		error = 0;
		crypto_destroy();
		return 0;
	}
	return error;
}
MODULE_VERSION(crypto, 1);
MODULE_DEPEND(crypto, zlib, 1, 1, 1);
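/*
 * Illustrative sketch: a driver module would declare a dependency on this
 * core so that crypto_modevent() runs in the right order on load/unload
 * ("mydrv" is a hypothetical module name):
 *
 *	MODULE_DEPEND(mydrv, crypto, 1, 1, 1);
 */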