diff --git a/sys/amd64/include/proc.h b/sys/amd64/include/proc.h index 6181df35261e..8015fe5da81a 100644 --- a/sys/amd64/include/proc.h +++ b/sys/amd64/include/proc.h @@ -1,125 +1,116 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)proc.h 7.1 (Berkeley) 5/15/91 * $FreeBSD$ */ #ifdef __i386__ #include #else /* !__i386__ */ #ifndef _MACHINE_PROC_H_ #define _MACHINE_PROC_H_ #include #include #include /* * List of locks * c - proc lock * k - only accessed by curthread * pp - pmap.c:invl_gen_mtx */ struct proc_ldt { caddr_t ldt_base; int ldt_refcnt; }; #define PMAP_INVL_GEN_NEXT_INVALID 0x1ULL struct pmap_invl_gen { u_long gen; /* (k) */ union { LIST_ENTRY(pmap_invl_gen) link; /* (pp) */ struct { struct pmap_invl_gen *next; u_char saved_pri; }; }; } __aligned(16); /* * Machine-dependent part of the proc structure for AMD64. */ struct mdthread { int md_spinlock_count; /* (k) */ register_t md_saved_flags; /* (k) */ register_t md_spurflt_addr; /* (k) Spurious page fault address. */ struct pmap_invl_gen md_invl_gen; register_t md_efirt_tmp; /* (k) */ int md_efirt_dis_pf; /* (k) */ struct pcb md_pcb; vm_offset_t md_stack_base; void *md_usr_fpu_save; }; struct mdproc { struct proc_ldt *md_ldt; /* (t) per-process ldt */ struct system_segment_descriptor md_ldt_sd; u_int md_flags; /* (c) md process flags P_MD */ }; #define P_MD_KPTI 0x00000001 /* Enable KPTI on exec */ #define P_MD_LA48 0x00000002 /* Request LA48 after exec */ #define P_MD_LA57 0x00000004 /* Request LA57 after exec */ #define KINFO_PROC_SIZE 1088 #define KINFO_PROC32_SIZE 768 #ifdef _KERNEL -/* Get the current kernel thread stack usage. 
*/
-#define GET_STACK_USAGE(total, used) do { \
-	struct thread *td = curthread; \
-	(total) = td->td_kstack_pages * PAGE_SIZE; \
-	(used) = (char *)td->td_kstack + \
-	    td->td_kstack_pages * PAGE_SIZE - \
-	    (char *)&td; \
-} while (0)
-
struct proc_ldt *user_ldt_alloc(struct proc *, int);
void user_ldt_free(struct thread *);
struct sysarch_args;
int sysarch_ldt(struct thread *td, struct sysarch_args *uap, int uap_space);
int amd64_set_ldt_data(struct thread *td, int start, int num,
    struct user_segment_descriptor *descs);
extern struct mtx dt_lock;
extern int max_ldt_segment;
#define NARGREGS 6
#endif /* _KERNEL */
#endif /* !_MACHINE_PROC_H_ */
#endif /* __i386__ */

diff --git a/sys/amd64/include/stack.h b/sys/amd64/include/stack.h
index 091ae33893d4..ff21ee28b5a3 100644
--- a/sys/amd64/include/stack.h
+++ b/sys/amd64/include/stack.h
@@ -1,6 +1,30 @@
/*
 * This file is in the public domain.
 */
/* $FreeBSD$ */

+#ifndef _MACHINE_STACK_H_
+#define _MACHINE_STACK_H_
+
#include
+
+#ifdef _SYS_PROC_H_
+
+/* Get the current kernel thread stack usage. */
+#define GET_STACK_USAGE(total, used) do { \
+	struct thread *td = curthread; \
+	(total) = td->td_kstack_pages * PAGE_SIZE; \
+	(used) = (char *)td->td_kstack + \
+	    td->td_kstack_pages * PAGE_SIZE - \
+	    (char *)&td; \
+} while (0)
+
+static __inline bool
+kstack_contains(struct thread *td, vm_offset_t va, size_t len)
+{
+	return (va >= td->td_kstack && va + len >= va &&
+	    va + len <= td->td_kstack + td->td_kstack_pages * PAGE_SIZE);
+}
+#endif /* _SYS_PROC_H_ */
+
+#endif
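A minimal sketch of how the two helpers above are meant to be consumed
(illustrative only: frame_sane(), stack_nearly_full() and the 75% threshold
are hypothetical, not part of this change). kstack_contains() lets a stack
walker validate a saved frame before dereferencing it; GET_STACK_USAGE()
lets deep call paths decide when to defer work. Both are only exposed once
sys/proc.h has been included (the _SYS_PROC_H_ guard), so include order
matters:

	#include <sys/param.h>
	#include <sys/proc.h>
	#include <machine/stack.h>

	/* Does a saved FP/LR pair at fp lie wholly within td's kernel stack? */
	static __inline bool
	frame_sane(struct thread *td, vm_offset_t fp)
	{
		return (kstack_contains(td, fp, 2 * sizeof(uintptr_t)));
	}

	/* True once roughly 75% of the current thread's stack is consumed. */
	static __inline bool
	stack_nearly_full(void)
	{
		int total, used;

		GET_STACK_USAGE(total, used);
		return (used * 4 >= total * 3);
	}

The "va + len >= va" term in kstack_contains() guards against wraparound of
the range end, so a huge len cannot slip past the bounds check.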
diff --git a/sys/arm/arm/ptrace_machdep.c b/sys/arm/arm/ptrace_machdep.c
index 3edadbd72ddf..a347a1dfac95 100644
--- a/sys/arm/arm/ptrace_machdep.c
+++ b/sys/arm/arm/ptrace_machdep.c
@@ -1,117 +1,118 @@
/*-
 * Copyright (c) 2017 John Baldwin
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
+#include
#ifdef VFP
#include
#endif

#ifdef VFP
static bool
get_arm_vfp(struct regset *rs, struct thread *td, void *buf, size_t *sizep)
{
	if (buf != NULL) {
		KASSERT(*sizep == sizeof(mcontext_vfp_t),
		    ("%s: invalid size", __func__));
		get_vfpcontext(td, buf);
	}
	*sizep = sizeof(mcontext_vfp_t);
	return (true);
}

static bool
set_arm_vfp(struct regset *rs, struct thread *td, void *buf, size_t size)
{
	KASSERT(size == sizeof(mcontext_vfp_t), ("%s: invalid size", __func__));
	set_vfpcontext(td, buf);
	return (true);
}

static struct regset regset_arm_vfp = {
	.note = NT_ARM_VFP,
	.size = sizeof(mcontext_vfp_t),
	.get = get_arm_vfp,
	.set = set_arm_vfp,
};
ELF_REGSET(regset_arm_vfp);
#endif

static bool
get_arm_tls(struct regset *rs, struct thread *td, void *buf, size_t *sizep)
{
	if (buf != NULL) {
		KASSERT(*sizep == sizeof(td->td_pcb->pcb_regs.sf_tpidrurw),
		    ("%s: invalid size", __func__));
		memcpy(buf, &td->td_pcb->pcb_regs.sf_tpidrurw,
		    sizeof(td->td_pcb->pcb_regs.sf_tpidrurw));
	}
	*sizep = sizeof(td->td_pcb->pcb_regs.sf_tpidrurw);
	return (true);
}

static struct regset regset_arm_tls = {
	.note = NT_ARM_TLS,
	.size = sizeof(uint32_t),
	.get = get_arm_tls,
};
ELF_REGSET(regset_arm_tls);

int
cpu_ptrace(struct thread *td, int req, void *addr, int data)
{
#ifdef VFP
	mcontext_vfp_t vfp;
#endif
	int error;

	switch (req) {
#ifdef VFP
	case PT_GETVFPREGS:
		get_vfpcontext(td, &vfp);
		error = copyout(&vfp, addr, sizeof(vfp));
		break;
	case PT_SETVFPREGS:
		error = copyin(addr, &vfp, sizeof(vfp));
		if (error == 0)
			set_vfpcontext(td, &vfp);
		break;
#endif
	default:
		error = EINVAL;
	}

	return (error);
}

diff --git a/sys/arm/include/proc.h b/sys/arm/include/proc.h
index 76b05b335420..9db28358cc39 100644
--- a/sys/arm/include/proc.h
+++ b/sys/arm/include/proc.h
@@ -1,70 +1,59 @@
/*-
 * SPDX-License-Identifier: BSD-4-Clause
 *
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)proc.h 7.1 (Berkeley) 5/15/91 * from: FreeBSD: src/sys/i386/include/proc.h,v 1.11 2001/06/29 * $FreeBSD$ */ #ifndef _MACHINE_PROC_H_ #define _MACHINE_PROC_H_ struct mdthread { int md_spinlock_count; /* (k) */ register_t md_saved_cspr; /* (k) */ register_t md_spurflt_addr; /* (k) Spurious page fault address. */ int md_ptrace_instr; int md_ptrace_addr; int md_ptrace_instr_alt; int md_ptrace_addr_alt; }; struct mdproc { long md_dummy; }; #define KINFO_PROC_SIZE 816 -#ifdef _KERNEL -#include - -/* Get the current kernel thread stack usage. */ -#define GET_STACK_USAGE(total, used) do { \ - struct thread *td = curthread; \ - (total) = td->td_kstack_pages * PAGE_SIZE - sizeof(struct pcb); \ - (used) = td->td_kstack + (total) - (vm_offset_t)&td; \ -} while (0) - -#endif /* _KERNEL */ #endif /* !_MACHINE_PROC_H_ */ diff --git a/sys/arm/include/stack.h b/sys/arm/include/stack.h index 4bc384f775bc..e8d130517be5 100644 --- a/sys/arm/include/stack.h +++ b/sys/arm/include/stack.h @@ -1,68 +1,87 @@ /*- * Copyright (c) 2000, 2001 Ben Harris * Copyright (c) 1996 Scott K. Stevens * * Mach Operating System * Copyright (c) 1991,1990 Carnegie Mellon University * All Rights Reserved. * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * * $FreeBSD$ */ #ifndef _MACHINE_STACK_H_ #define _MACHINE_STACK_H_ #define INKERNEL(va) (((vm_offset_t)(va)) >= VM_MIN_KERNEL_ADDRESS) #define FR_SCP (0) #define FR_RLV (-1) #define FR_RSP (-2) #define FR_RFP (-3) /* The state of the unwind process */ struct unwind_state { uint32_t registers[16]; uint32_t start_pc; uint32_t *insn; u_int entries; u_int byte; uint16_t update_mask; }; /* The register names */ #define FP 11 #define SP 13 #define LR 14 #define PC 15 #ifdef _KERNEL int unwind_stack_one(struct unwind_state *, int); struct linker_file; void unwind_module_loaded(struct linker_file *); void unwind_module_unloaded(struct linker_file *); +#ifdef _SYS_PROC_H_ + +#include + +/* Get the current kernel thread stack usage. 
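+ * (total) receives the usable stack size in bytes; (used) is an estimate
+ * of the bytes consumed so far, using the address of a local variable as
+ * an approximation of the current stack pointer.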
*/ +#define GET_STACK_USAGE(total, used) do { \ + struct thread *td = curthread; \ + (total) = td->td_kstack_pages * PAGE_SIZE - sizeof(struct pcb); \ + (used) = td->td_kstack + (total) - (vm_offset_t)&td; \ +} while (0) + +static __inline bool +kstack_contains(struct thread *td, vm_offset_t va, size_t len) +{ + return (va >= td->td_kstack && va + len >= va && + va + len <= td->td_kstack + td->td_kstack_pages * PAGE_SIZE); +} +#endif /* _SYS_PROC_H_ */ + #endif #endif /* !_MACHINE_STACK_H_ */ diff --git a/sys/arm64/arm64/debug_monitor.c b/sys/arm64/arm64/debug_monitor.c index 2ec76c9a2f33..52bcf1e5e603 100644 --- a/sys/arm64/arm64/debug_monitor.c +++ b/sys/arm64/arm64/debug_monitor.c @@ -1,602 +1,603 @@ /*- * Copyright (c) 2014 The FreeBSD Foundation * * This software was developed by Semihalf under * the sponsorship of the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include "opt_ddb.h" #include "opt_gdb.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include +#include #ifdef DDB #include #include #endif enum dbg_t { DBG_TYPE_BREAKPOINT = 0, DBG_TYPE_WATCHPOINT = 1, }; static int dbg_watchpoint_num; static int dbg_breakpoint_num; static struct debug_monitor_state kernel_monitor = { .dbg_flags = DBGMON_KERNEL }; /* Called from the exception handlers */ void dbg_monitor_enter(struct thread *); void dbg_monitor_exit(struct thread *, struct trapframe *); /* Watchpoints/breakpoints control register bitfields */ #define DBG_WATCH_CTRL_LEN_1 (0x1 << 5) #define DBG_WATCH_CTRL_LEN_2 (0x3 << 5) #define DBG_WATCH_CTRL_LEN_4 (0xf << 5) #define DBG_WATCH_CTRL_LEN_8 (0xff << 5) #define DBG_WATCH_CTRL_LEN_MASK(x) ((x) & (0xff << 5)) #define DBG_WATCH_CTRL_EXEC (0x0 << 3) #define DBG_WATCH_CTRL_LOAD (0x1 << 3) #define DBG_WATCH_CTRL_STORE (0x2 << 3) #define DBG_WATCH_CTRL_ACCESS_MASK(x) ((x) & (0x3 << 3)) /* Common for breakpoint and watchpoint */ #define DBG_WB_CTRL_EL1 (0x1 << 1) #define DBG_WB_CTRL_EL0 (0x2 << 1) #define DBG_WB_CTRL_ELX_MASK(x) ((x) & (0x3 << 1)) #define DBG_WB_CTRL_E (0x1 << 0) #define DBG_REG_BASE_BVR 0 #define DBG_REG_BASE_BCR (DBG_REG_BASE_BVR + 16) #define DBG_REG_BASE_WVR (DBG_REG_BASE_BCR + 16) #define DBG_REG_BASE_WCR (DBG_REG_BASE_WVR + 16) /* Watchpoint/breakpoint helpers */ #define DBG_WB_WVR "wvr" #define DBG_WB_WCR "wcr" #define DBG_WB_BVR "bvr" #define DBG_WB_BCR "bcr" #define DBG_WB_READ(reg, num, val) do { \ __asm __volatile("mrs %0, dbg" reg #num "_el1" : "=r" (val)); \ } while (0) #define DBG_WB_WRITE(reg, num, val) do { \ __asm __volatile("msr dbg" reg #num "_el1, %0" :: "r" (val)); \ } while (0) #define READ_WB_REG_CASE(reg, num, offset, val) \ case (num + offset): \ DBG_WB_READ(reg, num, val); \ break #define WRITE_WB_REG_CASE(reg, num, offset, val) \ case (num + offset): \ DBG_WB_WRITE(reg, num, val); \ break #define SWITCH_CASES_READ_WB_REG(reg, offset, val) \ READ_WB_REG_CASE(reg, 0, offset, val); \ READ_WB_REG_CASE(reg, 1, offset, val); \ READ_WB_REG_CASE(reg, 2, offset, val); \ READ_WB_REG_CASE(reg, 3, offset, val); \ READ_WB_REG_CASE(reg, 4, offset, val); \ READ_WB_REG_CASE(reg, 5, offset, val); \ READ_WB_REG_CASE(reg, 6, offset, val); \ READ_WB_REG_CASE(reg, 7, offset, val); \ READ_WB_REG_CASE(reg, 8, offset, val); \ READ_WB_REG_CASE(reg, 9, offset, val); \ READ_WB_REG_CASE(reg, 10, offset, val); \ READ_WB_REG_CASE(reg, 11, offset, val); \ READ_WB_REG_CASE(reg, 12, offset, val); \ READ_WB_REG_CASE(reg, 13, offset, val); \ READ_WB_REG_CASE(reg, 14, offset, val); \ READ_WB_REG_CASE(reg, 15, offset, val) #define SWITCH_CASES_WRITE_WB_REG(reg, offset, val) \ WRITE_WB_REG_CASE(reg, 0, offset, val); \ WRITE_WB_REG_CASE(reg, 1, offset, val); \ WRITE_WB_REG_CASE(reg, 2, offset, val); \ WRITE_WB_REG_CASE(reg, 3, offset, val); \ WRITE_WB_REG_CASE(reg, 4, offset, val); \ WRITE_WB_REG_CASE(reg, 5, offset, val); \ WRITE_WB_REG_CASE(reg, 6, offset, val); \ WRITE_WB_REG_CASE(reg, 7, offset, val); \ WRITE_WB_REG_CASE(reg, 8, offset, val); \ WRITE_WB_REG_CASE(reg, 9, offset, val); \ WRITE_WB_REG_CASE(reg, 10, offset, val); \ WRITE_WB_REG_CASE(reg, 11, offset, val); \ WRITE_WB_REG_CASE(reg, 12, offset, val); \ WRITE_WB_REG_CASE(reg, 13, offset, val); \ WRITE_WB_REG_CASE(reg, 14, offset, val); \ WRITE_WB_REG_CASE(reg, 15, offset, val) #ifdef DDB static uint64_t dbg_wb_read_reg(int reg, int n) { uint64_t val = 0; switch (reg + n) { 
	SWITCH_CASES_READ_WB_REG(DBG_WB_WVR, DBG_REG_BASE_WVR, val);
	SWITCH_CASES_READ_WB_REG(DBG_WB_WCR, DBG_REG_BASE_WCR, val);
	SWITCH_CASES_READ_WB_REG(DBG_WB_BVR, DBG_REG_BASE_BVR, val);
	SWITCH_CASES_READ_WB_REG(DBG_WB_BCR, DBG_REG_BASE_BCR, val);
	default:
		printf("trying to read from wrong debug register %d\n", n);
	}

	return val;
}
#endif /* DDB */

static void
dbg_wb_write_reg(int reg, int n, uint64_t val)
{
	switch (reg + n) {
	SWITCH_CASES_WRITE_WB_REG(DBG_WB_WVR, DBG_REG_BASE_WVR, val);
	SWITCH_CASES_WRITE_WB_REG(DBG_WB_WCR, DBG_REG_BASE_WCR, val);
	SWITCH_CASES_WRITE_WB_REG(DBG_WB_BVR, DBG_REG_BASE_BVR, val);
	SWITCH_CASES_WRITE_WB_REG(DBG_WB_BCR, DBG_REG_BASE_BCR, val);
	default:
		printf("trying to write to wrong debug register %d\n", n);
		return;
	}
	isb();
}

#if defined(DDB) || defined(GDB)
void
kdb_cpu_set_singlestep(void)
{
	KASSERT((READ_SPECIALREG(daif) & PSR_D) == PSR_D,
	    ("%s: debug exceptions are not masked", __func__));

	kdb_frame->tf_spsr |= PSR_SS;
	WRITE_SPECIALREG(mdscr_el1, READ_SPECIALREG(mdscr_el1) |
	    MDSCR_SS | MDSCR_KDE);

	/*
	 * Disable breakpoints and watchpoints; e.g. stepping over a watched
	 * instruction would trigger the break exception instead of the
	 * single-step exception, locking the CPU on that instruction forever.
	 */
	if ((kernel_monitor.dbg_flags & DBGMON_ENABLED) != 0) {
		WRITE_SPECIALREG(mdscr_el1,
		    READ_SPECIALREG(mdscr_el1) & ~MDSCR_MDE);
	}
}

void
kdb_cpu_clear_singlestep(void)
{
	KASSERT((READ_SPECIALREG(daif) & PSR_D) == PSR_D,
	    ("%s: debug exceptions are not masked", __func__));

	WRITE_SPECIALREG(mdscr_el1, READ_SPECIALREG(mdscr_el1) &
	    ~(MDSCR_SS | MDSCR_KDE));

	/* Restore breakpoints and watchpoints */
	if ((kernel_monitor.dbg_flags & DBGMON_ENABLED) != 0) {
		WRITE_SPECIALREG(mdscr_el1,
		    READ_SPECIALREG(mdscr_el1) | MDSCR_MDE);

		if ((kernel_monitor.dbg_flags & DBGMON_KERNEL) != 0) {
			WRITE_SPECIALREG(mdscr_el1,
			    READ_SPECIALREG(mdscr_el1) | MDSCR_KDE);
		}
	}
}

int
kdb_cpu_set_watchpoint(vm_offset_t addr, vm_size_t size, int access)
{
	enum dbg_access_t dbg_access;

	switch (access) {
	case KDB_DBG_ACCESS_R:
		dbg_access = HW_BREAKPOINT_R;
		break;
	case KDB_DBG_ACCESS_W:
		dbg_access = HW_BREAKPOINT_W;
		break;
	case KDB_DBG_ACCESS_RW:
		dbg_access = HW_BREAKPOINT_RW;
		break;
	default:
		return (EINVAL);
	}

	return (dbg_setup_watchpoint(NULL, addr, size, dbg_access));
}

int
kdb_cpu_clr_watchpoint(vm_offset_t addr, vm_size_t size)
{
	return (dbg_remove_watchpoint(NULL, addr, size));
}
#endif /* DDB || GDB */

#ifdef DDB
static const char *
dbg_watchtype_str(uint32_t type)
{
	switch (type) {
	case DBG_WATCH_CTRL_EXEC:
		return ("execute");
	case DBG_WATCH_CTRL_STORE:
		return ("write");
	case DBG_WATCH_CTRL_LOAD:
		return ("read");
	case DBG_WATCH_CTRL_LOAD | DBG_WATCH_CTRL_STORE:
		return ("read/write");
	default:
		return ("invalid");
	}
}

static int
dbg_watchtype_len(uint32_t len)
{
	switch (len) {
	case DBG_WATCH_CTRL_LEN_1:
		return (1);
	case DBG_WATCH_CTRL_LEN_2:
		return (2);
	case DBG_WATCH_CTRL_LEN_4:
		return (4);
	case DBG_WATCH_CTRL_LEN_8:
		return (8);
	default:
		return (0);
	}
}

void
dbg_show_watchpoint(void)
{
	uint32_t wcr, len, type;
	uint64_t addr;
	int i;

	db_printf("\nhardware watchpoints:\n");
	db_printf(" watch status type len address symbol\n");
	db_printf(" ----- -------- ---------- --- ------------------ ------------------\n");
	for (i = 0; i < dbg_watchpoint_num; i++) {
		wcr = dbg_wb_read_reg(DBG_REG_BASE_WCR, i);
		if ((wcr & DBG_WB_CTRL_E) != 0) {
			type = DBG_WATCH_CTRL_ACCESS_MASK(wcr);
			len = DBG_WATCH_CTRL_LEN_MASK(wcr);
			addr = dbg_wb_read_reg(DBG_REG_BASE_WVR, i);
			db_printf(" %-5d %-8s %10s %3d 0x%16lx ", i, "enabled",
			    dbg_watchtype_str(type), dbg_watchtype_len(len),
			    addr);
			db_printsym((db_addr_t)addr, DB_STGY_ANY);
			db_printf("\n");
		} else {
			db_printf(" %-5d disabled\n", i);
		}
	}
}
#endif /* DDB */

static int
dbg_find_free_slot(struct debug_monitor_state *monitor, enum dbg_t type)
{
	uint64_t *reg;
	u_int max, i;

	switch(type) {
	case DBG_TYPE_BREAKPOINT:
		max = dbg_breakpoint_num;
		reg = monitor->dbg_bcr;
		break;
	case DBG_TYPE_WATCHPOINT:
		max = dbg_watchpoint_num;
		reg = monitor->dbg_wcr;
		break;
	default:
		printf("Unsupported debug type\n");
		return (-1);
	}

	for (i = 0; i < max; i++) {
		if ((reg[i] & DBG_WB_CTRL_E) == 0)
			return (i);
	}

	return (-1);
}

static int
dbg_find_slot(struct debug_monitor_state *monitor, enum dbg_t type,
    vm_offset_t addr)
{
	uint64_t *reg_addr, *reg_ctrl;
	u_int max, i;

	switch(type) {
	case DBG_TYPE_BREAKPOINT:
		max = dbg_breakpoint_num;
		reg_addr = monitor->dbg_bvr;
		reg_ctrl = monitor->dbg_bcr;
		break;
	case DBG_TYPE_WATCHPOINT:
		max = dbg_watchpoint_num;
		reg_addr = monitor->dbg_wvr;
		reg_ctrl = monitor->dbg_wcr;
		break;
	default:
		printf("Unsupported debug type\n");
		return (-1);
	}

	for (i = 0; i < max; i++) {
		if (reg_addr[i] == addr &&
		    (reg_ctrl[i] & DBG_WB_CTRL_E) != 0)
			return (i);
	}

	return (-1);
}

int
dbg_setup_watchpoint(struct debug_monitor_state *monitor, vm_offset_t addr,
    vm_size_t size, enum dbg_access_t access)
{
	uint64_t wcr_size, wcr_priv, wcr_access;
	u_int i;

	if (monitor == NULL)
		monitor = &kernel_monitor;

	i = dbg_find_free_slot(monitor, DBG_TYPE_WATCHPOINT);
	if (i == -1) {
		printf("Cannot find a free slot for the watchpoint, max %d"
		    " watchpoints supported\n", dbg_watchpoint_num);
		return (EBUSY);
	}

	switch(size) {
	case 1:
		wcr_size = DBG_WATCH_CTRL_LEN_1;
		break;
	case 2:
		wcr_size = DBG_WATCH_CTRL_LEN_2;
		break;
	case 4:
		wcr_size = DBG_WATCH_CTRL_LEN_4;
		break;
	case 8:
		wcr_size = DBG_WATCH_CTRL_LEN_8;
		break;
	default:
		printf("Unsupported address size for watchpoint: %zu\n", size);
		return (EINVAL);
	}

	if ((monitor->dbg_flags & DBGMON_KERNEL) == 0)
		wcr_priv = DBG_WB_CTRL_EL0;
	else
		wcr_priv = DBG_WB_CTRL_EL1;

	switch(access) {
	case HW_BREAKPOINT_X:
		wcr_access = DBG_WATCH_CTRL_EXEC;
		break;
	case HW_BREAKPOINT_R:
		wcr_access = DBG_WATCH_CTRL_LOAD;
		break;
	case HW_BREAKPOINT_W:
		wcr_access = DBG_WATCH_CTRL_STORE;
		break;
	case HW_BREAKPOINT_RW:
		wcr_access = DBG_WATCH_CTRL_LOAD | DBG_WATCH_CTRL_STORE;
		break;
	default:
		printf("Unsupported access type for watchpoint: %d\n", access);
		return (EINVAL);
	}

	monitor->dbg_wvr[i] = addr;
	monitor->dbg_wcr[i] = wcr_size | wcr_access | wcr_priv | DBG_WB_CTRL_E;
	monitor->dbg_enable_count++;
	monitor->dbg_flags |= DBGMON_ENABLED;

	dbg_register_sync(monitor);
	return (0);
}

int
dbg_remove_watchpoint(struct debug_monitor_state *monitor, vm_offset_t addr,
    vm_size_t size)
{
	u_int i;

	if (monitor == NULL)
		monitor = &kernel_monitor;

	i = dbg_find_slot(monitor, DBG_TYPE_WATCHPOINT, addr);
	if (i == -1) {
		printf("Cannot find a watchpoint for address 0x%lx\n", addr);
		return (EINVAL);
	}

	monitor->dbg_wvr[i] = 0;
	monitor->dbg_wcr[i] = 0;
	monitor->dbg_enable_count--;
	if (monitor->dbg_enable_count == 0)
		monitor->dbg_flags &= ~DBGMON_ENABLED;

	dbg_register_sync(monitor);
	return (0);
}
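/*
 * A minimal usage sketch (illustrative only; some_var is hypothetical and
 * this block is not part of the change), mirroring kdb_cpu_set_watchpoint()
 * above: watch a kernel variable for writes via the kernel monitor.
 *
 *	dbg_setup_watchpoint(NULL, (vm_offset_t)&some_var, 8,
 *	    HW_BREAKPOINT_W);
 *	...
 *	dbg_remove_watchpoint(NULL, (vm_offset_t)&some_var, 8);
 */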
void
dbg_register_sync(struct debug_monitor_state *monitor)
{
	uint64_t mdscr;
	int i;

	if (monitor == NULL)
		monitor = &kernel_monitor;

	mdscr = READ_SPECIALREG(mdscr_el1);
	if ((monitor->dbg_flags & DBGMON_ENABLED) == 0) {
		mdscr &= ~(MDSCR_MDE | MDSCR_KDE);
	} else {
		for (i = 0; i < dbg_breakpoint_num; i++) {
			dbg_wb_write_reg(DBG_REG_BASE_BCR, i,
			    monitor->dbg_bcr[i]);
			dbg_wb_write_reg(DBG_REG_BASE_BVR, i,
			    monitor->dbg_bvr[i]);
		}
		for (i = 0; i < dbg_watchpoint_num; i++) {
			dbg_wb_write_reg(DBG_REG_BASE_WCR, i,
			    monitor->dbg_wcr[i]);
			dbg_wb_write_reg(DBG_REG_BASE_WVR, i,
			    monitor->dbg_wvr[i]);
		}
		mdscr |= MDSCR_MDE;
		if ((monitor->dbg_flags & DBGMON_KERNEL) == DBGMON_KERNEL)
			mdscr |= MDSCR_KDE;
	}
	WRITE_SPECIALREG(mdscr_el1, mdscr);
	isb();
}

void
dbg_monitor_init(void)
{
	uint64_t aa64dfr0;
	u_int i;

	/* Find out how many breakpoints and watchpoints we can use */
	aa64dfr0 = READ_SPECIALREG(id_aa64dfr0_el1);
	dbg_watchpoint_num = ID_AA64DFR0_WRPs_VAL(aa64dfr0);
	dbg_breakpoint_num = ID_AA64DFR0_BRPs_VAL(aa64dfr0);

	if (bootverbose && PCPU_GET(cpuid) == 0) {
		printf("%d watchpoints and %d breakpoints supported\n",
		    dbg_watchpoint_num, dbg_breakpoint_num);
	}

	/*
	 * We have a limited number of {watch,break}points, each consisting
	 * of two registers:
	 * - the wcr/bcr register configures the corresponding
	 *   {watch,break}point's behaviour
	 * - the wvr/bvr register holds the address being watched for
	 *
	 * Reset all breakpoints and watchpoints.
	 */
	for (i = 0; i < dbg_watchpoint_num; i++) {
		dbg_wb_write_reg(DBG_REG_BASE_WCR, i, 0);
		dbg_wb_write_reg(DBG_REG_BASE_WVR, i, 0);
	}

	for (i = 0; i < dbg_breakpoint_num; i++) {
		dbg_wb_write_reg(DBG_REG_BASE_BCR, i, 0);
		dbg_wb_write_reg(DBG_REG_BASE_BVR, i, 0);
	}

	dbg_enable();
}

void
dbg_monitor_enter(struct thread *thread)
{
	int i;

	if ((kernel_monitor.dbg_flags & DBGMON_ENABLED) != 0) {
		/* Install the kernel version of the registers */
		dbg_register_sync(&kernel_monitor);
	} else if ((thread->td_pcb->pcb_dbg_regs.dbg_flags &
	    DBGMON_ENABLED) != 0) {
		/* Disable the user breakpoints until we return to userspace */
		for (i = 0; i < dbg_watchpoint_num; i++) {
			dbg_wb_write_reg(DBG_REG_BASE_WCR, i, 0);
			dbg_wb_write_reg(DBG_REG_BASE_WVR, i, 0);
		}

		for (i = 0; i < dbg_breakpoint_num; ++i) {
			dbg_wb_write_reg(DBG_REG_BASE_BCR, i, 0);
			dbg_wb_write_reg(DBG_REG_BASE_BVR, i, 0);
		}
		WRITE_SPECIALREG(mdscr_el1,
		    READ_SPECIALREG(mdscr_el1) &
		    ~(MDSCR_MDE | MDSCR_KDE));
		isb();
	}
}

void
dbg_monitor_exit(struct thread *thread, struct trapframe *frame)
{
	int i;

	/*
	 * PSR_D is an aarch64-only flag. On aarch32, it switches
	 * the processor to big-endian, so avoid setting it for
	 * 32-bit binaries.
	 */
	if (!(SV_PROC_FLAG(thread->td_proc, SV_ILP32)))
		frame->tf_spsr |= PSR_D;
	if ((thread->td_pcb->pcb_dbg_regs.dbg_flags & DBGMON_ENABLED) != 0) {
		/* Install the thread's version of the registers */
		dbg_register_sync(&thread->td_pcb->pcb_dbg_regs);
		frame->tf_spsr &= ~PSR_D;
	} else if ((kernel_monitor.dbg_flags & DBGMON_ENABLED) != 0) {
		/* Disable the kernel breakpoints until we re-enter */
		for (i = 0; i < dbg_watchpoint_num; i++) {
			dbg_wb_write_reg(DBG_REG_BASE_WCR, i, 0);
			dbg_wb_write_reg(DBG_REG_BASE_WVR, i, 0);
		}

		for (i = 0; i < dbg_breakpoint_num; ++i) {
			dbg_wb_write_reg(DBG_REG_BASE_BCR, i, 0);
			dbg_wb_write_reg(DBG_REG_BASE_BVR, i, 0);
		}
		WRITE_SPECIALREG(mdscr_el1,
		    READ_SPECIALREG(mdscr_el1) &
		    ~(MDSCR_MDE | MDSCR_KDE));
		isb();
	}
}

diff --git a/sys/arm64/arm64/elf32_machdep.c b/sys/arm64/arm64/elf32_machdep.c
index 627973ecfd3d..7b346ed81b2c 100644
--- a/sys/arm64/arm64/elf32_machdep.c
+++ b/sys/arm64/arm64/elf32_machdep.c
@@ -1,289 +1,290 @@
/*-
 * Copyright (c) 2014, 2015 The FreeBSD Foundation.
 * Copyright (c) 2014, 2017 Andrew Turner.
 * Copyright (c) 2018 Olivier Houchard
 * All rights reserved.
 *
 * This software was developed by Andrew Turner under
 * sponsorship from the FreeBSD Foundation.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #ifdef VFP #include #endif #include #define FREEBSD32_MINUSER 0x00001000 #define FREEBSD32_MAXUSER ((1ul << 32) - PAGE_SIZE) #define FREEBSD32_SHAREDPAGE (FREEBSD32_MAXUSER - PAGE_SIZE) #define FREEBSD32_USRSTACK FREEBSD32_SHAREDPAGE extern const char *freebsd32_syscallnames[]; extern char aarch32_sigcode[]; extern int sz_aarch32_sigcode; static int freebsd32_fetch_syscall_args(struct thread *td); static void freebsd32_setregs(struct thread *td, struct image_params *imgp, u_long stack); static void freebsd32_set_syscall_retval(struct thread *, int); static boolean_t elf32_arm_abi_supported(struct image_params *, int32_t *, uint32_t *); extern void freebsd32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); u_long __read_frequently elf32_hwcap; u_long __read_frequently elf32_hwcap2; static struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = freebsd32_sysent, .sv_fixup = elf32_freebsd_fixup, .sv_sendsig = freebsd32_sendsig, .sv_sigcode = aarch32_sigcode, .sv_szsigcode = &sz_aarch32_sigcode, .sv_name = "FreeBSD ELF32", .sv_coredump = elf32_coredump, .sv_elf_core_osabi = ELFOSABI_FREEBSD, .sv_elf_core_abi_vendor = FREEBSD_ABI_VENDOR, .sv_elf_core_prepare_notes = elf32_prepare_notes, .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = FREEBSD32_MINUSER, .sv_maxuser = FREEBSD32_MAXUSER, .sv_usrstack = FREEBSD32_USRSTACK, .sv_psstrings = FREEBSD32_PS_STRINGS, .sv_psstringssz = sizeof(struct freebsd32_ps_strings), .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, .sv_copyout_auxargs = elf32_freebsd_copyout_auxargs, .sv_copyout_strings = freebsd32_copyout_strings, .sv_setregs = freebsd32_setregs, .sv_fixlimit = NULL, // XXX .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_ILP32 | SV_SHP | SV_TIMEKEEP | SV_RNG_SEED_VER, .sv_set_syscall_retval = freebsd32_set_syscall_retval, .sv_fetch_syscall_args = freebsd32_fetch_syscall_args, .sv_syscallnames = freebsd32_syscallnames, .sv_shared_page_base = FREEBSD32_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_hwcap = 
&elf32_hwcap, .sv_hwcap2 = &elf32_hwcap2, .sv_onexec_old = exec_onexec_old, .sv_onexit = exit_onexit, .sv_regset_begin = SET_BEGIN(__elfN(regset)), .sv_regset_end = SET_LIMIT(__elfN(regset)), }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); static Elf32_Brandinfo freebsd32_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_ARM, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = "/libexec/ld-elf32.so.1", .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, .header_supported= elf32_arm_abi_supported, }; SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)elf32_insert_brand_entry, &freebsd32_brand_info); static boolean_t elf32_arm_abi_supported(struct image_params *imgp, int32_t *osrel __unused, uint32_t *fctl0 __unused) { const Elf32_Ehdr *hdr; /* Check if we support AArch32 */ if (ID_AA64PFR0_EL0_VAL(READ_SPECIALREG(id_aa64pfr0_el1)) != ID_AA64PFR0_EL0_64_32) return (FALSE); #define EF_ARM_EABI_FREEBSD_MIN EF_ARM_EABI_VER4 hdr = (const Elf32_Ehdr *)imgp->image_header; if (EF_ARM_EABI_VERSION(hdr->e_flags) < EF_ARM_EABI_FREEBSD_MIN) { if (bootverbose) uprintf("Attempting to execute non EABI binary " "(rev %d) image %s", EF_ARM_EABI_VERSION(hdr->e_flags), imgp->args->fname); return (FALSE); } return (TRUE); } static int freebsd32_fetch_syscall_args(struct thread *td) { struct proc *p; register_t *ap; struct syscall_args *sa; int error, i, nap, narg; unsigned int args[4]; nap = 4; p = td->td_proc; ap = td->td_frame->tf_x; sa = &td->td_sa; /* r7 is the syscall id */ sa->code = td->td_frame->tf_x[7]; sa->original_code = sa->code; if (sa->code == SYS_syscall) { sa->code = *ap++; nap--; } else if (sa->code == SYS___syscall) { sa->code = ap[1]; nap -= 2; ap += 2; } if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; narg = sa->callp->sy_narg; for (i = 0; i < nap; i++) sa->args[i] = ap[i]; if (narg > nap) { if (narg - nap > nitems(args)) panic("Too many system call arguiments"); error = copyin((void *)td->td_frame->tf_x[13], args, (narg - nap) * sizeof(int)); if (error != 0) return (error); for (i = 0; i < (narg - nap); i++) sa->args[i + nap] = args[i]; } td->td_retval[0] = 0; td->td_retval[1] = 0; return (0); } static void freebsd32_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame; frame = td->td_frame; switch (error) { case 0: frame->tf_x[0] = td->td_retval[0]; frame->tf_x[1] = td->td_retval[1]; frame->tf_spsr &= ~PSR_C; break; case ERESTART: /* * Reconstruct the pc to point at the swi. */ if ((frame->tf_spsr & PSR_T) != 0) frame->tf_elr -= 2; //THUMB_INSN_SIZE; else frame->tf_elr -= 4; //INSN_SIZE; break; case EJUSTRETURN: /* nothing to do */ break; default: frame->tf_x[0] = error; frame->tf_spsr |= PSR_C; break; } } static void freebsd32_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct trapframe *tf = td->td_frame; struct pcb *pcb = td->td_pcb; memset(tf, 0, sizeof(struct trapframe)); /* * We need to set x0 for init as it doesn't call * cpu_set_syscall_retval to copy the value. We also * need to set td_retval for the cases where we do. 
	 */
	tf->tf_x[0] = stack;
	/* SP_usr is mapped to x13 */
	tf->tf_x[13] = stack;
	/* LR_usr is mapped to x14 */
	tf->tf_x[14] = imgp->entry_addr;
	tf->tf_elr = imgp->entry_addr;
	tf->tf_spsr = PSR_M_32;
	if ((uint32_t)imgp->entry_addr & 1)
		tf->tf_spsr |= PSR_T;

#ifdef VFP
	vfp_reset_state(td, pcb);
#endif

	/*
	 * Clear debug register state. It is not applicable to the new process.
	 */
	bzero(&pcb->pcb_dbg_regs, sizeof(pcb->pcb_dbg_regs));
}

void
elf32_dump_thread(struct thread *td, void *dst, size_t *off)
{
}

diff --git a/sys/arm64/arm64/freebsd32_machdep.c b/sys/arm64/arm64/freebsd32_machdep.c
index 9b62802efbc5..5fadef74df87 100644
--- a/sys/arm64/arm64/freebsd32_machdep.c
+++ b/sys/arm64/arm64/freebsd32_machdep.c
@@ -1,459 +1,460 @@
/*-
 * Copyright (c) 2018 Olivier Houchard
 * Copyright (c) 2017 Nuxi, https://nuxi.nl/
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
+#include
#ifdef VFP
#include
#endif
#include
#include
#include
#include
#include
#include

_Static_assert(sizeof(mcontext32_t) == 208, "mcontext32_t size incorrect");
_Static_assert(sizeof(ucontext32_t) == 260, "ucontext32_t size incorrect");
_Static_assert(sizeof(struct siginfo32) == 64,
    "struct siginfo32 size incorrect");

extern void freebsd32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);

/*
 * The first two fields of a ucontext_t are the signal mask and the machine
 * context. The next field is uc_link; we want to avoid destroying the link
 * when copying out contexts.
 */
#define UC32_COPY_SIZE offsetof(ucontext32_t, uc_link)
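/*
 * Illustrative only (hypothetical values, not part of this change): a
 * 32-bit process reaches the ARM_SYNC_ICACHE handler below roughly as
 *
 *	struct { uint32_t addr; uint32_t size; } a = { start, len };
 *	sysarch(ARM_SYNC_ICACHE, &a);
 *
 * after writing instructions into [start, start + len).
 */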
/*
 * Stubs for machine-dependent 32-bit system calls.
 */
int
freebsd32_sysarch(struct thread *td, struct freebsd32_sysarch_args *uap)
{
	int error;

#define ARM_SYNC_ICACHE 0
#define ARM_DRAIN_WRITEBUF 1
#define ARM_SET_TP 2
#define ARM_GET_TP 3
#define ARM_GET_VFPSTATE 4

	switch(uap->op) {
	case ARM_SET_TP:
		WRITE_SPECIALREG(tpidr_el0, uap->parms);
		WRITE_SPECIALREG(tpidrro_el0, uap->parms);
		return 0;
	case ARM_SYNC_ICACHE:
	    {
		struct {
			uint32_t addr;
			uint32_t size;
		} args;

		if ((error = copyin(uap->parms, &args, sizeof(args))) != 0)
			return (error);
		if ((uint64_t)args.addr + (uint64_t)args.size > 0xffffffff)
			return (EINVAL);
		cpu_icache_sync_range_checked(args.addr, args.size);
		return 0;
	    }
	case ARM_GET_VFPSTATE:
	    {
		mcontext32_vfp_t mcontext_vfp;
		struct {
			uint32_t mc_vfp_size;
			uint32_t mc_vfp;
		} args;

		if ((error = copyin(uap->parms, &args, sizeof(args))) != 0)
			return (error);
		if (args.mc_vfp_size != sizeof(mcontext_vfp))
			return (EINVAL);
#ifdef VFP
		get_fpcontext32(td, &mcontext_vfp);
#else
		bzero(&mcontext_vfp, sizeof(mcontext_vfp));
#endif
		error = copyout(&mcontext_vfp,
		    (void *)(uintptr_t)args.mc_vfp, sizeof(mcontext_vfp));
		return error;
	    }
	}

	return (EINVAL);
}

#ifdef VFP
void
get_fpcontext32(struct thread *td, mcontext32_vfp_t *mcp)
{
	struct pcb *pcb;
	int i;

	KASSERT(td == curthread || TD_IS_SUSPENDED(td) ||
	    P_SHOULDSTOP(td->td_proc),
	    ("not suspended thread %p", td));

	memset(mcp, 0, sizeof(*mcp));
	pcb = td->td_pcb;

	if ((pcb->pcb_fpflags & PCB_FP_STARTED) != 0) {
		/*
		 * If we have just been running VFP instructions we will
		 * need to save the state to memcpy it below.
		 */
		if (td == curthread)
			vfp_save_state(td, pcb);

		KASSERT(pcb->pcb_fpusaved == &pcb->pcb_fpustate,
		    ("Called get_fpcontext32 while the kernel is using the VFP"));
		KASSERT((pcb->pcb_fpflags & ~PCB_FP_USERMASK) == 0,
		    ("Non-userspace FPU flags set in get_fpcontext32"));
		for (i = 0; i < 32; i++)
			mcp->mcv_reg[i] =
			    (uint64_t)pcb->pcb_fpustate.vfp_regs[i];
		mcp->mcv_fpscr =
		    VFP_FPSCR_FROM_SRCR(pcb->pcb_fpustate.vfp_fpcr,
		    pcb->pcb_fpustate.vfp_fpsr);
	}
}

void
set_fpcontext32(struct thread *td, mcontext32_vfp_t *mcp)
{
	struct pcb *pcb;
	int i;

	critical_enter();
	pcb = td->td_pcb;
	if (td == curthread)
		vfp_discard(td);
	for (i = 0; i < 32; i++)
		pcb->pcb_fpustate.vfp_regs[i] = mcp->mcv_reg[i];
	pcb->pcb_fpustate.vfp_fpsr = VFP_FPSR_FROM_FPSCR(mcp->mcv_fpscr);
	pcb->pcb_fpustate.vfp_fpcr = VFP_FPCR_FROM_FPSCR(mcp->mcv_fpscr);
	critical_exit();
}
#endif
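/*
 * Note: AArch32's single FPSCR register corresponds to the two AArch64
 * registers FPSR (status flags) and FPCR (control bits), which is why
 * the VFP_*_FROM_FPSCR conversions above split it apart and recombine it.
 */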
static void
get_mcontext32(struct thread *td, mcontext32_t *mcp, int flags)
{
	struct trapframe *tf;
	int i;

	tf = td->td_frame;

	if ((flags & GET_MC_CLEAR_RET) != 0) {
		mcp->mc_gregset[0] = 0;
		mcp->mc_gregset[16] = tf->tf_spsr & ~PSR_C;
	} else {
		mcp->mc_gregset[0] = tf->tf_x[0];
		mcp->mc_gregset[16] = tf->tf_spsr;
	}
	for (i = 1; i < 15; i++)
		mcp->mc_gregset[i] = tf->tf_x[i];
	mcp->mc_gregset[15] = tf->tf_elr;

	mcp->mc_vfp_size = 0;
	mcp->mc_vfp_ptr = 0;

	memset(mcp->mc_spare, 0, sizeof(mcp->mc_spare));
}

static int
set_mcontext32(struct thread *td, mcontext32_t *mcp)
{
	struct trapframe *tf;
	mcontext32_vfp_t mc_vfp;
	uint32_t spsr;
	int i;

	tf = td->td_frame;

	spsr = mcp->mc_gregset[16];
	/*
	 * There is no PSR_SS in the 32-bit kernel so ignore it if it's set
	 * as we will set it later if needed.
	 */
	if ((spsr & ~(PSR_SETTABLE_32 | PSR_SS)) !=
	    (tf->tf_spsr & ~(PSR_SETTABLE_32 | PSR_SS)))
		return (EINVAL);

	spsr &= PSR_SETTABLE_32;
	spsr |= tf->tf_spsr & ~PSR_SETTABLE_32;

	if ((td->td_dbgflags & TDB_STEP) != 0) {
		spsr |= PSR_SS;
		td->td_pcb->pcb_flags |= PCB_SINGLE_STEP;
		WRITE_SPECIALREG(mdscr_el1,
		    READ_SPECIALREG(mdscr_el1) | MDSCR_SS);
	}

	for (i = 0; i < 15; i++)
		tf->tf_x[i] = mcp->mc_gregset[i];
	tf->tf_elr = mcp->mc_gregset[15];
	tf->tf_spsr = spsr;
#ifdef VFP
	if (mcp->mc_vfp_size == sizeof(mc_vfp) && mcp->mc_vfp_ptr != 0) {
		if (copyin((void *)(uintptr_t)mcp->mc_vfp_ptr, &mc_vfp,
		    sizeof(mc_vfp)) != 0)
			return (EFAULT);
		set_fpcontext32(td, &mc_vfp);
	}
#endif

	return (0);
}

#define UC_COPY_SIZE offsetof(ucontext32_t, uc_link)

int
freebsd32_getcontext(struct thread *td, struct freebsd32_getcontext_args *uap)
{
	ucontext32_t uc;
	int ret;

	if (uap->ucp == NULL)
		ret = EINVAL;
	else {
		memset(&uc, 0, sizeof(uc));
		get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET);
		PROC_LOCK(td->td_proc);
		uc.uc_sigmask = td->td_sigmask;
		PROC_UNLOCK(td->td_proc);
		ret = copyout(&uc, uap->ucp, UC_COPY_SIZE);
	}
	return (ret);
}

int
freebsd32_setcontext(struct thread *td, struct freebsd32_setcontext_args *uap)
{
	ucontext32_t uc;
	int ret;

	if (uap->ucp == NULL)
		ret = EINVAL;
	else {
		ret = copyin(uap->ucp, &uc, UC_COPY_SIZE);
		if (ret == 0) {
			ret = set_mcontext32(td, &uc.uc_mcontext);
			if (ret == 0)
				kern_sigprocmask(td, SIG_SETMASK,
				    &uc.uc_sigmask, NULL, 0);
		}
	}
	return (ret);
}

int
freebsd32_sigreturn(struct thread *td, struct freebsd32_sigreturn_args *uap)
{
	ucontext32_t uc;
	int error;

	if (uap == NULL)
		return (EFAULT);
	if (copyin(uap->sigcntxp, &uc, sizeof(uc)))
		return (EFAULT);
	error = set_mcontext32(td, &uc.uc_mcontext);
	if (error != 0)
		return (error);

	/* Restore signal mask. */
	kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0);

	return (EJUSTRETURN);
}

int
freebsd32_swapcontext(struct thread *td, struct freebsd32_swapcontext_args *uap)
{
	ucontext32_t uc;
	int ret;

	if (uap->oucp == NULL || uap->ucp == NULL)
		ret = EINVAL;
	else {
		bzero(&uc, sizeof(uc));
		get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET);
		PROC_LOCK(td->td_proc);
		uc.uc_sigmask = td->td_sigmask;
		PROC_UNLOCK(td->td_proc);
		ret = copyout(&uc, uap->oucp, UC32_COPY_SIZE);
		if (ret == 0) {
			ret = copyin(uap->ucp, &uc, UC32_COPY_SIZE);
			if (ret == 0) {
				ret = set_mcontext32(td, &uc.uc_mcontext);
				kern_sigprocmask(td, SIG_SETMASK,
				    &uc.uc_sigmask, NULL, 0);
			}
		}
	}
	return (ret);
}

void
freebsd32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct thread *td;
	struct proc *p;
	struct trapframe *tf;
	struct sigframe32 *fp, frame;
	struct sigacts *psp;
	struct siginfo32 siginfo;
	struct sysentvec *sysent;
	int onstack;
	int sig;

	siginfo_to_siginfo32(&ksi->ksi_info, &siginfo);
	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	sig = ksi->ksi_signo;
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	tf = td->td_frame;
	onstack = sigonstack(tf->tf_x[13]);

	CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm,
	    catcher, sig);

	/* Allocate and validate space for the signal handler context. */
	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !(onstack) &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct sigframe32 *)((uintptr_t)td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size);
#if defined(COMPAT_43)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
#endif
	} else
		fp = (struct sigframe32 *)td->td_frame->tf_x[13];

	/* make room on the stack */
	fp--;

	/* make the stack aligned */
	fp = (struct sigframe32 *)((unsigned long)(fp) &~ (8 - 1));

	/* Populate the siginfo frame.
*/ get_mcontext32(td, &frame.sf_uc.uc_mcontext, 0); #ifdef VFP get_fpcontext32(td, &frame.sf_vfp); frame.sf_uc.uc_mcontext.mc_vfp_size = sizeof(fp->sf_vfp); frame.sf_uc.uc_mcontext.mc_vfp_ptr = (uint32_t)(uintptr_t)&fp->sf_vfp; #else frame.sf_uc.uc_mcontext.mc_vfp_size = 0; frame.sf_uc.uc_mcontext.mc_vfp_ptr = (uint32_t)NULL; #endif frame.sf_si = siginfo; frame.sf_uc.uc_sigmask = *mask; frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK ) ? ((onstack) ? SS_ONSTACK : 0) : SS_DISABLE; frame.sf_uc.uc_stack.ss_sp = (uintptr_t)td->td_sigstk.ss_sp; frame.sf_uc.uc_stack.ss_size = td->td_sigstk.ss_size; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(td->td_proc); /* Copy the sigframe out to the user's stack. */ if (copyout(&frame, fp, sizeof(*fp)) != 0) { /* Process has trashed its stack. Kill it. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp); PROC_LOCK(p); sigexit(td, SIGILL); } /* * Build context to run handler in. We invoke the handler * directly, only returning via the trampoline. Note the * trampoline version numbers are coordinated with machine- * dependent code in libc. */ tf->tf_x[0] = sig; tf->tf_x[1] = (register_t)&fp->sf_si; tf->tf_x[2] = (register_t)&fp->sf_uc; /* the trampoline uses r5 as the uc address */ tf->tf_x[5] = (register_t)&fp->sf_uc; tf->tf_elr = (register_t)catcher; tf->tf_x[13] = (register_t)fp; sysent = p->p_sysent; if (PROC_HAS_SHP(p)) tf->tf_x[14] = (register_t)PROC_SIGCODE(p); else tf->tf_x[14] = (register_t)(PROC_PS_STRINGS(p) - *(sysent->sv_szsigcode)); /* Set the mode to enter in the signal handler */ if ((register_t)catcher & 1) tf->tf_spsr |= PSR_T; else tf->tf_spsr &= ~PSR_T; /* Clear the single step flag while in the signal handler */ if ((td->td_pcb->pcb_flags & PCB_SINGLE_STEP) != 0) { td->td_pcb->pcb_flags &= ~PCB_SINGLE_STEP; WRITE_SPECIALREG(mdscr_el1, READ_SPECIALREG(mdscr_el1) & ~MDSCR_SS); isb(); } CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_x[14], tf->tf_x[13]); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #ifdef COMPAT_43 /* * Mirror the osigreturn definition in kern_sig.c for !i386 platforms. This * mirrors what's connected to the FreeBSD/arm syscall. */ int ofreebsd32_sigreturn(struct thread *td, struct ofreebsd32_sigreturn_args *uap) { return (nosys(td, (struct nosys_args *)uap)); } #endif diff --git a/sys/arm64/arm64/ptrace_machdep.c b/sys/arm64/arm64/ptrace_machdep.c index 01135978b39a..079391ac102c 100644 --- a/sys/arm64/arm64/ptrace_machdep.c +++ b/sys/arm64/arm64/ptrace_machdep.c @@ -1,191 +1,192 @@ /*- * Copyright (c) 2014 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include /* Only used to get/set 32bits VFP regs */ int cpu_ptrace(struct thread *td, int req, void *arg, int data) { #if defined(VFP) && defined(COMPAT_FREEBSD32) mcontext32_vfp_t vfp; int error; if (!SV_CURPROC_FLAG(SV_ILP32)) return (EINVAL); switch (req) { case PT_GETVFPREGS32: get_fpcontext32(td, &vfp); error = copyout(&vfp, arg, sizeof(vfp)); break; case PT_SETVFPREGS32: error = copyin(arg, &vfp, sizeof(vfp)); if (error == 0) set_fpcontext32(td, &vfp); break; default: error = EINVAL; } return (error); #else return (EINVAL); #endif } #if defined(VFP) && defined(COMPAT_FREEBSD32) static bool get_arm_vfp(struct regset *rs, struct thread *td, void *buf, size_t *sizep) { if (buf != NULL) { KASSERT(*sizep == sizeof(mcontext32_vfp_t), ("%s: invalid size", __func__)); get_fpcontext32(td, buf); } *sizep = sizeof(mcontext32_vfp_t); return (true); } static bool set_arm_vfp(struct regset *rs, struct thread *td, void *buf, size_t size) { KASSERT(size == sizeof(mcontext32_vfp_t), ("%s: invalid size", __func__)); set_fpcontext32(td, buf); return (true); } static struct regset regset_arm_vfp = { .note = NT_ARM_VFP, .size = sizeof(mcontext32_vfp_t), .get = get_arm_vfp, .set = set_arm_vfp, }; ELF32_REGSET(regset_arm_vfp); #endif static bool get_arm64_tls(struct regset *rs, struct thread *td, void *buf, size_t *sizep) { if (buf != NULL) { KASSERT(*sizep == sizeof(td->td_pcb->pcb_tpidr_el0), ("%s: invalid size", __func__)); memcpy(buf, &td->td_pcb->pcb_tpidr_el0, sizeof(td->td_pcb->pcb_tpidr_el0)); } *sizep = sizeof(td->td_pcb->pcb_tpidr_el0); return (true); } static struct regset regset_arm64_tls = { .note = NT_ARM_TLS, .size = sizeof(uint64_t), .get = get_arm64_tls, }; ELF_REGSET(regset_arm64_tls); #ifdef COMPAT_FREEBSD32 static bool get_arm_tls(struct regset *rs, struct thread *td, void *buf, size_t *sizep) { if (buf != NULL) { uint32_t tp; KASSERT(*sizep == sizeof(uint32_t), ("%s: invalid size", __func__)); tp = (uint32_t)td->td_pcb->pcb_tpidr_el0; memcpy(buf, &tp, sizeof(tp)); } *sizep = sizeof(uint32_t); return (true); } static struct regset regset_arm_tls = { .note = NT_ARM_TLS, .size = sizeof(uint32_t), .get = get_arm_tls, }; ELF32_REGSET(regset_arm_tls); #endif int ptrace_set_pc(struct thread *td, u_long addr) { td->td_frame->tf_elr = addr; return (0); } int ptrace_single_step(struct thread *td) { PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); if ((td->td_frame->tf_spsr & PSR_SS) == 0) { td->td_frame->tf_spsr |= PSR_SS; td->td_pcb->pcb_flags |= PCB_SINGLE_STEP; td->td_dbgflags |= TDB_STEP; } return (0); } int ptrace_clear_single_step(struct thread *td) { PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); td->td_frame->tf_spsr &= ~PSR_SS; td->td_pcb->pcb_flags &= ~PCB_SINGLE_STEP; td->td_dbgflags &= ~TDB_STEP; return (0); } diff --git a/sys/arm64/include/proc.h b/sys/arm64/include/proc.h index 
15361a0e3788..9a22fe43833a 100644 --- a/sys/arm64/include/proc.h +++ b/sys/arm64/include/proc.h @@ -1,87 +1,75 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)proc.h 7.1 (Berkeley) 5/15/91 * from: FreeBSD: src/sys/i386/include/proc.h,v 1.11 2001/06/29 * $FreeBSD$ */ #ifndef _MACHINE_PROC_H_ #define _MACHINE_PROC_H_ struct ptrauth_key { uint64_t pa_key_lo; uint64_t pa_key_hi; }; struct mdthread { int md_spinlock_count; /* (k) */ register_t md_saved_daif; /* (k) */ uintptr_t md_canary; /* * The pointer authentication keys. These are shared within a process, * however this may change for some keys as the PAuth ABI Extension to * ELF for the Arm 64-bit Architecture [1] is currently (July 2021) at * an Alpha release quality so may change. * * [1] https://github.com/ARM-software/abi-aa/blob/main/pauthabielf64/pauthabielf64.rst */ struct { struct ptrauth_key apia; struct ptrauth_key apib; struct ptrauth_key apda; struct ptrauth_key apdb; struct ptrauth_key apga; } md_ptrauth_user; struct { struct ptrauth_key apia; } md_ptrauth_kern; }; struct mdproc { long md_dummy; }; #define KINFO_PROC_SIZE 1088 #define KINFO_PROC32_SIZE 816 -#ifdef _KERNEL - -#include - -#define GET_STACK_USAGE(total, used) do { \ - struct thread *td = curthread; \ - (total) = td->td_kstack_pages * PAGE_SIZE - sizeof(struct pcb); \ - (used) = td->td_kstack + (total) - (vm_offset_t)&td; \ -} while (0) - -#endif - #endif /* !_MACHINE_PROC_H_ */ diff --git a/sys/arm64/include/stack.h b/sys/arm64/include/stack.h index 4b1d190df595..4c4c41bf9516 100644 --- a/sys/arm64/include/stack.h +++ b/sys/arm64/include/stack.h @@ -1,42 +1,60 @@ /*- * Copyright (c) 2001 Jake Burkholder. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_STACK_H_ #define _MACHINE_STACK_H_ #define INKERNEL(va) \ ((va) >= VM_MIN_KERNEL_ADDRESS && (va) <= VM_MAX_KERNEL_ADDRESS) struct unwind_state { uintptr_t fp; uintptr_t pc; }; bool unwind_frame(struct thread *, struct unwind_state *); +#ifdef _SYS_PROC_H_ + +#include + +#define GET_STACK_USAGE(total, used) do { \ + struct thread *td = curthread; \ + (total) = td->td_kstack_pages * PAGE_SIZE - sizeof(struct pcb); \ + (used) = td->td_kstack + (total) - (vm_offset_t)&td; \ +} while (0) + +static __inline bool +kstack_contains(struct thread *td, vm_offset_t va, size_t len) +{ + return (va >= td->td_kstack && va + len >= va && + va + len <= td->td_kstack + td->td_kstack_pages * PAGE_SIZE); +} +#endif /* _SYS_PROC_H_ */ + #endif /* !_MACHINE_STACK_H_ */ diff --git a/sys/arm64/linux/linux_sysvec.c b/sys/arm64/linux/linux_sysvec.c index 41ac2912be29..9a82dc94b6ac 100644 --- a/sys/arm64/linux/linux_sysvec.c +++ b/sys/arm64/linux/linux_sysvec.c @@ -1,812 +1,812 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1994-1996 Søren Schmidt * Copyright (c) 2018 Turing Robotic Industries Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include - +#include #ifdef VFP #include #endif MODULE_VERSION(linux64elf, 1); #define LINUX_VDSOPAGE_SIZE PAGE_SIZE * 2 #define LINUX_VDSOPAGE (VM_MAXUSER_ADDRESS - \ LINUX_VDSOPAGE_SIZE) #define LINUX_SHAREDPAGE (LINUX_VDSOPAGE - PAGE_SIZE) /* * PAGE_SIZE - the size * of the native SHAREDPAGE */ #define LINUX_USRSTACK LINUX_SHAREDPAGE #define LINUX_PS_STRINGS (LINUX_USRSTACK - \ sizeof(struct ps_strings)) static int linux_szsigcode; static vm_object_t linux_vdso_obj; static char *linux_vdso_mapping; extern char _binary_linux_vdso_so_o_start; extern char _binary_linux_vdso_so_o_end; static vm_offset_t linux_vdso_base; extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; extern const char *linux_syscallnames[]; SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); static int linux_copyout_strings(struct image_params *imgp, uintptr_t *stack_base); static int linux_elf_fixup(uintptr_t *stack_base, struct image_params *iparams); static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel); static void linux_vdso_install(const void *param); static void linux_vdso_deinstall(const void *param); static void linux_vdso_reloc(char *mapping, Elf_Addr offset); static void linux_set_syscall_retval(struct thread *td, int error); static int linux_fetch_syscall_args(struct thread *td); static void linux_exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack); static void linux_exec_sysvec_init(void *param); static int linux_on_exec_vmspace(struct proc *p, struct image_params *imgp); /* DTrace init */ LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); /* DTrace probes */ LIN_SDT_PROBE_DEFINE0(sysvec, linux_exec_setregs, todo); LIN_SDT_PROBE_DEFINE0(sysvec, linux_copyout_auxargs, todo); LIN_SDT_PROBE_DEFINE0(sysvec, linux_elf_fixup, todo); LINUX_VDSO_SYM_CHAR(linux_platform); LINUX_VDSO_SYM_INTPTR(kern_timekeep_base); LINUX_VDSO_SYM_INTPTR(linux_vdso_sigcode); static int linux_fetch_syscall_args(struct thread *td) { struct proc *p; struct syscall_args *sa; register_t *ap; p = td->td_proc; ap = td->td_frame->tf_x; sa = &td->td_sa; sa->code = td->td_frame->tf_x[8]; sa->original_code = sa->code; /* LINUXTODO: generic syscall? 
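 * (On arm64 the Linux ABI passes the syscall number in x8, which is why
 * sa->code is read from tf_x[8] above; the argument registers starting
 * at x0 are then copied wholesale into sa->args below.)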
*/ if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; if (sa->callp->sy_narg > nitems(sa->args)) panic("ARM64TODO: Could we have more than %zu args?", nitems(sa->args)); memcpy(sa->args, ap, nitems(sa->args) * sizeof(register_t)); td->td_retval[0] = 0; return (0); } static void linux_set_syscall_retval(struct thread *td, int error) { td->td_retval[1] = td->td_frame->tf_x[1]; cpu_set_syscall_retval(td, error); if (__predict_false(error != 0)) { if (error != ERESTART && error != EJUSTRETURN) td->td_frame->tf_x[0] = bsd_to_linux_errno(error); } } static int linux_copyout_auxargs(struct image_params *imgp, uintptr_t base) { Elf_Auxargs *args; Elf_Auxinfo *argarray, *pos; struct proc *p; int error, issetugid; LIN_SDT_PROBE0(sysvec, linux_copyout_auxargs, todo); p = imgp->proc; args = (Elf64_Auxargs *)imgp->auxargs; argarray = pos = malloc(LINUX_AT_COUNT * sizeof(*pos), M_TEMP, M_WAITOK | M_ZERO); issetugid = p->p_flag & P_SUGID ? 1 : 0; AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, linux_vdso_base); AUXARGS_ENTRY(pos, LINUX_AT_MINSIGSTKSZ, LINUX_MINSIGSTKSZ); AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, *imgp->sysent->sv_hwcap); AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz); AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); AUXARGS_ENTRY(pos, AT_PHENT, args->phent); AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); AUXARGS_ENTRY(pos, AT_BASE, args->base); AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); AUXARGS_ENTRY(pos, LINUX_AT_SECURE, issetugid); AUXARGS_ENTRY_PTR(pos, LINUX_AT_RANDOM, imgp->canary); AUXARGS_ENTRY(pos, LINUX_AT_HWCAP2, *imgp->sysent->sv_hwcap2); if (imgp->execpathp != 0) AUXARGS_ENTRY_PTR(pos, LINUX_AT_EXECFN, imgp->execpathp); if (args->execfd != -1) AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform)); AUXARGS_ENTRY(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); imgp->auxargs = NULL; KASSERT(pos - argarray <= LINUX_AT_COUNT, ("Too many auxargs")); error = copyout(argarray, (void *)base, sizeof(*argarray) * LINUX_AT_COUNT); free(argarray, M_TEMP); return (error); } static int linux_elf_fixup(uintptr_t *stack_base, struct image_params *imgp) { LIN_SDT_PROBE0(sysvec, linux_elf_fixup, todo); return (0); } /* * Copy strings out to the new process address space, constructing new arg * and env vector tables. Return a pointer to the base so that it can be used * as the initial stack pointer. * LINUXTODO: deduplicate against other linuxulator archs */ static int linux_copyout_strings(struct image_params *imgp, uintptr_t *stack_base) { char **vectp; char *stringp; uintptr_t destp, ustringp; struct ps_strings *arginfo; char canary[LINUX_AT_RANDOM_LEN]; size_t execpath_len; struct proc *p; int argc, envc, error; p = imgp->proc; arginfo = (struct ps_strings *)PROC_PS_STRINGS(p); destp = (uintptr_t)arginfo; if (imgp->execpath != NULL && imgp->auxargs != NULL) { execpath_len = strlen(imgp->execpath) + 1; destp -= execpath_len; destp = rounddown2(destp, sizeof(void *)); imgp->execpathp = (void *)destp; error = copyout(imgp->execpath, imgp->execpathp, execpath_len); if (error != 0) return (error); } /* Prepare the canary for SSP. 
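 * (LINUX_AT_RANDOM_LEN random bytes go onto the new stack and are
 * advertised through the LINUX_AT_RANDOM auxargs entry above; glibc
 * seeds its stack-protector guard from them.)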
*/ arc4rand(canary, sizeof(canary), 0); destp -= roundup(sizeof(canary), sizeof(void *)); imgp->canary = (void *)destp; error = copyout(canary, imgp->canary, sizeof(canary)); if (error != 0) return (error); /* Allocate room for the argument and environment strings. */ destp -= ARG_MAX - imgp->args->stringspace; destp = rounddown2(destp, sizeof(void *)); ustringp = destp; if (imgp->auxargs) { /* * Allocate room on the stack for the ELF auxargs * array. It has up to LINUX_AT_COUNT entries. */ destp -= LINUX_AT_COUNT * sizeof(Elf64_Auxinfo); destp = rounddown2(destp, sizeof(void *)); } vectp = (char **)destp; /* * Allocate room for argc and the argv[] and env vectors including the * terminating NULL pointers. */ vectp -= 1 + imgp->args->argc + 1 + imgp->args->envc + 1; vectp = (char **)STACKALIGN(vectp); /* vectp also becomes our initial stack base. */ *stack_base = (uintptr_t)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* Copy out strings - arguments and environment. */ error = copyout(stringp, (void *)ustringp, ARG_MAX - imgp->args->stringspace); if (error != 0) return (error); /* Fill in "ps_strings" struct for ps, w, etc. */ if (suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp) != 0 || suword(&arginfo->ps_nargvstr, argc) != 0) return (EFAULT); if (suword(vectp++, argc) != 0) return (EFAULT); /* Fill in argument portion of vector table. */ for (; argc > 0; --argc) { if (suword(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* A null vector table pointer separates the argp's from the envp's. */ if (suword(vectp++, 0) != 0) return (EFAULT); if (suword(&arginfo->ps_envstr, (long)(intptr_t)vectp) != 0 || suword(&arginfo->ps_nenvstr, envc) != 0) return (EFAULT); /* Fill in environment portion of vector table. */ for (; envc > 0; --envc) { if (suword(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* The end of the vector table is a null pointer. */ if (suword(vectp, 0) != 0) return (EFAULT); if (imgp->auxargs) { vectp++; error = imgp->sysent->sv_copyout_auxargs(imgp, (uintptr_t)vectp); if (error != 0) return (error); } return (0); } /* * Reset registers to default values on exec. */ static void linux_exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; /* LINUXTODO: validate */ LIN_SDT_PROBE0(sysvec, linux_exec_setregs, todo); memset(regs, 0, sizeof(*regs)); /* glibc start.S registers function pointer in x0 with atexit. */ regs->tf_sp = stack; #if 0 /* LINUXTODO: See if this is used. */ regs->tf_lr = imgp->entry_addr; #else regs->tf_lr = 0xffffffffffffffff; #endif regs->tf_elr = imgp->entry_addr; pcb->pcb_tpidr_el0 = 0; pcb->pcb_tpidrro_el0 = 0; WRITE_SPECIALREG(tpidrro_el0, 0); WRITE_SPECIALREG(tpidr_el0, 0); #ifdef VFP vfp_reset_state(td, pcb); #endif /* * Clear debug register state. It is not applicable to the new process. */ bzero(&pcb->pcb_dbg_regs, sizeof(pcb->pcb_dbg_regs)); } int linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) { struct l_sigframe *frame; ucontext_t uc; struct trapframe *tf; int error; tf = td->td_frame; frame = (struct l_sigframe *)tf->tf_sp; if (copyin((void *)&frame->uc, &uc, sizeof(uc))) return (EFAULT); error = set_mcontext(td, &uc.uc_mcontext); if (error != 0) return (error); /* Restore signal mask. 
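 * (The mask saved in the ucontext by linux_rt_sendsig() is reinstated,
 * and EJUSTRETURN keeps the syscall return path from clobbering the
 * register state that set_mcontext() just restored.)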
*/ kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); return (EJUSTRETURN); } static void linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td; struct proc *p; struct trapframe *tf; struct l_sigframe *fp, *frame; struct l_fpsimd_context *fpsimd; struct l_esr_context *esr; l_stack_t uc_stack; ucontext_t uc; uint8_t *scr; struct sigacts *psp; int onstack, sig, issiginfo; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; onstack = sigonstack(tf->tf_sp); issiginfo = SIGISMEMBER(psp->ps_siginfo, sig); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !onstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct l_sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else { fp = (struct l_sigframe *)td->td_frame->tf_sp; } /* Make room, keeping the stack aligned */ fp--; fp = (struct l_sigframe *)STACKALIGN(fp); get_mcontext(td, &uc.uc_mcontext, 0); uc.uc_sigmask = *mask; uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); uc_stack.ss_size = td->td_sigstk.ss_size; uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) != 0 ? (onstack ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(td->td_proc); /* Fill in the frame to copy out */ frame = malloc(sizeof(*frame), M_LINUX, M_WAITOK | M_ZERO); memcpy(&frame->sf.sf_uc.uc_sc.regs, tf->tf_x, sizeof(tf->tf_x)); frame->sf.sf_uc.uc_sc.regs[30] = tf->tf_lr; frame->sf.sf_uc.uc_sc.sp = tf->tf_sp; frame->sf.sf_uc.uc_sc.pc = tf->tf_lr; frame->sf.sf_uc.uc_sc.pstate = tf->tf_spsr; frame->sf.sf_uc.uc_sc.fault_address = (register_t)ksi->ksi_addr; /* Stack frame for unwinding */ frame->fp = tf->tf_x[29]; frame->lr = tf->tf_lr; /* Translate the signal. */ sig = bsd_to_linux_signal(sig); siginfo_to_lsiginfo(&ksi->ksi_info, &frame->sf.sf_si, sig); bsd_to_linux_sigset(mask, &frame->sf.sf_uc.uc_sigmask); /* * Prepare fpsimd & esr. Does not check sizes, as * __reserved is big enough. */ scr = (uint8_t *)&frame->sf.sf_uc.uc_sc.__reserved; #ifdef VFP fpsimd = (struct l_fpsimd_context *) scr; fpsimd->head.magic = L_FPSIMD_MAGIC; fpsimd->head.size = sizeof(struct l_fpsimd_context); fpsimd->fpsr = uc.uc_mcontext.mc_fpregs.fp_sr; fpsimd->fpcr = uc.uc_mcontext.mc_fpregs.fp_cr; memcpy(fpsimd->vregs, &uc.uc_mcontext.mc_fpregs.fp_q, sizeof(uc.uc_mcontext.mc_fpregs.fp_q)); scr += roundup(sizeof(struct l_fpsimd_context), 16); #endif if (ksi->ksi_addr != 0) { esr = (struct l_esr_context *) scr; esr->head.magic = L_ESR_MAGIC; esr->head.size = sizeof(struct l_esr_context); esr->esr = tf->tf_esr; } memcpy(&frame->sf.sf_uc.uc_stack, &uc_stack, sizeof(uc_stack)); memcpy(&frame->uc, &uc, sizeof(uc)); /* Copy the sigframe out to the user's stack. */ if (copyout(frame, fp, sizeof(*fp)) != 0) { /* Process has trashed its stack. Kill it.
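 * (sigexit() does not return; using SIGILL matches what the native
 * sendsig() does when the signal frame cannot be copied out.)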
*/ free(frame, M_LINUX); CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp); PROC_LOCK(p); sigexit(td, SIGILL); } free(frame, M_LINUX); tf->tf_x[0]= sig; if (issiginfo) { tf->tf_x[1] = (register_t)&fp->sf.sf_si; tf->tf_x[2] = (register_t)&fp->sf.sf_uc; } else { tf->tf_x[1] = 0; tf->tf_x[2] = 0; } tf->tf_x[8] = (register_t)catcher; tf->tf_sp = (register_t)fp; tf->tf_elr = (register_t)linux_vdso_sigcode; CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_elr, tf->tf_sp); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } struct sysentvec elf_linux_sysvec = { .sv_size = LINUX_SYS_MAXSYSCALL, .sv_table = linux_sysent, .sv_fixup = linux_elf_fixup, .sv_sendsig = linux_rt_sendsig, .sv_sigcode = &_binary_linux_vdso_so_o_start, .sv_szsigcode = &linux_szsigcode, .sv_name = "Linux ELF64", .sv_coredump = elf64_coredump, .sv_elf_core_osabi = ELFOSABI_NONE, .sv_elf_core_abi_vendor = LINUX_ABI_VENDOR, .sv_elf_core_prepare_notes = linux64_prepare_notes, .sv_imgact_try = linux_exec_imgact_try, .sv_minsigstksz = LINUX_MINSIGSTKSZ, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = LINUX_USRSTACK, .sv_psstrings = LINUX_PS_STRINGS, .sv_psstringssz = sizeof(struct ps_strings), .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, .sv_copyout_auxargs = linux_copyout_auxargs, .sv_copyout_strings = linux_copyout_strings, .sv_setregs = linux_exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_LINUX | SV_LP64 | SV_SHP | SV_SIG_DISCIGN | SV_SIG_WAITNDQ | SV_TIMEKEEP, .sv_set_syscall_retval = linux_set_syscall_retval, .sv_fetch_syscall_args = linux_fetch_syscall_args, .sv_syscallnames = linux_syscallnames, .sv_shared_page_base = LINUX_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, .sv_thread_detach = linux_thread_detach, .sv_trap = NULL, .sv_hwcap = &elf_hwcap, .sv_hwcap2 = &elf_hwcap2, .sv_onexec = linux_on_exec_vmspace, .sv_onexit = linux_on_exit, .sv_ontdexit = linux_thread_dtor, .sv_setid_allowed = &linux_setid_allowed_query, }; static int linux_on_exec_vmspace(struct proc *p, struct image_params *imgp) { int error; error = linux_map_vdso(p, linux_vdso_obj, linux_vdso_base, LINUX_VDSOPAGE_SIZE, imgp); if (error == 0) linux_on_exec(p, imgp); return (error); } /* * linux_vdso_install() and linux_exec_sysvec_init() must be called * after exec_sysvec_init() which is SI_SUB_EXEC (SI_ORDER_ANY). 
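 * (Hence both SYSINITs below run at SI_SUB_EXEC + 1: the vDSO pages are
 * installed and relocated first (SI_ORDER_FIRST), and the sysvec fixup
 * that patches the timekeep pointer runs afterwards (SI_ORDER_ANY).)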
*/ static void linux_exec_sysvec_init(void *param) { l_uintptr_t *ktimekeep_base; struct sysentvec *sv; ptrdiff_t tkoff; sv = param; /* Fill timekeep_base */ exec_sysvec_init(sv); tkoff = kern_timekeep_base - linux_vdso_base; ktimekeep_base = (l_uintptr_t *)(linux_vdso_mapping + tkoff); *ktimekeep_base = sv->sv_shared_page_base + sv->sv_timekeep_offset; } SYSINIT(elf_linux_exec_sysvec_init, SI_SUB_EXEC + 1, SI_ORDER_ANY, linux_exec_sysvec_init, &elf_linux_sysvec); static void linux_vdso_install(const void *param) { char *vdso_start = &_binary_linux_vdso_so_o_start; char *vdso_end = &_binary_linux_vdso_so_o_end; linux_szsigcode = vdso_end - vdso_start; MPASS(linux_szsigcode <= LINUX_VDSOPAGE_SIZE); linux_vdso_base = LINUX_VDSOPAGE; __elfN(linux_vdso_fixup)(vdso_start, linux_vdso_base); linux_vdso_obj = __elfN(linux_shared_page_init) (&linux_vdso_mapping, LINUX_VDSOPAGE_SIZE); bcopy(vdso_start, linux_vdso_mapping, linux_szsigcode); linux_vdso_reloc(linux_vdso_mapping, linux_vdso_base); } SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC + 1, SI_ORDER_FIRST, linux_vdso_install, NULL); static void linux_vdso_deinstall(const void *param) { __elfN(linux_shared_page_fini)(linux_vdso_obj, linux_vdso_mapping, LINUX_VDSOPAGE_SIZE); } SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, linux_vdso_deinstall, NULL); static void linux_vdso_reloc(char *mapping, Elf_Addr offset) { Elf_Size rtype, symidx; const Elf_Rela *rela; const Elf_Shdr *shdr; const Elf_Ehdr *ehdr; Elf_Addr *where; Elf_Addr addr, addend; int i, relacnt; MPASS(offset != 0); relacnt = 0; ehdr = (const Elf_Ehdr *)mapping; shdr = (const Elf_Shdr *)(mapping + ehdr->e_shoff); for (i = 0; i < ehdr->e_shnum; i++) { switch (shdr[i].sh_type) { case SHT_REL: printf("Linux Aarch64 vDSO: unexpected Rel section\n"); break; case SHT_RELA: rela = (const Elf_Rela *)(mapping + shdr[i].sh_offset); relacnt = shdr[i].sh_size / sizeof(*rela); } } for (i = 0; i < relacnt; i++, rela++) { where = (Elf_Addr *)(mapping + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); switch (rtype) { case R_AARCH64_NONE: /* none */ break; case R_AARCH64_RELATIVE: /* B + A */ addr = (Elf_Addr)(mapping + addend); if (*where != addr) *where = addr; break; default: printf("Linux Aarch64 vDSO: unexpected relocation type %ld, " "symbol index %ld\n", rtype, symidx); } } } static char GNU_ABI_VENDOR[] = "GNU"; static int GNU_ABI_LINUX = 0; /* LINUXTODO: deduplicate */ static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel) { const Elf32_Word *desc; uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); desc = (const Elf32_Word *)p; if (desc[0] != GNU_ABI_LINUX) return (false); *osrel = LINUX_KERNVER(desc[1], desc[2], desc[3]); return (true); } static Elf_Brandnote linux64_brandnote = { .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), .hdr.n_descsz = 16, .hdr.n_type = 1, .vendor = GNU_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = linux_trans_osrel }; static Elf64_Brandinfo linux_glibc2brand = { .brand = ELFOSABI_LINUX, .machine = EM_AARCH64, .compat_3_brand = "Linux", .emul_path = linux_emul_path, .interp_path = "/lib64/ld-linux-x86-64.so.2", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux64_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; Elf64_Brandinfo *linux_brandlist[] = { &linux_glibc2brand, NULL }; static int linux64_elf_modevent(module_t mod, int type, void *data) { Elf64_Brandinfo **brandinfo; struct linux_ioctl_handler**lihp; int 
error; error = 0; switch(type) { case MOD_LOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf64_insert_brand_entry(*brandinfo) < 0) error = EINVAL; if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_register_handler(*lihp); stclohz = (stathz ? stathz : hz); if (bootverbose) printf("Linux arm64 ELF exec handler installed\n"); } break; case MOD_UNLOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf64_brand_inuse(*brandinfo)) error = EBUSY; if (error == 0) { for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf64_remove_brand_entry(*brandinfo) < 0) error = EINVAL; } if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_unregister_handler(*lihp); if (bootverbose) printf("Linux arm64 ELF exec handler removed\n"); } else printf("Could not deinstall Linux arm64 ELF interpreter entry\n"); break; default: return (EOPNOTSUPP); } return (error); } static moduledata_t linux64_elf_mod = { "linux64elf", linux64_elf_modevent, 0 }; DECLARE_MODULE_TIED(linux64elf, linux64_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); MODULE_DEPEND(linux64elf, linux_common, 1, 1, 1); FEATURE(linux64, "AArch64 Linux 64bit support"); diff --git a/sys/ddb/db_ps.c b/sys/ddb/db_ps.c index a5245528ca83..5d713264d975 100644 --- a/sys/ddb/db_ps.c +++ b/sys/ddb/db_ps.c @@ -1,533 +1,535 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1993 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include +#include + #define PRINT_NONE 0 #define PRINT_ARGS 1 static void dumpthread(volatile struct proc *p, volatile struct thread *td, int all); static void db_ps_proc(struct proc *p); static int ps_mode; /* * At least one non-optional show-command must be implemented using * DB_SHOW_ALL_COMMAND() so that db_show_all_cmd_set gets created. * Here is one. 
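 * (From the ddb prompt this is reachable as "show all procs", and the
 * same handler backs the classic "ps" command; the "a" modifier makes
 * either form append the process arguments.)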
*/ DB_SHOW_ALL_COMMAND(procs, db_procs_cmd) { db_ps(addr, have_addr, count, modif); } static void dump_args(volatile struct proc *p) { char *args; int i, len; if (p->p_args == NULL) return; args = p->p_args->ar_args; len = (int)p->p_args->ar_length; for (i = 0; i < len; i++) { if (args[i] == '\0') db_printf(" "); else db_printf("%c", args[i]); } } /* * Layout: * - column counts * - header * - single-threaded process * - multi-threaded process * - thread in a MT process * * 1 2 3 4 5 6 7 * 1234567890123456789012345678901234567890123456789012345678901234567890 * pid ppid pgrp uid state wmesg wchan cmd * * (threaded) * * * For machines with 64-bit pointers, we expand the wchan field 8 more * characters. */ void db_ps(db_expr_t addr, bool hasaddr, db_expr_t count, char *modif) { struct proc *p; int i; ps_mode = modif[0] == 'a' ? PRINT_ARGS : PRINT_NONE; #ifdef __LP64__ db_printf(" pid ppid pgrp uid state wmesg wchan cmd\n"); #else db_printf(" pid ppid pgrp uid state wmesg wchan cmd\n"); #endif if (!LIST_EMPTY(&allproc)) p = LIST_FIRST(&allproc); else p = &proc0; for (; p != NULL && !db_pager_quit; p = LIST_NEXT(p, p_list)) db_ps_proc(p); /* * Processes such as zombies not in allproc. */ for (i = 0; i <= pidhash && !db_pager_quit; i++) { LIST_FOREACH(p, &pidhashtbl[i], p_hash) { if (p->p_list.le_prev == NULL) db_ps_proc(p); } } } static void db_ps_proc(struct proc *p) { volatile struct proc *pp; volatile struct thread *td; struct ucred *cred; struct pgrp *pgrp; char state[9]; int rflag, sflag, dflag, lflag, wflag; pp = p->p_pptr; if (pp == NULL) pp = p; cred = p->p_ucred; pgrp = p->p_pgrp; db_printf("%5d %5d %5d %5d ", p->p_pid, pp->p_pid, pgrp != NULL ? pgrp->pg_id : 0, cred != NULL ? cred->cr_ruid : 0); /* Determine our primary process state. */ switch (p->p_state) { case PRS_NORMAL: if (P_SHOULDSTOP(p)) state[0] = 'T'; else { /* * One of D, L, R, S, W. For a * multithreaded process we will use * the state of the thread with the * highest precedence. The * precedence order from high to low * is R, L, D, S, W. If no thread is * in a sane state we use '?' for our * primary state. */ rflag = sflag = dflag = lflag = wflag = 0; FOREACH_THREAD_IN_PROC(p, td) { if (TD_GET_STATE(td) == TDS_RUNNING || TD_GET_STATE(td) == TDS_RUNQ || TD_GET_STATE(td) == TDS_CAN_RUN) rflag++; if (TD_ON_LOCK(td)) lflag++; if (TD_IS_SLEEPING(td)) { if (!(td->td_flags & TDF_SINTR)) dflag++; else sflag++; } if (TD_AWAITING_INTR(td)) wflag++; } if (rflag) state[0] = 'R'; else if (lflag) state[0] = 'L'; else if (dflag) state[0] = 'D'; else if (sflag) state[0] = 'S'; else if (wflag) state[0] = 'W'; else state[0] = '?'; } break; case PRS_NEW: state[0] = 'N'; break; case PRS_ZOMBIE: state[0] = 'Z'; break; default: state[0] = 'U'; break; } state[1] = '\0'; /* Additional process state flags. */ if (!(p->p_flag & P_INMEM)) strlcat(state, "W", sizeof(state)); if (p->p_flag & P_TRACED) strlcat(state, "X", sizeof(state)); if (p->p_flag & P_WEXIT && p->p_state != PRS_ZOMBIE) strlcat(state, "E", sizeof(state)); if (p->p_flag & P_PPWAIT) strlcat(state, "V", sizeof(state)); if (p->p_flag & P_SYSTEM || p->p_lock > 0) strlcat(state, "L", sizeof(state)); if (p->p_pgrp != NULL && p->p_session != NULL && SESS_LEADER(p)) strlcat(state, "s", sizeof(state)); /* Cheated here and didn't compare pgid's.
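 * (In ps(1) a true "+" means the process is in the foreground process
 * group of its controlling terminal; here P_CONTROLT alone is used as
 * an approximation.)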
*/ if (p->p_flag & P_CONTROLT) strlcat(state, "+", sizeof(state)); if (cred != NULL && jailed(cred)) strlcat(state, "J", sizeof(state)); db_printf(" %-6.6s ", state); if (p->p_flag & P_HADTHREADS) { #ifdef __LP64__ db_printf(" (threaded) "); #else db_printf(" (threaded) "); #endif if (p->p_flag & P_SYSTEM) db_printf("["); db_printf("%s", p->p_comm); if (p->p_flag & P_SYSTEM) db_printf("]"); if (ps_mode == PRINT_ARGS) { db_printf(" "); dump_args(p); } db_printf("\n"); } FOREACH_THREAD_IN_PROC(p, td) { dumpthread(p, td, p->p_flag & P_HADTHREADS); if (db_pager_quit) break; } } static void dumpthread(volatile struct proc *p, volatile struct thread *td, int all) { char state[9], wprefix; const char *wmesg; const void *wchan; if (all) { db_printf("%6d ", td->td_tid); switch (TD_GET_STATE(td)) { case TDS_RUNNING: snprintf(state, sizeof(state), "Run"); break; case TDS_RUNQ: snprintf(state, sizeof(state), "RunQ"); break; case TDS_CAN_RUN: snprintf(state, sizeof(state), "CanRun"); break; case TDS_INACTIVE: snprintf(state, sizeof(state), "Inactv"); break; case TDS_INHIBITED: state[0] = '\0'; if (TD_ON_LOCK(td)) strlcat(state, "L", sizeof(state)); if (TD_IS_SLEEPING(td)) { if (td->td_flags & TDF_SINTR) strlcat(state, "S", sizeof(state)); else strlcat(state, "D", sizeof(state)); } if (TD_IS_SWAPPED(td)) strlcat(state, "W", sizeof(state)); if (TD_AWAITING_INTR(td)) strlcat(state, "I", sizeof(state)); if (TD_IS_SUSPENDED(td)) strlcat(state, "s", sizeof(state)); if (state[0] != '\0') break; default: snprintf(state, sizeof(state), "???"); } db_printf(" %-6.6s ", state); } wprefix = ' '; if (TD_ON_LOCK(td)) { wprefix = '*'; wmesg = td->td_lockname; wchan = td->td_blocked; } else if (TD_ON_SLEEPQ(td)) { wmesg = td->td_wmesg; wchan = td->td_wchan; } else if (TD_IS_RUNNING(td)) { snprintf(state, sizeof(state), "CPU %d", td->td_oncpu); wmesg = state; wchan = NULL; } else { wmesg = ""; wchan = NULL; } db_printf("%c%-7.7s ", wprefix, wmesg); if (wchan == NULL) #ifdef __LP64__ db_printf("%18s ", ""); #else db_printf("%10s ", ""); #endif else db_printf("%p ", wchan); if (p->p_flag & P_SYSTEM) db_printf("["); if (td->td_name[0] != '\0') db_printf("%s", td->td_name); else db_printf("%s", td->td_proc->p_comm); if (p->p_flag & P_SYSTEM) db_printf("]"); if (ps_mode == PRINT_ARGS && all == 0) { db_printf(" "); dump_args(p); } db_printf("\n"); } DB_SHOW_COMMAND(thread, db_show_thread) { struct thread *td; struct lock_object *lock; u_int delta; bool comma; /* Determine which thread to examine. 
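 * (Without an address the current kdb thread is shown; an explicit
 * address is first tried as a decimal tid and otherwise treated as a
 * struct thread pointer.)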
*/ if (have_addr) td = db_lookup_thread(addr, false); else td = kdb_thread; lock = (struct lock_object *)td->td_lock; db_printf("Thread %d at %p:\n", td->td_tid, td); db_printf(" proc (pid %d): %p\n", td->td_proc->p_pid, td->td_proc); if (td->td_name[0] != '\0') db_printf(" name: %s\n", td->td_name); db_printf(" pcb: %p\n", td->td_pcb); db_printf(" stack: %p-%p\n", (void *)td->td_kstack, (void *)(td->td_kstack + td->td_kstack_pages * PAGE_SIZE - 1)); db_printf(" flags: %#x ", td->td_flags); db_printf(" pflags: %#x\n", td->td_pflags); db_printf(" state: "); switch (TD_GET_STATE(td)) { case TDS_INACTIVE: db_printf("INACTIVE\n"); break; case TDS_CAN_RUN: db_printf("CAN RUN\n"); break; case TDS_RUNQ: db_printf("RUNQ\n"); break; case TDS_RUNNING: db_printf("RUNNING (CPU %d)\n", td->td_oncpu); break; case TDS_INHIBITED: db_printf("INHIBITED: {"); comma = false; if (TD_IS_SLEEPING(td)) { db_printf("SLEEPING"); comma = true; } if (TD_IS_SUSPENDED(td)) { if (comma) db_printf(", "); db_printf("SUSPENDED"); comma = true; } if (TD_IS_SWAPPED(td)) { if (comma) db_printf(", "); db_printf("SWAPPED"); comma = true; } if (TD_ON_LOCK(td)) { if (comma) db_printf(", "); db_printf("LOCK"); comma = true; } if (TD_AWAITING_INTR(td)) { if (comma) db_printf(", "); db_printf("IWAIT"); } db_printf("}\n"); break; default: db_printf("??? (%#x)\n", TD_GET_STATE(td)); break; } if (TD_ON_LOCK(td)) db_printf(" lock: %s turnstile: %p\n", td->td_lockname, td->td_blocked); if (TD_ON_SLEEPQ(td)) db_printf( " wmesg: %s wchan: %p sleeptimo %lx. %jx (curr %lx. %jx)\n", td->td_wmesg, td->td_wchan, (long)sbttobt(td->td_sleeptimo).sec, (uintmax_t)sbttobt(td->td_sleeptimo).frac, (long)sbttobt(sbinuptime()).sec, (uintmax_t)sbttobt(sbinuptime()).frac); db_printf(" priority: %d\n", td->td_priority); db_printf(" container lock: %s (%p)\n", lock->lo_name, lock); if (td->td_swvoltick != 0) { delta = ticks - td->td_swvoltick; db_printf(" last voluntary switch: %u.%03u s ago\n", delta / hz, (delta % hz) * 1000 / hz); } if (td->td_swinvoltick != 0) { delta = ticks - td->td_swinvoltick; db_printf(" last involuntary switch: %u.%03u s ago\n", delta / hz, (delta % hz) * 1000 / hz); } } DB_SHOW_COMMAND(proc, db_show_proc) { struct thread *td; struct proc *p; int i; /* Determine which process to examine. */ if (have_addr) p = db_lookup_proc(addr); else p = kdb_thread->td_proc; db_printf("Process %d (%s) at %p:\n", p->p_pid, p->p_comm, p); db_printf(" state: "); switch (p->p_state) { case PRS_NEW: db_printf("NEW\n"); break; case PRS_NORMAL: db_printf("NORMAL\n"); break; case PRS_ZOMBIE: db_printf("ZOMBIE\n"); break; default: db_printf("??? (%#x)\n", p->p_state); } if (p->p_ucred != NULL) { db_printf(" uid: %d gids: ", p->p_ucred->cr_uid); for (i = 0; i < p->p_ucred->cr_ngroups; i++) { db_printf("%d", p->p_ucred->cr_groups[i]); if (i < (p->p_ucred->cr_ngroups - 1)) db_printf(", "); } db_printf("\n"); } if (p->p_pptr != NULL) db_printf(" parent: pid %d at %p\n", p->p_pptr->p_pid, p->p_pptr); if (p->p_leader != NULL && p->p_leader != p) db_printf(" leader: pid %d at %p\n", p->p_leader->p_pid, p->p_leader); if (p->p_sysent != NULL) db_printf(" ABI: %s\n", p->p_sysent->sv_name); db_printf(" flag: %#x ", p->p_flag); db_printf(" flag2: %#x\n", p->p_flag2); if (p->p_args != NULL) { db_printf(" arguments: "); dump_args(p); db_printf("\n"); } db_printf(" reaper: %p reapsubtree: %d\n", p->p_reaper, p->p_reapsubtree); db_printf(" sigparent: %d\n", p->p_sigparent); db_printf(" vmspace: %p\n", p->p_vmspace); db_printf(" (map %p)\n", (p->p_vmspace != NULL) ? 
&p->p_vmspace->vm_map : 0); db_printf(" (map.pmap %p)\n", (p->p_vmspace != NULL) ? &p->p_vmspace->vm_map.pmap : 0); db_printf(" (pmap %p)\n", (p->p_vmspace != NULL) ? &p->p_vmspace->vm_pmap : 0); db_printf(" threads: %d\n", p->p_numthreads); FOREACH_THREAD_IN_PROC(p, td) { dumpthread(p, td, 1); if (db_pager_quit) break; } } void db_findstack_cmd(db_expr_t addr, bool have_addr, db_expr_t dummy3 __unused, char *dummy4 __unused) { struct thread *td; vm_offset_t saddr; if (have_addr) saddr = addr; else { db_printf("Usage: findstack
\n"); return; } for (td = kdb_thr_first(); td != NULL; td = kdb_thr_next(td)) { if (kstack_contains(td, saddr, 1)) { db_printf("Thread %p\n", td); return; } } } diff --git a/sys/geom/geom_io.c b/sys/geom/geom_io.c index 20e31b9b921b..777f698b6f1f 100644 --- a/sys/geom/geom_io.c +++ b/sys/geom/geom_io.c @@ -1,1080 +1,1081 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include static int g_io_transient_map_bio(struct bio *bp); static struct g_bioq g_bio_run_down; static struct g_bioq g_bio_run_up; /* * Pace is a hint that we've had some trouble recently allocating * bios, so we should back off trying to send I/O down the stack * a bit to let the problem resolve. When pacing, we also turn * off direct dispatch to also reduce memory pressure from I/Os * there, at the expxense of some added latency while the memory * pressures exist. See g_io_schedule_down() for more details * and limitations. 
*/ static volatile u_int __read_mostly pace; static uma_zone_t __read_mostly biozone; #include static void g_bioq_lock(struct g_bioq *bq) { mtx_lock(&bq->bio_queue_lock); } static void g_bioq_unlock(struct g_bioq *bq) { mtx_unlock(&bq->bio_queue_lock); } #if 0 static void g_bioq_destroy(struct g_bioq *bq) { mtx_destroy(&bq->bio_queue_lock); } #endif static void g_bioq_init(struct g_bioq *bq) { TAILQ_INIT(&bq->bio_queue); mtx_init(&bq->bio_queue_lock, "bio queue", NULL, MTX_DEF); } static struct bio * g_bioq_first(struct g_bioq *bq) { struct bio *bp; bp = TAILQ_FIRST(&bq->bio_queue); if (bp != NULL) { KASSERT((bp->bio_flags & BIO_ONQUEUE), ("Bio not on queue bp=%p target %p", bp, bq)); bp->bio_flags &= ~BIO_ONQUEUE; TAILQ_REMOVE(&bq->bio_queue, bp, bio_queue); bq->bio_queue_length--; } return (bp); } struct bio * g_new_bio(void) { struct bio *bp; bp = uma_zalloc(biozone, M_NOWAIT | M_ZERO); #ifdef KTR if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) { struct stack st; CTR1(KTR_GEOM, "g_new_bio(): %p", bp); stack_save(&st); CTRSTACK(KTR_GEOM, &st, 3); } #endif return (bp); } struct bio * g_alloc_bio(void) { struct bio *bp; bp = uma_zalloc(biozone, M_WAITOK | M_ZERO); #ifdef KTR if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) { struct stack st; CTR1(KTR_GEOM, "g_alloc_bio(): %p", bp); stack_save(&st); CTRSTACK(KTR_GEOM, &st, 3); } #endif return (bp); } void g_destroy_bio(struct bio *bp) { #ifdef KTR if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) { struct stack st; CTR1(KTR_GEOM, "g_destroy_bio(): %p", bp); stack_save(&st); CTRSTACK(KTR_GEOM, &st, 3); } #endif uma_zfree(biozone, bp); } struct bio * g_clone_bio(struct bio *bp) { struct bio *bp2; bp2 = uma_zalloc(biozone, M_NOWAIT | M_ZERO); if (bp2 != NULL) { bp2->bio_parent = bp; bp2->bio_cmd = bp->bio_cmd; /* * BIO_ORDERED flag may be used by disk drivers to enforce * ordering restrictions, so this flag needs to be cloned. * BIO_UNMAPPED, BIO_VLIST, and BIO_SWAP should be inherited, * to properly indicate which way the buffer is passed. * Other bio flags are not suitable for cloning. 
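 * (The child starts out zeroed, via uma_zalloc() with M_ZERO, so error
 * state, completion counts and driver private fields are all fresh.)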
*/ bp2->bio_flags = bp->bio_flags & (BIO_ORDERED | BIO_UNMAPPED | BIO_VLIST | BIO_SWAP); bp2->bio_length = bp->bio_length; bp2->bio_offset = bp->bio_offset; bp2->bio_data = bp->bio_data; bp2->bio_ma = bp->bio_ma; bp2->bio_ma_n = bp->bio_ma_n; bp2->bio_ma_offset = bp->bio_ma_offset; bp2->bio_attribute = bp->bio_attribute; if (bp->bio_cmd == BIO_ZONE) bcopy(&bp->bio_zone, &bp2->bio_zone, sizeof(bp->bio_zone)); #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING) bp2->bio_track_bp = bp->bio_track_bp; #endif bp->bio_children++; } #ifdef KTR if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) { struct stack st; CTR2(KTR_GEOM, "g_clone_bio(%p): %p", bp, bp2); stack_save(&st); CTRSTACK(KTR_GEOM, &st, 3); } #endif return(bp2); } struct bio * g_duplicate_bio(struct bio *bp) { struct bio *bp2; bp2 = uma_zalloc(biozone, M_WAITOK | M_ZERO); bp2->bio_flags = bp->bio_flags & (BIO_UNMAPPED | BIO_VLIST | BIO_SWAP); bp2->bio_parent = bp; bp2->bio_cmd = bp->bio_cmd; bp2->bio_length = bp->bio_length; bp2->bio_offset = bp->bio_offset; bp2->bio_data = bp->bio_data; bp2->bio_ma = bp->bio_ma; bp2->bio_ma_n = bp->bio_ma_n; bp2->bio_ma_offset = bp->bio_ma_offset; bp2->bio_attribute = bp->bio_attribute; bp->bio_children++; #ifdef KTR if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) { struct stack st; CTR2(KTR_GEOM, "g_duplicate_bio(%p): %p", bp, bp2); stack_save(&st); CTRSTACK(KTR_GEOM, &st, 3); } #endif return(bp2); } void g_reset_bio(struct bio *bp) { bzero(bp, sizeof(*bp)); } void g_io_init(void) { g_bioq_init(&g_bio_run_down); g_bioq_init(&g_bio_run_up); biozone = uma_zcreate("g_bio", sizeof (struct bio), NULL, NULL, NULL, NULL, 0, 0); } int g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr) { struct bio *bp; int error; g_trace(G_T_BIO, "bio_getattr(%s)", attr); bp = g_alloc_bio(); bp->bio_cmd = BIO_GETATTR; bp->bio_done = NULL; bp->bio_attribute = attr; bp->bio_length = *len; bp->bio_data = ptr; g_io_request(bp, cp); error = biowait(bp, "ggetattr"); *len = bp->bio_completed; g_destroy_bio(bp); return (error); } int g_io_zonecmd(struct disk_zone_args *zone_args, struct g_consumer *cp) { struct bio *bp; int error; g_trace(G_T_BIO, "bio_zone(%d)", zone_args->zone_cmd); bp = g_alloc_bio(); bp->bio_cmd = BIO_ZONE; bp->bio_done = NULL; /* * XXX KDM need to handle report zone data. */ bcopy(zone_args, &bp->bio_zone, sizeof(*zone_args)); if (zone_args->zone_cmd == DISK_ZONE_REPORT_ZONES) bp->bio_length = zone_args->zone_params.report.entries_allocated * sizeof(struct disk_zone_rep_entry); else bp->bio_length = 0; g_io_request(bp, cp); error = biowait(bp, "gzone"); bcopy(&bp->bio_zone, zone_args, sizeof(*zone_args)); g_destroy_bio(bp); return (error); } /* * Send a BIO_SPEEDUP down the stack. This is used to tell the lower layers that * the upper layers have detected a resource shortage. The lower layers are * advised to stop delaying I/O that they might be holding for performance * reasons and to schedule it (non-trims) or complete it successfully (trims) as * quickly as it can. bio_length is the amount of the shortage. This call * should be non-blocking. bio_resid is used to communicate back if the lower * layers couldn't find bio_length worth of I/O to schedule or discard. A length * of 0 means to do as much as you can (schedule the h/w queues full, discard * all trims). flags are a hint from the upper layers to the lower layers what * operation should be done. 
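 */

/*
 * A hypothetical caller sketch (not part of this change, names invented
 * for illustration; assumes geom_io.c's usual headers): nudge the
 * provider below "cp" to hurry along roughly 1 MB of queued writes and
 * report how much of the shortage it could not act on.
 */
static int
example_speedup(struct g_consumer *cp)
{
	size_t resid;
	int error;

	error = g_io_speedup(1024 * 1024, BIO_SPEEDUP_WRITE, &resid, cp);
	if (error == 0 && resid != 0)
		printf("speedup: %zu bytes not acted on\n", resid);
	return (error);
}

/*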
*/ int g_io_speedup(off_t shortage, u_int flags, size_t *resid, struct g_consumer *cp) { struct bio *bp; int error; KASSERT((flags & (BIO_SPEEDUP_TRIM | BIO_SPEEDUP_WRITE)) != 0, ("Invalid flags passed to g_io_speedup: %#x", flags)); g_trace(G_T_BIO, "bio_speedup(%s, %jd, %#x)", cp->provider->name, (intmax_t)shortage, flags); bp = g_new_bio(); if (bp == NULL) return (ENOMEM); bp->bio_cmd = BIO_SPEEDUP; bp->bio_length = shortage; bp->bio_done = NULL; bp->bio_flags |= flags; g_io_request(bp, cp); error = biowait(bp, "gflush"); *resid = bp->bio_resid; g_destroy_bio(bp); return (error); } int g_io_flush(struct g_consumer *cp) { struct bio *bp; int error; g_trace(G_T_BIO, "bio_flush(%s)", cp->provider->name); bp = g_alloc_bio(); bp->bio_cmd = BIO_FLUSH; bp->bio_flags |= BIO_ORDERED; bp->bio_done = NULL; bp->bio_attribute = NULL; bp->bio_offset = cp->provider->mediasize; bp->bio_length = 0; bp->bio_data = NULL; g_io_request(bp, cp); error = biowait(bp, "gflush"); g_destroy_bio(bp); return (error); } static int g_io_check(struct bio *bp) { struct g_consumer *cp; struct g_provider *pp; off_t excess; int error; biotrack(bp, __func__); cp = bp->bio_from; pp = bp->bio_to; /* Fail if access counters don't allow the operation */ switch(bp->bio_cmd) { case BIO_READ: case BIO_GETATTR: if (cp->acr == 0) return (EPERM); break; case BIO_WRITE: case BIO_DELETE: case BIO_SPEEDUP: case BIO_FLUSH: if (cp->acw == 0) return (EPERM); break; case BIO_ZONE: if ((bp->bio_zone.zone_cmd == DISK_ZONE_REPORT_ZONES) || (bp->bio_zone.zone_cmd == DISK_ZONE_GET_PARAMS)) { if (cp->acr == 0) return (EPERM); } else if (cp->acw == 0) return (EPERM); break; default: return (EPERM); } /* if provider is marked for error, don't disturb. */ if (pp->error) return (pp->error); if (cp->flags & G_CF_ORPHAN) return (ENXIO); switch(bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: /* Zero sectorsize or mediasize is probably a lack of media. */ if (pp->sectorsize == 0 || pp->mediasize == 0) return (ENXIO); /* Reject I/O not on sector boundary */ if (bp->bio_offset % pp->sectorsize) return (EINVAL); /* Reject I/O not integral sector long */ if (bp->bio_length % pp->sectorsize) return (EINVAL); /* Reject requests before or past the end of media. */ if (bp->bio_offset < 0) return (EIO); if (bp->bio_offset > pp->mediasize) return (EIO); /* Truncate requests to the end of the provider's media. */ excess = bp->bio_offset + bp->bio_length; if (excess > bp->bio_to->mediasize) { KASSERT((bp->bio_flags & BIO_UNMAPPED) == 0 || round_page(bp->bio_ma_offset + bp->bio_length) / PAGE_SIZE == bp->bio_ma_n, ("excess bio %p too short", bp)); excess -= bp->bio_to->mediasize; bp->bio_length -= excess; if ((bp->bio_flags & BIO_UNMAPPED) != 0) { bp->bio_ma_n = round_page(bp->bio_ma_offset + bp->bio_length) / PAGE_SIZE; } if (excess > 0) CTR3(KTR_GEOM, "g_down truncated bio " "%p provider %s by %d", bp, bp->bio_to->name, excess); } /* Deliver zero length transfers right here.
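 * (A request truncated to zero bytes at the end of media would be a
 * no-op for the provider, so it is completed with success here instead
 * of being passed further down.)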
*/ if (bp->bio_length == 0) { CTR2(KTR_GEOM, "g_down terminated 0-length " "bp %p provider %s", bp, bp->bio_to->name); return (0); } if ((bp->bio_flags & BIO_UNMAPPED) != 0 && (bp->bio_to->flags & G_PF_ACCEPT_UNMAPPED) == 0 && (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) { if ((error = g_io_transient_map_bio(bp)) >= 0) return (error); } break; default: break; } return (EJUSTRETURN); } void g_io_request(struct bio *bp, struct g_consumer *cp) { struct g_provider *pp; int direct, error, first; uint8_t cmd; biotrack(bp, __func__); KASSERT(cp != NULL, ("NULL cp in g_io_request")); KASSERT(bp != NULL, ("NULL bp in g_io_request")); pp = cp->provider; KASSERT(pp != NULL, ("consumer not attached in g_io_request")); #ifdef DIAGNOSTIC KASSERT(bp->bio_driver1 == NULL, ("bio_driver1 used by the consumer (geom %s)", cp->geom->name)); KASSERT(bp->bio_driver2 == NULL, ("bio_driver2 used by the consumer (geom %s)", cp->geom->name)); KASSERT(bp->bio_pflags == 0, ("bio_pflags used by the consumer (geom %s)", cp->geom->name)); /* * Remember consumer's private fields, so we can detect if they were * modified by the provider. */ bp->_bio_caller1 = bp->bio_caller1; bp->_bio_caller2 = bp->bio_caller2; bp->_bio_cflags = bp->bio_cflags; #endif cmd = bp->bio_cmd; if (cmd == BIO_READ || cmd == BIO_WRITE || cmd == BIO_GETATTR) { KASSERT(bp->bio_data != NULL, ("NULL bp->data in g_io_request(cmd=%hu)", bp->bio_cmd)); } if (cmd == BIO_DELETE || cmd == BIO_FLUSH || cmd == BIO_SPEEDUP) { KASSERT(bp->bio_data == NULL, ("non-NULL bp->data in g_io_request(cmd=%hu)", bp->bio_cmd)); } if (cmd == BIO_READ || cmd == BIO_WRITE || cmd == BIO_DELETE) { KASSERT(bp->bio_offset % cp->provider->sectorsize == 0, ("wrong offset %jd for sectorsize %u", bp->bio_offset, cp->provider->sectorsize)); KASSERT(bp->bio_length % cp->provider->sectorsize == 0, ("wrong length %jd for sectorsize %u", bp->bio_length, cp->provider->sectorsize)); } g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d", bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd); bp->bio_from = cp; bp->bio_to = pp; bp->bio_error = 0; bp->bio_completed = 0; KASSERT(!(bp->bio_flags & BIO_ONQUEUE), ("Bio already on queue bp=%p", bp)); if ((g_collectstats & G_STATS_CONSUMERS) != 0 || ((g_collectstats & G_STATS_PROVIDERS) != 0 && pp->stat != NULL)) binuptime(&bp->bio_t0); else getbinuptime(&bp->bio_t0); if (g_collectstats & G_STATS_CONSUMERS) devstat_start_transaction_bio_t0(cp->stat, bp); if (g_collectstats & G_STATS_PROVIDERS) devstat_start_transaction_bio_t0(pp->stat, bp); #ifdef INVARIANTS atomic_add_int(&cp->nstart, 1); #endif direct = (cp->flags & G_CF_DIRECT_SEND) != 0 && (pp->flags & G_PF_DIRECT_RECEIVE) != 0 && curthread != g_down_td && ((pp->flags & G_PF_ACCEPT_UNMAPPED) != 0 || (bp->bio_flags & BIO_UNMAPPED) == 0 || THREAD_CAN_SLEEP()) && pace == 0; if (direct) { /* Block direct execution if less than half of stack left. */ size_t st, su; GET_STACK_USAGE(st, su); if (su * 2 > st) direct = 0; } if (direct) { error = g_io_check(bp); if (error >= 0) { CTR3(KTR_GEOM, "g_io_request g_io_check on bp %p " "provider %s returned %d", bp, bp->bio_to->name, error); g_io_deliver(bp, error); return; } bp->bio_to->geom->start(bp); } else { g_bioq_lock(&g_bio_run_down); first = TAILQ_EMPTY(&g_bio_run_down.bio_queue); TAILQ_INSERT_TAIL(&g_bio_run_down.bio_queue, bp, bio_queue); bp->bio_flags |= BIO_ONQUEUE; g_bio_run_down.bio_queue_length++; g_bioq_unlock(&g_bio_run_down); /* Pass it on down.
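 * (Only the transition from empty to non-empty needs a wakeup; if the
 * queue already held work, the g_down thread is awake or about to be.)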
*/ if (first) wakeup(&g_wait_down); } } void g_io_deliver(struct bio *bp, int error) { struct bintime now; struct g_consumer *cp; struct g_provider *pp; struct mtx *mtxp; int direct, first; biotrack(bp, __func__); KASSERT(bp != NULL, ("NULL bp in g_io_deliver")); pp = bp->bio_to; KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver")); cp = bp->bio_from; if (cp == NULL) { bp->bio_error = error; bp->bio_done(bp); return; } KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver")); KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver")); #ifdef DIAGNOSTIC /* * Some classes - GJournal in particular - can modify bio's * private fields while the bio is in transit; G_GEOM_VOLATILE_BIO * flag means it's an expected behaviour for that particular geom. */ if ((cp->geom->flags & G_GEOM_VOLATILE_BIO) == 0) { KASSERT(bp->bio_caller1 == bp->_bio_caller1, ("bio_caller1 used by the provider %s", pp->name)); KASSERT(bp->bio_caller2 == bp->_bio_caller2, ("bio_caller2 used by the provider %s", pp->name)); KASSERT(bp->bio_cflags == bp->_bio_cflags, ("bio_cflags used by the provider %s", pp->name)); } #endif KASSERT(bp->bio_completed >= 0, ("bio_completed can't be less than 0")); KASSERT(bp->bio_completed <= bp->bio_length, ("bio_completed can't be greater than bio_length")); g_trace(G_T_BIO, "g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd", bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error, (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length); KASSERT(!(bp->bio_flags & BIO_ONQUEUE), ("Bio already on queue bp=%p", bp)); /* * XXX: next two don't belong here */ bp->bio_bcount = bp->bio_length; bp->bio_resid = bp->bio_bcount - bp->bio_completed; direct = (pp->flags & G_PF_DIRECT_SEND) && (cp->flags & G_CF_DIRECT_RECEIVE) && curthread != g_up_td; if (direct) { /* Block direct execution if less than half of stack left. */ size_t st, su; GET_STACK_USAGE(st, su); if (su * 2 > st) direct = 0; } /* * The statistics collection is lockless, as such, but we * can not update one instance of the statistics from more * than one thread at a time, so grab the lock first.
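 * (A pool mutex keyed on the provider is used below, so unrelated
 * providers do not contend on one global statistics lock.)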
*/ if ((g_collectstats & G_STATS_CONSUMERS) != 0 || ((g_collectstats & G_STATS_PROVIDERS) != 0 && pp->stat != NULL)) binuptime(&now); mtxp = mtx_pool_find(mtxpool_sleep, pp); mtx_lock(mtxp); if (g_collectstats & G_STATS_PROVIDERS) devstat_end_transaction_bio_bt(pp->stat, bp, &now); if (g_collectstats & G_STATS_CONSUMERS) devstat_end_transaction_bio_bt(cp->stat, bp, &now); #ifdef INVARIANTS cp->nend++; #endif mtx_unlock(mtxp); if (error != ENOMEM) { bp->bio_error = error; if (direct) { biodone(bp); } else { g_bioq_lock(&g_bio_run_up); first = TAILQ_EMPTY(&g_bio_run_up.bio_queue); TAILQ_INSERT_TAIL(&g_bio_run_up.bio_queue, bp, bio_queue); bp->bio_flags |= BIO_ONQUEUE; g_bio_run_up.bio_queue_length++; g_bioq_unlock(&g_bio_run_up); if (first) wakeup(&g_wait_up); } return; } if (bootverbose) printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name); bp->bio_children = 0; bp->bio_inbed = 0; bp->bio_driver1 = NULL; bp->bio_driver2 = NULL; bp->bio_pflags = 0; g_io_request(bp, cp); pace = 1; return; } SYSCTL_DECL(_kern_geom); static long transient_maps; SYSCTL_LONG(_kern_geom, OID_AUTO, transient_maps, CTLFLAG_RD, &transient_maps, 0, "Total count of the transient mapping requests"); u_int transient_map_retries = 10; SYSCTL_UINT(_kern_geom, OID_AUTO, transient_map_retries, CTLFLAG_RW, &transient_map_retries, 0, "Max count of retries used before giving up on creating transient map"); int transient_map_hard_failures; SYSCTL_INT(_kern_geom, OID_AUTO, transient_map_hard_failures, CTLFLAG_RD, &transient_map_hard_failures, 0, "Failures to establish the transient mapping due to retry attempts " "exhausted"); int transient_map_soft_failures; SYSCTL_INT(_kern_geom, OID_AUTO, transient_map_soft_failures, CTLFLAG_RD, &transient_map_soft_failures, 0, "Count of retried failures to establish the transient mapping"); int inflight_transient_maps; SYSCTL_INT(_kern_geom, OID_AUTO, inflight_transient_maps, CTLFLAG_RD, &inflight_transient_maps, 0, "Current count of the active transient maps"); static int g_io_transient_map_bio(struct bio *bp) { vm_offset_t addr; long size; u_int retried; KASSERT(unmapped_buf_allowed, ("unmapped disabled")); size = round_page(bp->bio_ma_offset + bp->bio_length); KASSERT(size / PAGE_SIZE == bp->bio_ma_n, ("Bio too short %p", bp)); addr = 0; retried = 0; atomic_add_long(&transient_maps, 1); retry: if (vmem_alloc(transient_arena, size, M_BESTFIT | M_NOWAIT, &addr)) { if (transient_map_retries != 0 && retried >= transient_map_retries) { CTR2(KTR_GEOM, "g_down cannot map bp %p provider %s", bp, bp->bio_to->name); atomic_add_int(&transient_map_hard_failures, 1); return (EDEADLK/* XXXKIB */); } else { /* * Naive attempt to quiesce the I/O to get more * in-flight requests completed and defragment * the transient_arena.
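 * (Each retry sleeps hz/10 ticks, so with the default limit of 10
 * retries a bio waits roughly one second in total before failing hard
 * with EDEADLK.)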
*/ CTR3(KTR_GEOM, "g_down retrymap bp %p provider %s r %d", bp, bp->bio_to->name, retried); pause("g_d_tra", hz / 10); retried++; atomic_add_int(&transient_map_soft_failures, 1); goto retry; } } atomic_add_int(&inflight_transient_maps, 1); pmap_qenter((vm_offset_t)addr, bp->bio_ma, OFF_TO_IDX(size)); bp->bio_data = (caddr_t)addr + bp->bio_ma_offset; bp->bio_flags |= BIO_TRANSIENT_MAPPING; bp->bio_flags &= ~BIO_UNMAPPED; return (EJUSTRETURN); } void g_io_schedule_down(struct thread *tp __unused) { struct bio *bp; int error; for(;;) { g_bioq_lock(&g_bio_run_down); bp = g_bioq_first(&g_bio_run_down); if (bp == NULL) { CTR0(KTR_GEOM, "g_down going to sleep"); msleep(&g_wait_down, &g_bio_run_down.bio_queue_lock, PRIBIO | PDROP, "-", 0); continue; } CTR0(KTR_GEOM, "g_down has work to do"); g_bioq_unlock(&g_bio_run_down); biotrack(bp, __func__); if (pace != 0) { /* * There has been at least one memory allocation * failure since the last I/O completed. Pause 1ms to * give the system a chance to free up memory. We only * do this once because a large number of allocations * can fail in the direct dispatch case and there's no * relationship between the number of these failures and * the length of the outage. If there's still an outage, * we'll pause again and again until it's * resolved. Older versions paused longer and once per * allocation failure. This was OK for a single threaded * g_down, but with direct dispatch would lead to max of * 10 IOPs for minutes at a time when transient memory * issues prevented allocation for a batch of requests * from the upper layers. * * XXX This pacing is really lame. It needs to be solved * by other methods. This is OK only because the worst * case scenario is so rare. In the worst case scenario * all memory is tied up waiting for I/O to complete * which can never happen since we can't allocate bios * for that I/O. 
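 * (pace is reset to 0 unconditionally after the single pause below; a
 * fresh ENOMEM in g_io_deliver() sets it again if the shortage
 * persists.)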
*/ CTR0(KTR_GEOM, "g_down pacing self"); pause("g_down", min(hz/1000, 1)); pace = 0; } CTR2(KTR_GEOM, "g_down processing bp %p provider %s", bp, bp->bio_to->name); error = g_io_check(bp); if (error >= 0) { CTR3(KTR_GEOM, "g_down g_io_check on bp %p provider " "%s returned %d", bp, bp->bio_to->name, error); g_io_deliver(bp, error); continue; } THREAD_NO_SLEEPING(); CTR4(KTR_GEOM, "g_down starting bp %p provider %s off %ld " "len %ld", bp, bp->bio_to->name, bp->bio_offset, bp->bio_length); bp->bio_to->geom->start(bp); THREAD_SLEEPING_OK(); } } void g_io_schedule_up(struct thread *tp __unused) { struct bio *bp; for(;;) { g_bioq_lock(&g_bio_run_up); bp = g_bioq_first(&g_bio_run_up); if (bp == NULL) { CTR0(KTR_GEOM, "g_up going to sleep"); msleep(&g_wait_up, &g_bio_run_up.bio_queue_lock, PRIBIO | PDROP, "-", 0); continue; } g_bioq_unlock(&g_bio_run_up); THREAD_NO_SLEEPING(); CTR4(KTR_GEOM, "g_up biodone bp %p provider %s off " "%jd len %ld", bp, bp->bio_to->name, bp->bio_offset, bp->bio_length); biodone(bp); THREAD_SLEEPING_OK(); } } void * g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error) { struct bio *bp; void *ptr; int errorc; KASSERT(length > 0 && length >= cp->provider->sectorsize && length <= maxphys, ("g_read_data(): invalid length %jd", (intmax_t)length)); bp = g_alloc_bio(); bp->bio_cmd = BIO_READ; bp->bio_done = NULL; bp->bio_offset = offset; bp->bio_length = length; ptr = g_malloc(length, M_WAITOK); bp->bio_data = ptr; g_io_request(bp, cp); errorc = biowait(bp, "gread"); if (errorc == 0 && bp->bio_completed != length) errorc = EIO; if (error != NULL) *error = errorc; g_destroy_bio(bp); if (errorc) { g_free(ptr); ptr = NULL; } return (ptr); } /* * A read function for use by ffs_sbget when used by GEOM-layer routines. */ int g_use_g_read_data(void *devfd, off_t loc, void **bufp, int size) { struct g_consumer *cp; KASSERT(*bufp == NULL, ("g_use_g_read_data: non-NULL *bufp %p\n", *bufp)); cp = (struct g_consumer *)devfd; /* * Take care not to issue an invalid I/O request. The offset of * the superblock candidate must be multiples of the provider's * sector size, otherwise an FFS can't exist on the provider * anyway. */ if (loc % cp->provider->sectorsize != 0) return (ENOENT); *bufp = g_read_data(cp, loc, size, NULL); if (*bufp == NULL) return (ENOENT); return (0); } int g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length) { struct bio *bp; int error; KASSERT(length > 0 && length >= cp->provider->sectorsize && length <= maxphys, ("g_write_data(): invalid length %jd", (intmax_t)length)); bp = g_alloc_bio(); bp->bio_cmd = BIO_WRITE; bp->bio_done = NULL; bp->bio_offset = offset; bp->bio_length = length; bp->bio_data = ptr; g_io_request(bp, cp); error = biowait(bp, "gwrite"); if (error == 0 && bp->bio_completed != length) error = EIO; g_destroy_bio(bp); return (error); } /* * A write function for use by ffs_sbput when used by GEOM-layer routines. 
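 */

/*
 * A hypothetical read-modify-write sketch (not part of this change,
 * name invented for illustration; assumes geom_io.c's usual headers)
 * using the g_read_data()/g_write_data() pair on a single sector.
 */
static int
example_rmw(struct g_consumer *cp, off_t offset)
{
	u_int secsize = cp->provider->sectorsize;
	void *buf;
	int error;

	buf = g_read_data(cp, offset, secsize, &error);
	if (buf == NULL)
		return (error);
	/* ... modify buf in place here ... */
	error = g_write_data(cp, offset, buf, secsize);
	g_free(buf);
	return (error);
}

/*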
*/ int g_use_g_write_data(void *devfd, off_t loc, void *buf, int size) { return (g_write_data((struct g_consumer *)devfd, loc, buf, size)); } int g_delete_data(struct g_consumer *cp, off_t offset, off_t length) { struct bio *bp; int error; KASSERT(length > 0 && length >= cp->provider->sectorsize, ("g_delete_data(): invalid length %jd", (intmax_t)length)); bp = g_alloc_bio(); bp->bio_cmd = BIO_DELETE; bp->bio_done = NULL; bp->bio_offset = offset; bp->bio_length = length; bp->bio_data = NULL; g_io_request(bp, cp); error = biowait(bp, "gdelete"); if (error == 0 && bp->bio_completed != length) error = EIO; g_destroy_bio(bp); return (error); } void g_print_bio(const char *prefix, const struct bio *bp, const char *fmtsuffix, ...) { #ifndef PRINTF_BUFR_SIZE #define PRINTF_BUFR_SIZE 64 #endif char bufr[PRINTF_BUFR_SIZE]; struct sbuf sb, *sbp __unused; va_list ap; sbp = sbuf_new(&sb, bufr, sizeof(bufr), SBUF_FIXEDLEN); KASSERT(sbp != NULL, ("sbuf_new misused?")); sbuf_set_drain(&sb, sbuf_printf_drain, NULL); sbuf_cat(&sb, prefix); g_format_bio(&sb, bp); va_start(ap, fmtsuffix); sbuf_vprintf(&sb, fmtsuffix, ap); va_end(ap); sbuf_nl_terminate(&sb); sbuf_finish(&sb); sbuf_delete(&sb); } void g_format_bio(struct sbuf *sb, const struct bio *bp) { const char *pname, *cmd = NULL; if (bp->bio_to != NULL) pname = bp->bio_to->name; else if (bp->bio_parent != NULL && bp->bio_parent->bio_to != NULL) pname = bp->bio_parent->bio_to->name; else pname = "[unknown]"; switch (bp->bio_cmd) { case BIO_GETATTR: cmd = "GETATTR"; sbuf_printf(sb, "%s[%s(attr=%s)]", pname, cmd, bp->bio_attribute); return; case BIO_FLUSH: cmd = "FLUSH"; sbuf_printf(sb, "%s[%s]", pname, cmd); return; case BIO_ZONE: { char *subcmd = NULL; cmd = "ZONE"; switch (bp->bio_zone.zone_cmd) { case DISK_ZONE_OPEN: subcmd = "OPEN"; break; case DISK_ZONE_CLOSE: subcmd = "CLOSE"; break; case DISK_ZONE_FINISH: subcmd = "FINISH"; break; case DISK_ZONE_RWP: subcmd = "RWP"; break; case DISK_ZONE_REPORT_ZONES: subcmd = "REPORT ZONES"; break; case DISK_ZONE_GET_PARAMS: subcmd = "GET PARAMS"; break; default: subcmd = "UNKNOWN"; break; } sbuf_printf(sb, "%s[%s,%s]", pname, cmd, subcmd); return; } case BIO_READ: cmd = "READ"; break; case BIO_WRITE: cmd = "WRITE"; break; case BIO_DELETE: cmd = "DELETE"; break; default: cmd = "UNKNOWN"; sbuf_printf(sb, "%s[%s()]", pname, cmd); return; } sbuf_printf(sb, "%s[%s(offset=%jd, length=%jd)]", pname, cmd, (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length); } diff --git a/sys/i386/include/proc.h b/sys/i386/include/proc.h index 7affe60edab9..d693500f2c3e 100644 --- a/sys/i386/include/proc.h +++ b/sys/i386/include/proc.h @@ -1,84 +1,77 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)proc.h 7.1 (Berkeley) 5/15/91 * $FreeBSD$ */ #ifndef _MACHINE_PROC_H_ #define _MACHINE_PROC_H_ #include struct proc_ldt { caddr_t ldt_base; int ldt_len; int ldt_refcnt; u_long ldt_active; struct segment_descriptor ldt_sd; }; /* * Machine-dependent part of the proc structure for i386. * Table of MD locks: * t - Descriptor tables lock */ struct mdthread { int md_spinlock_count; /* (k) */ register_t md_saved_flags; /* (k) */ register_t md_spurflt_addr; /* (k) Spurious page fault address. */ }; struct mdproc { struct proc_ldt *md_ldt; /* (t) per-process ldt */ }; #define KINFO_PROC_SIZE 768 #ifdef _KERNEL #include -/* Get the current kernel thread stack usage. */ -#define GET_STACK_USAGE(total, used) do { \ - struct thread *td = curthread; \ - (total) = (vm_offset_t)get_pcb_td(td) - td->td_kstack; \ - (used) = (vm_offset_t)get_pcb_td(td) - (vm_offset_t)&td; \ -} while (0) - void set_user_ldt(struct mdproc *); struct proc_ldt *user_ldt_alloc(struct mdproc *, int); void user_ldt_free(struct thread *); void user_ldt_deref(struct proc_ldt *pldt); extern struct mtx dt_lock; #endif /* _KERNEL */ #endif /* !_MACHINE_PROC_H_ */ diff --git a/sys/i386/include/stack.h b/sys/i386/include/stack.h index 091ae33893d4..773aca1c66d9 100644 --- a/sys/i386/include/stack.h +++ b/sys/i386/include/stack.h @@ -1,6 +1,29 @@ /* * This file is in the public domain. */ /* $FreeBSD$ */ +#ifndef _MACHINE_STACK_H_ +#define _MACHINE_STACK_H_ + #include + +#ifdef _SYS_PROC_H_ + +/* Get the current kernel thread stack usage. */ +#define GET_STACK_USAGE(total, used) do { \ + struct thread *td = curthread; \ + (total) = (vm_offset_t)get_pcb_td(td) - td->td_kstack; \ + (used) = (vm_offset_t)get_pcb_td(td) - (vm_offset_t)&td; \ +} while (0) + +static __inline bool +kstack_contains(struct thread *td, vm_offset_t va, size_t len) +{ + return (va >= td->td_kstack && va + len >= va && + va + len <= td->td_kstack + td->td_kstack_pages * PAGE_SIZE); +} + +#endif /* _SYS_PROC_H_ */ + +#endif diff --git a/sys/kern/subr_epoch.c b/sys/kern/subr_epoch.c index 98a560e44c9d..2a0144412399 100644 --- a/sys/kern/subr_epoch.c +++ b/sys/kern/subr_epoch.c @@ -1,1019 +1,1021 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2018, Matthew Macy * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef EPOCH_TRACE #include #include #include #endif #include #include #include #include +#include + #include #ifdef __amd64__ #define EPOCH_ALIGN CACHE_LINE_SIZE*2 #else #define EPOCH_ALIGN CACHE_LINE_SIZE #endif TAILQ_HEAD (epoch_tdlist, epoch_tracker); typedef struct epoch_record { ck_epoch_record_t er_record; struct epoch_context er_drain_ctx; struct epoch *er_parent; volatile struct epoch_tdlist er_tdlist; volatile uint32_t er_gen; uint32_t er_cpuid; #ifdef INVARIANTS /* Used to verify record ownership for non-preemptible epochs. */ struct thread *er_td; #endif } __aligned(EPOCH_ALIGN) *epoch_record_t; struct epoch { struct ck_epoch e_epoch __aligned(EPOCH_ALIGN); epoch_record_t e_pcpu_record; int e_in_use; int e_flags; struct sx e_drain_sx; struct mtx e_drain_mtx; volatile int e_drain_count; const char *e_name; }; /* arbitrary --- needs benchmarking */ #define MAX_ADAPTIVE_SPIN 100 #define MAX_EPOCHS 64 CTASSERT(sizeof(ck_epoch_entry_t) == sizeof(struct epoch_context)); SYSCTL_NODE(_kern, OID_AUTO, epoch, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "epoch information"); SYSCTL_NODE(_kern_epoch, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "epoch stats"); /* Stats. 
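 * The counters below hang off the kern.epoch.stats sysctl node declared
 * just above, so they can be inspected from userland with, e.g.,
 * "sysctl kern.epoch.stats" (an illustrative invocation, not part of
 * this change).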
*/ static counter_u64_t block_count; SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, nblocked, CTLFLAG_RW, &block_count, "# of times a thread was in an epoch when epoch_wait was called"); static counter_u64_t migrate_count; SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, migrations, CTLFLAG_RW, &migrate_count, "# of times thread was migrated to another CPU in epoch_wait"); static counter_u64_t turnstile_count; SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, ncontended, CTLFLAG_RW, &turnstile_count, "# of times a thread was blocked on a lock in an epoch during an epoch_wait"); static counter_u64_t switch_count; SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, switches, CTLFLAG_RW, &switch_count, "# of times a thread voluntarily context switched in epoch_wait"); static counter_u64_t epoch_call_count; SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, epoch_calls, CTLFLAG_RW, &epoch_call_count, "# of times a callback was deferred"); static counter_u64_t epoch_call_task_count; SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, epoch_call_tasks, CTLFLAG_RW, &epoch_call_task_count, "# of times a callback task was run"); TAILQ_HEAD (threadlist, thread); CK_STACK_CONTAINER(struct ck_epoch_entry, stack_entry, ck_epoch_entry_container) static struct epoch epoch_array[MAX_EPOCHS]; DPCPU_DEFINE(struct grouptask, epoch_cb_task); DPCPU_DEFINE(int, epoch_cb_count); static __read_mostly int inited; __read_mostly epoch_t global_epoch; __read_mostly epoch_t global_epoch_preempt; static void epoch_call_task(void *context __unused); static uma_zone_t pcpu_zone_record; static struct sx epoch_sx; #define EPOCH_LOCK() sx_xlock(&epoch_sx) #define EPOCH_UNLOCK() sx_xunlock(&epoch_sx) static epoch_record_t epoch_currecord(epoch_t epoch) { return (zpcpu_get(epoch->e_pcpu_record)); } #ifdef EPOCH_TRACE struct stackentry { RB_ENTRY(stackentry) se_node; struct stack se_stack; }; static int stackentry_compare(struct stackentry *a, struct stackentry *b) { if (a->se_stack.depth > b->se_stack.depth) return (1); if (a->se_stack.depth < b->se_stack.depth) return (-1); for (int i = 0; i < a->se_stack.depth; i++) { if (a->se_stack.pcs[i] > b->se_stack.pcs[i]) return (1); if (a->se_stack.pcs[i] < b->se_stack.pcs[i]) return (-1); } return (0); } RB_HEAD(stacktree, stackentry) epoch_stacks = RB_INITIALIZER(&epoch_stacks); RB_GENERATE_STATIC(stacktree, stackentry, se_node, stackentry_compare); static struct mtx epoch_stacks_lock; MTX_SYSINIT(epochstacks, &epoch_stacks_lock, "epoch_stacks", MTX_DEF); static bool epoch_trace_stack_print = true; SYSCTL_BOOL(_kern_epoch, OID_AUTO, trace_stack_print, CTLFLAG_RWTUN, &epoch_trace_stack_print, 0, "Print stack traces on epoch reports"); static void epoch_trace_report(const char *fmt, ...) __printflike(1, 2); static inline void epoch_trace_report(const char *fmt, ...) { va_list ap; struct stackentry se, *new; stack_save(&se.se_stack); /* Tree is never reduced - go lockless. 
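 * Entries are only ever inserted (under epoch_stacks_lock below) and
 * never removed, so an unlocked RB_FIND can at worst miss a brand-new
 * entry and fall through to the locked insert, which then detects the
 * duplicate.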
*/ if (RB_FIND(stacktree, &epoch_stacks, &se) != NULL) return; new = malloc(sizeof(*new), M_STACK, M_NOWAIT); if (new != NULL) { bcopy(&se.se_stack, &new->se_stack, sizeof(struct stack)); mtx_lock(&epoch_stacks_lock); new = RB_INSERT(stacktree, &epoch_stacks, new); mtx_unlock(&epoch_stacks_lock); if (new != NULL) free(new, M_STACK); } va_start(ap, fmt); (void)vprintf(fmt, ap); va_end(ap); if (epoch_trace_stack_print) stack_print_ddb(&se.se_stack); } static inline void epoch_trace_enter(struct thread *td, epoch_t epoch, epoch_tracker_t et, const char *file, int line) { epoch_tracker_t iet; SLIST_FOREACH(iet, &td->td_epochs, et_tlink) { if (iet->et_epoch != epoch) continue; epoch_trace_report("Recursively entering epoch %s " "at %s:%d, previously entered at %s:%d\n", epoch->e_name, file, line, iet->et_file, iet->et_line); } et->et_epoch = epoch; et->et_file = file; et->et_line = line; et->et_flags = 0; SLIST_INSERT_HEAD(&td->td_epochs, et, et_tlink); } static inline void epoch_trace_exit(struct thread *td, epoch_t epoch, epoch_tracker_t et, const char *file, int line) { if (SLIST_FIRST(&td->td_epochs) != et) { epoch_trace_report("Exiting epoch %s in a not nested order " "at %s:%d. Most recently entered %s at %s:%d\n", epoch->e_name, file, line, SLIST_FIRST(&td->td_epochs)->et_epoch->e_name, SLIST_FIRST(&td->td_epochs)->et_file, SLIST_FIRST(&td->td_epochs)->et_line); /* This will panic if et is not anywhere on td_epochs. */ SLIST_REMOVE(&td->td_epochs, et, epoch_tracker, et_tlink); } else SLIST_REMOVE_HEAD(&td->td_epochs, et_tlink); if (et->et_flags & ET_REPORT_EXIT) printf("Td %p exiting epoch %s at %s:%d\n", td, epoch->e_name, file, line); } /* Used by assertions that check thread state before going to sleep. */ void epoch_trace_list(struct thread *td) { epoch_tracker_t iet; SLIST_FOREACH(iet, &td->td_epochs, et_tlink) printf("Epoch %s entered at %s:%d\n", iet->et_epoch->e_name, iet->et_file, iet->et_line); } void epoch_where_report(epoch_t epoch) { epoch_record_t er; struct epoch_tracker *tdwait; MPASS(epoch != NULL); MPASS((epoch->e_flags & EPOCH_PREEMPT) != 0); MPASS(!THREAD_CAN_SLEEP()); critical_enter(); er = epoch_currecord(epoch); TAILQ_FOREACH(tdwait, &er->er_tdlist, et_link) if (tdwait->et_td == curthread) break; critical_exit(); if (tdwait != NULL) { tdwait->et_flags |= ET_REPORT_EXIT; printf("Td %p entered epoch %s at %s:%d\n", curthread, epoch->e_name, tdwait->et_file, tdwait->et_line); } } #endif /* EPOCH_TRACE */ static void epoch_init(void *arg __unused) { int cpu; block_count = counter_u64_alloc(M_WAITOK); migrate_count = counter_u64_alloc(M_WAITOK); turnstile_count = counter_u64_alloc(M_WAITOK); switch_count = counter_u64_alloc(M_WAITOK); epoch_call_count = counter_u64_alloc(M_WAITOK); epoch_call_task_count = counter_u64_alloc(M_WAITOK); pcpu_zone_record = uma_zcreate("epoch_record pcpu", sizeof(struct epoch_record), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU); CPU_FOREACH(cpu) { GROUPTASK_INIT(DPCPU_ID_PTR(cpu, epoch_cb_task), 0, epoch_call_task, NULL); taskqgroup_attach_cpu(qgroup_softirq, DPCPU_ID_PTR(cpu, epoch_cb_task), NULL, cpu, NULL, NULL, "epoch call task"); } #ifdef EPOCH_TRACE SLIST_INIT(&thread0.td_epochs); #endif sx_init(&epoch_sx, "epoch-sx"); inited = 1; global_epoch = epoch_alloc("Global", 0); global_epoch_preempt = epoch_alloc("Global preemptible", EPOCH_PREEMPT); } SYSINIT(epoch, SI_SUB_EPOCH, SI_ORDER_FIRST, epoch_init, NULL); #if !defined(EARLY_AP_STARTUP) static void epoch_init_smp(void *dummy __unused) { inited = 2; } SYSINIT(epoch_smp, 
    SI_SUB_SMP + 1, SI_ORDER_FIRST, epoch_init_smp, NULL);
#endif

static void
epoch_ctor(epoch_t epoch)
{
	epoch_record_t er;
	int cpu;

	epoch->e_pcpu_record = uma_zalloc_pcpu(pcpu_zone_record, M_WAITOK);
	CPU_FOREACH(cpu) {
		er = zpcpu_get_cpu(epoch->e_pcpu_record, cpu);
		bzero(er, sizeof(*er));
		ck_epoch_register(&epoch->e_epoch, &er->er_record, NULL);
		TAILQ_INIT((struct threadlist *)(uintptr_t)&er->er_tdlist);
		er->er_cpuid = cpu;
		er->er_parent = epoch;
	}
}

static void
epoch_adjust_prio(struct thread *td, u_char prio)
{

	thread_lock(td);
	sched_prio(td, prio);
	thread_unlock(td);
}

epoch_t
epoch_alloc(const char *name, int flags)
{
	epoch_t epoch;
	int i;

	MPASS(name != NULL);

	if (__predict_false(!inited))
		panic("%s called too early in boot", __func__);

	EPOCH_LOCK();

	/*
	 * Find a free index in the epoch array. If no free index is
	 * found, try to use the index after the last one.
	 */
	for (i = 0;; i++) {
		/*
		 * If too many epochs are currently allocated,
		 * return NULL.
		 */
		if (i == MAX_EPOCHS) {
			epoch = NULL;
			goto done;
		}
		if (epoch_array[i].e_in_use == 0)
			break;
	}

	epoch = epoch_array + i;
	ck_epoch_init(&epoch->e_epoch);
	epoch_ctor(epoch);
	epoch->e_flags = flags;
	epoch->e_name = name;
	sx_init(&epoch->e_drain_sx, "epoch-drain-sx");
	mtx_init(&epoch->e_drain_mtx, "epoch-drain-mtx", NULL, MTX_DEF);

	/*
	 * Set e_in_use last, because when this field is set the
	 * epoch_call_task() function will start scanning this epoch
	 * structure.
	 */
	atomic_store_rel_int(&epoch->e_in_use, 1);

done:
	EPOCH_UNLOCK();
	return (epoch);
}

void
epoch_free(epoch_t epoch)
{
#ifdef INVARIANTS
	int cpu;
#endif

	EPOCH_LOCK();

	MPASS(epoch->e_in_use != 0);

	epoch_drain_callbacks(epoch);

	atomic_store_rel_int(&epoch->e_in_use, 0);
	/*
	 * Make sure the epoch_call_task() function sees e_in_use equal
	 * to zero, by calling epoch_wait() on the global_epoch.
	 */
	epoch_wait(global_epoch);

#ifdef INVARIANTS
	CPU_FOREACH(cpu) {
		epoch_record_t er;

		er = zpcpu_get_cpu(epoch->e_pcpu_record, cpu);

		/*
		 * Sanity check: none of the records should be in use anymore.
		 * We drained callbacks above and freeing the pcpu records is
		 * imminent.
		 */
		MPASS(er->er_td == NULL);
		MPASS(TAILQ_EMPTY(&er->er_tdlist));
	}
#endif
	uma_zfree_pcpu(pcpu_zone_record, epoch->e_pcpu_record);
	mtx_destroy(&epoch->e_drain_mtx);
	sx_destroy(&epoch->e_drain_sx);
	memset(epoch, 0, sizeof(*epoch));

	EPOCH_UNLOCK();
}

#define INIT_CHECK(epoch)					\
	do {							\
		if (__predict_false((epoch) == NULL))		\
			return;					\
	} while (0)

void
_epoch_enter_preempt(epoch_t epoch, epoch_tracker_t et EPOCH_FILE_LINE)
{
	struct epoch_record *er;
	struct thread *td;

	MPASS(cold || epoch != NULL);
	td = curthread;
	MPASS(kstack_contains(td, (vm_offset_t)et, sizeof(*et)));

	INIT_CHECK(epoch);
	MPASS(epoch->e_flags & EPOCH_PREEMPT);

#ifdef EPOCH_TRACE
	epoch_trace_enter(td, epoch, et, file, line);
#endif
	et->et_td = td;
	THREAD_NO_SLEEPING();
	critical_enter();
	sched_pin();
	et->et_old_priority = td->td_priority;
	er = epoch_currecord(epoch);
	/* Record-level tracking is reserved for non-preemptible epochs. */
	MPASS(er->er_td == NULL);
	TAILQ_INSERT_TAIL(&er->er_tdlist, et, et_link);
	ck_epoch_begin(&er->er_record, &et->et_section);
	critical_exit();
}

void
epoch_enter(epoch_t epoch)
{
	epoch_record_t er;

	MPASS(cold || epoch != NULL);
	INIT_CHECK(epoch);
	critical_enter();
	er = epoch_currecord(epoch);
#ifdef INVARIANTS
	if (er->er_record.active == 0) {
		MPASS(er->er_td == NULL);
		er->er_td = curthread;
	} else {
		/*
		 * We've recursed, just make sure our accounting isn't
		 * wrong.
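		 * A non-preemptible epoch may be entered recursively on
		 * the same CPU; in that case er_record.active was already
		 * non-zero and er_td must still name the current thread.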
		 */
		MPASS(er->er_td == curthread);
	}
#endif
	ck_epoch_begin(&er->er_record, NULL);
}

void
_epoch_exit_preempt(epoch_t epoch, epoch_tracker_t et EPOCH_FILE_LINE)
{
	struct epoch_record *er;
	struct thread *td;

	INIT_CHECK(epoch);
	td = curthread;
	critical_enter();
	sched_unpin();
	THREAD_SLEEPING_OK();
	er = epoch_currecord(epoch);
	MPASS(epoch->e_flags & EPOCH_PREEMPT);
	MPASS(et != NULL);
	MPASS(et->et_td == td);
#ifdef INVARIANTS
	et->et_td = (void *)0xDEADBEEF;
	/* Record-level tracking is reserved for non-preemptible epochs. */
	MPASS(er->er_td == NULL);
#endif
	ck_epoch_end(&er->er_record, &et->et_section);
	TAILQ_REMOVE(&er->er_tdlist, et, et_link);
	er->er_gen++;
	if (__predict_false(et->et_old_priority != td->td_priority))
		epoch_adjust_prio(td, et->et_old_priority);
	critical_exit();
#ifdef EPOCH_TRACE
	epoch_trace_exit(td, epoch, et, file, line);
#endif
}

void
epoch_exit(epoch_t epoch)
{
	epoch_record_t er;

	INIT_CHECK(epoch);
	er = epoch_currecord(epoch);
	ck_epoch_end(&er->er_record, NULL);
#ifdef INVARIANTS
	MPASS(er->er_td == curthread);
	if (er->er_record.active == 0)
		er->er_td = NULL;
#endif
	critical_exit();
}

/*
 * epoch_block_handler_preempt() is a callback from the CK code when another
 * thread is currently in an epoch section.
 */
static void
epoch_block_handler_preempt(struct ck_epoch *global __unused,
    ck_epoch_record_t *cr, void *arg __unused)
{
	epoch_record_t record;
	struct thread *td, *owner, *curwaittd;
	struct epoch_tracker *tdwait;
	struct turnstile *ts;
	struct lock_object *lock;
	int spincount, gen;
	int locksheld __unused;

	record = __containerof(cr, struct epoch_record, er_record);
	td = curthread;
	locksheld = td->td_locks;
	spincount = 0;
	counter_u64_add(block_count, 1);
	/*
	 * We lost a race and there are no longer any threads
	 * on the CPU in an epoch section.
	 */
	if (TAILQ_EMPTY(&record->er_tdlist))
		return;

	if (record->er_cpuid != curcpu) {
		/*
		 * If the head of the list is running, we can wait for it
		 * to remove itself from the list and thus save us the
		 * overhead of a migration.
		 */
		gen = record->er_gen;
		thread_unlock(td);
		/*
		 * We can't actually check if the waiting thread is running
		 * so we simply poll for it to exit before giving up and
		 * migrating.
		 */
		do {
			cpu_spinwait();
		} while (!TAILQ_EMPTY(&record->er_tdlist) &&
		    gen == record->er_gen &&
		    spincount++ < MAX_ADAPTIVE_SPIN);
		thread_lock(td);
		/*
		 * If the generation has changed we can poll again;
		 * otherwise we need to migrate.
		 */
		if (gen != record->er_gen)
			return;
		/*
		 * Being on the same CPU as that of the record on which
		 * we need to wait allows us access to the thread
		 * list associated with that CPU. We can then examine the
		 * oldest thread in the queue and wait on its turnstile
		 * until it resumes and so on until a grace period
		 * elapses.
		 */
		counter_u64_add(migrate_count, 1);
		sched_bind(td, record->er_cpuid);
		/*
		 * At this point we need to return to the ck code
		 * to scan to see if a grace period has elapsed.
		 * We can't move on to check the thread list, because
		 * in the meantime new threads may have arrived that
		 * in fact belong to a different epoch.
		 */
		return;
	}
	/*
	 * Try to find a thread in an epoch section on this CPU
	 * waiting on a turnstile. Otherwise find the lowest
	 * priority thread (highest prio value) and drop our priority
	 * to match to allow it to run.
	 */
	TAILQ_FOREACH(tdwait, &record->er_tdlist, et_link) {
		/*
		 * Propagate our priority to any other waiters to prevent us
		 * from starving them. They will have their original priority
		 * restored on exit from epoch_wait().
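		 * This is standard priority lending: the blocked waiter
		 * donates its higher priority to the thread still inside
		 * the section so that it can run and exit sooner.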
*/ curwaittd = tdwait->et_td; if (!TD_IS_INHIBITED(curwaittd) && curwaittd->td_priority > td->td_priority) { critical_enter(); thread_unlock(td); thread_lock(curwaittd); sched_prio(curwaittd, td->td_priority); thread_unlock(curwaittd); thread_lock(td); critical_exit(); } if (TD_IS_INHIBITED(curwaittd) && TD_ON_LOCK(curwaittd) && ((ts = curwaittd->td_blocked) != NULL)) { /* * We unlock td to allow turnstile_wait to reacquire * the thread lock. Before unlocking it we enter a * critical section to prevent preemption after we * reenable interrupts by dropping the thread lock in * order to prevent curwaittd from getting to run. */ critical_enter(); thread_unlock(td); if (turnstile_lock(ts, &lock, &owner)) { if (ts == curwaittd->td_blocked) { MPASS(TD_IS_INHIBITED(curwaittd) && TD_ON_LOCK(curwaittd)); critical_exit(); turnstile_wait(ts, owner, curwaittd->td_tsqueue); counter_u64_add(turnstile_count, 1); thread_lock(td); return; } turnstile_unlock(ts, lock); } thread_lock(td); critical_exit(); KASSERT(td->td_locks == locksheld, ("%d extra locks held", td->td_locks - locksheld)); } } /* * We didn't find any threads actually blocked on a lock * so we have nothing to do except context switch away. */ counter_u64_add(switch_count, 1); mi_switch(SW_VOL | SWT_RELINQUISH); /* * It is important the thread lock is dropped while yielding * to allow other threads to acquire the lock pointed to by * TDQ_LOCKPTR(td). Currently mi_switch() will unlock the * thread lock before returning. Else a deadlock like * situation might happen. */ thread_lock(td); } void epoch_wait_preempt(epoch_t epoch) { struct thread *td; int was_bound; int old_cpu; int old_pinned; u_char old_prio; int locks __unused; MPASS(cold || epoch != NULL); INIT_CHECK(epoch); td = curthread; #ifdef INVARIANTS locks = curthread->td_locks; MPASS(epoch->e_flags & EPOCH_PREEMPT); if ((epoch->e_flags & EPOCH_LOCKED) == 0) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "epoch_wait() can be long running"); KASSERT(!in_epoch(epoch), ("epoch_wait_preempt() called in the middle " "of an epoch section of the same epoch")); #endif DROP_GIANT(); thread_lock(td); old_cpu = PCPU_GET(cpuid); old_pinned = td->td_pinned; old_prio = td->td_priority; was_bound = sched_is_bound(td); sched_unbind(td); td->td_pinned = 0; sched_bind(td, old_cpu); ck_epoch_synchronize_wait(&epoch->e_epoch, epoch_block_handler_preempt, NULL); /* restore CPU binding, if any */ if (was_bound != 0) { sched_bind(td, old_cpu); } else { /* get thread back to initial CPU, if any */ if (old_pinned != 0) sched_bind(td, old_cpu); sched_unbind(td); } /* restore pinned after bind */ td->td_pinned = old_pinned; /* restore thread priority */ sched_prio(td, old_prio); thread_unlock(td); PICKUP_GIANT(); KASSERT(td->td_locks == locks, ("%d residual locks held", td->td_locks - locks)); } static void epoch_block_handler(struct ck_epoch *g __unused, ck_epoch_record_t *c __unused, void *arg __unused) { cpu_spinwait(); } void epoch_wait(epoch_t epoch) { MPASS(cold || epoch != NULL); INIT_CHECK(epoch); MPASS(epoch->e_flags == 0); critical_enter(); ck_epoch_synchronize_wait(&epoch->e_epoch, epoch_block_handler, NULL); critical_exit(); } void epoch_call(epoch_t epoch, epoch_callback_t callback, epoch_context_t ctx) { epoch_record_t er; ck_epoch_entry_t *cb; cb = (void *)ctx; MPASS(callback); /* too early in boot to have epoch set up */ if (__predict_false(epoch == NULL)) goto boottime; #if !defined(EARLY_AP_STARTUP) if (__predict_false(inited < 2)) goto boottime; #endif critical_enter(); 
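	/*
	 * Inside the critical section the per-CPU record is stable:
	 * account for one more pending callback on this CPU, then hand
	 * the entry to CK for deferred execution.
	 */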
*DPCPU_PTR(epoch_cb_count) += 1; er = epoch_currecord(epoch); ck_epoch_call(&er->er_record, cb, (ck_epoch_cb_t *)callback); critical_exit(); return; boottime: callback(ctx); } static void epoch_call_task(void *arg __unused) { ck_stack_entry_t *cursor, *head, *next; ck_epoch_record_t *record; epoch_record_t er; epoch_t epoch; ck_stack_t cb_stack; int i, npending, total; ck_stack_init(&cb_stack); critical_enter(); epoch_enter(global_epoch); for (total = i = 0; i != MAX_EPOCHS; i++) { epoch = epoch_array + i; if (__predict_false( atomic_load_acq_int(&epoch->e_in_use) == 0)) continue; er = epoch_currecord(epoch); record = &er->er_record; if ((npending = record->n_pending) == 0) continue; ck_epoch_poll_deferred(record, &cb_stack); total += npending - record->n_pending; } epoch_exit(global_epoch); *DPCPU_PTR(epoch_cb_count) -= total; critical_exit(); counter_u64_add(epoch_call_count, total); counter_u64_add(epoch_call_task_count, 1); head = ck_stack_batch_pop_npsc(&cb_stack); for (cursor = head; cursor != NULL; cursor = next) { struct ck_epoch_entry *entry = ck_epoch_entry_container(cursor); next = CK_STACK_NEXT(cursor); entry->function(entry); } } static int in_epoch_verbose_preempt(epoch_t epoch, int dump_onfail) { epoch_record_t er; struct epoch_tracker *tdwait; struct thread *td; MPASS(epoch != NULL); MPASS((epoch->e_flags & EPOCH_PREEMPT) != 0); td = curthread; if (THREAD_CAN_SLEEP()) return (0); critical_enter(); er = epoch_currecord(epoch); TAILQ_FOREACH(tdwait, &er->er_tdlist, et_link) if (tdwait->et_td == td) { critical_exit(); return (1); } #ifdef INVARIANTS if (dump_onfail) { MPASS(td->td_pinned); printf("cpu: %d id: %d\n", curcpu, td->td_tid); TAILQ_FOREACH(tdwait, &er->er_tdlist, et_link) printf("td_tid: %d ", tdwait->et_td->td_tid); printf("\n"); } #endif critical_exit(); return (0); } #ifdef INVARIANTS static void epoch_assert_nocpu(epoch_t epoch, struct thread *td) { epoch_record_t er; int cpu; bool crit; crit = td->td_critnest > 0; /* Check for a critical section mishap. */ CPU_FOREACH(cpu) { er = zpcpu_get_cpu(epoch->e_pcpu_record, cpu); KASSERT(er->er_td != td, ("%s critical section in epoch '%s', from cpu %d", (crit ? "exited" : "re-entered"), epoch->e_name, cpu)); } } #else #define epoch_assert_nocpu(e, td) do {} while (0) #endif int in_epoch_verbose(epoch_t epoch, int dump_onfail) { epoch_record_t er; struct thread *td; if (__predict_false((epoch) == NULL)) return (0); if ((epoch->e_flags & EPOCH_PREEMPT) != 0) return (in_epoch_verbose_preempt(epoch, dump_onfail)); /* * The thread being in a critical section is a necessary * condition to be correctly inside a non-preemptible epoch, * so it's definitely not in this epoch. */ td = curthread; if (td->td_critnest == 0) { epoch_assert_nocpu(epoch, td); return (0); } /* * The current cpu is in a critical section, so the epoch record will be * stable for the rest of this function. Knowing that the record is not * active is sufficient for knowing whether we're in this epoch or not, * since it's a pcpu record. 
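	 * (epoch_enter() recorded curthread in er_td when the record went
	 * active; that is what the MPASS below checks against.)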
*/ er = epoch_currecord(epoch); if (er->er_record.active == 0) { epoch_assert_nocpu(epoch, td); return (0); } MPASS(er->er_td == td); return (1); } int in_epoch(epoch_t epoch) { return (in_epoch_verbose(epoch, 0)); } static void epoch_drain_cb(struct epoch_context *ctx) { struct epoch *epoch = __containerof(ctx, struct epoch_record, er_drain_ctx)->er_parent; if (atomic_fetchadd_int(&epoch->e_drain_count, -1) == 1) { mtx_lock(&epoch->e_drain_mtx); wakeup(epoch); mtx_unlock(&epoch->e_drain_mtx); } } void epoch_drain_callbacks(epoch_t epoch) { epoch_record_t er; struct thread *td; int was_bound; int old_pinned; int old_cpu; int cpu; WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "epoch_drain_callbacks() may sleep!"); /* too early in boot to have epoch set up */ if (__predict_false(epoch == NULL)) return; #if !defined(EARLY_AP_STARTUP) if (__predict_false(inited < 2)) return; #endif DROP_GIANT(); sx_xlock(&epoch->e_drain_sx); mtx_lock(&epoch->e_drain_mtx); td = curthread; thread_lock(td); old_cpu = PCPU_GET(cpuid); old_pinned = td->td_pinned; was_bound = sched_is_bound(td); sched_unbind(td); td->td_pinned = 0; CPU_FOREACH(cpu) epoch->e_drain_count++; CPU_FOREACH(cpu) { er = zpcpu_get_cpu(epoch->e_pcpu_record, cpu); sched_bind(td, cpu); epoch_call(epoch, &epoch_drain_cb, &er->er_drain_ctx); } /* restore CPU binding, if any */ if (was_bound != 0) { sched_bind(td, old_cpu); } else { /* get thread back to initial CPU, if any */ if (old_pinned != 0) sched_bind(td, old_cpu); sched_unbind(td); } /* restore pinned after bind */ td->td_pinned = old_pinned; thread_unlock(td); while (epoch->e_drain_count != 0) msleep(epoch, &epoch->e_drain_mtx, PZERO, "EDRAIN", 0); mtx_unlock(&epoch->e_drain_mtx); sx_xunlock(&epoch->e_drain_sx); PICKUP_GIANT(); } diff --git a/sys/netgraph/ng_base.c b/sys/netgraph/ng_base.c index 092231850f18..205b6041053b 100644 --- a/sys/netgraph/ng_base.c +++ b/sys/netgraph/ng_base.c @@ -1,3900 +1,3902 @@ /*- * Copyright (c) 1996-1999 Whistle Communications, Inc. * All rights reserved. * * Subject to the following obligations and disclaimer of warranty, use and * redistribution of this software, in source or object code forms, with or * without modifications are expressly permitted by Whistle Communications; * provided, however, that: * 1. Any and all reproductions of the source or object code must include the * copyright notice above and the following disclaimer of warranties; and * 2. No rights are granted, in any manner or form, to use Whistle * Communications, Inc. trademarks, including the mark "WHISTLE * COMMUNICATIONS" on advertising, endorsements, or otherwise except as * such appears in the above copyright notice or in the software. * * THIS SOFTWARE IS BEING PROVIDED BY WHISTLE COMMUNICATIONS "AS IS", AND * TO THE MAXIMUM EXTENT PERMITTED BY LAW, WHISTLE COMMUNICATIONS MAKES NO * REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, REGARDING THIS SOFTWARE, * INCLUDING WITHOUT LIMITATION, ANY AND ALL IMPLIED WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. * WHISTLE COMMUNICATIONS DOES NOT WARRANT, GUARANTEE, OR MAKE ANY * REPRESENTATIONS REGARDING THE USE OF, OR THE RESULTS OF THE USE OF THIS * SOFTWARE IN TERMS OF ITS CORRECTNESS, ACCURACY, RELIABILITY OR OTHERWISE. 
* IN NO EVENT SHALL WHISTLE COMMUNICATIONS BE LIABLE FOR ANY DAMAGES * RESULTING FROM OR ARISING OUT OF ANY USE OF THIS SOFTWARE, INCLUDING * WITHOUT LIMITATION, ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, * PUNITIVE, OR CONSEQUENTIAL DAMAGES, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES, LOSS OF USE, DATA OR PROFITS, HOWEVER CAUSED AND UNDER ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF WHISTLE COMMUNICATIONS IS ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. * * Authors: Julian Elischer * Archie Cobbs * * $FreeBSD$ * $Whistle: ng_base.c,v 1.39 1999/01/28 23:54:53 julian Exp $ */ /* * This file implements the base netgraph code. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include + #include #include #include #include #include MODULE_VERSION(netgraph, NG_ABI_VERSION); /* Mutex to protect topology events. */ static struct rwlock ng_topo_lock; #define TOPOLOGY_RLOCK() rw_rlock(&ng_topo_lock) #define TOPOLOGY_RUNLOCK() rw_runlock(&ng_topo_lock) #define TOPOLOGY_WLOCK() rw_wlock(&ng_topo_lock) #define TOPOLOGY_WUNLOCK() rw_wunlock(&ng_topo_lock) #define TOPOLOGY_NOTOWNED() rw_assert(&ng_topo_lock, RA_UNLOCKED) #ifdef NETGRAPH_DEBUG static struct mtx ng_nodelist_mtx; /* protects global node/hook lists */ static struct mtx ngq_mtx; /* protects the queue item list */ static SLIST_HEAD(, ng_node) ng_allnodes; static LIST_HEAD(, ng_node) ng_freenodes; /* in debug, we never free() them */ static SLIST_HEAD(, ng_hook) ng_allhooks; static LIST_HEAD(, ng_hook) ng_freehooks; /* in debug, we never free() them */ static void ng_dumpitems(void); static void ng_dumpnodes(void); static void ng_dumphooks(void); #endif /* NETGRAPH_DEBUG */ /* * DEAD versions of the structures. * In order to avoid races, it is sometimes necessary to point * at SOMETHING even though theoretically, the current entity is * INVALID. Use these to avoid these races. */ struct ng_type ng_deadtype = { NG_ABI_VERSION, "dead", NULL, /* modevent */ NULL, /* constructor */ NULL, /* rcvmsg */ NULL, /* shutdown */ NULL, /* newhook */ NULL, /* findhook */ NULL, /* connect */ NULL, /* rcvdata */ NULL, /* disconnect */ NULL, /* cmdlist */ }; struct ng_node ng_deadnode = { "dead", &ng_deadtype, NGF_INVALID, 0, /* numhooks */ NULL, /* private */ 0, /* ID */ LIST_HEAD_INITIALIZER(ng_deadnode.nd_hooks), {}, /* all_nodes list entry */ {}, /* id hashtable list entry */ { 0, 0, {}, /* should never use! 
(should hang) */ {}, /* workqueue entry */ STAILQ_HEAD_INITIALIZER(ng_deadnode.nd_input_queue.queue), }, 1, /* refs */ NULL, /* vnet */ #ifdef NETGRAPH_DEBUG ND_MAGIC, __FILE__, __LINE__, {NULL} #endif /* NETGRAPH_DEBUG */ }; struct ng_hook ng_deadhook = { "dead", NULL, /* private */ HK_INVALID | HK_DEAD, 0, /* undefined data link type */ &ng_deadhook, /* Peer is self */ &ng_deadnode, /* attached to deadnode */ {}, /* hooks list */ NULL, /* override rcvmsg() */ NULL, /* override rcvdata() */ 1, /* refs always >= 1 */ #ifdef NETGRAPH_DEBUG HK_MAGIC, __FILE__, __LINE__, {NULL} #endif /* NETGRAPH_DEBUG */ }; /* * END DEAD STRUCTURES */ /* List nodes with unallocated work */ static STAILQ_HEAD(, ng_node) ng_worklist = STAILQ_HEAD_INITIALIZER(ng_worklist); static struct mtx ng_worklist_mtx; /* MUST LOCK NODE FIRST */ /* List of installed types */ static LIST_HEAD(, ng_type) ng_typelist; static struct rwlock ng_typelist_lock; #define TYPELIST_RLOCK() rw_rlock(&ng_typelist_lock) #define TYPELIST_RUNLOCK() rw_runlock(&ng_typelist_lock) #define TYPELIST_WLOCK() rw_wlock(&ng_typelist_lock) #define TYPELIST_WUNLOCK() rw_wunlock(&ng_typelist_lock) /* Hash related definitions. */ LIST_HEAD(nodehash, ng_node); VNET_DEFINE_STATIC(struct nodehash *, ng_ID_hash); VNET_DEFINE_STATIC(u_long, ng_ID_hmask); VNET_DEFINE_STATIC(u_long, ng_nodes); VNET_DEFINE_STATIC(struct nodehash *, ng_name_hash); VNET_DEFINE_STATIC(u_long, ng_name_hmask); VNET_DEFINE_STATIC(u_long, ng_named_nodes); #define V_ng_ID_hash VNET(ng_ID_hash) #define V_ng_ID_hmask VNET(ng_ID_hmask) #define V_ng_nodes VNET(ng_nodes) #define V_ng_name_hash VNET(ng_name_hash) #define V_ng_name_hmask VNET(ng_name_hmask) #define V_ng_named_nodes VNET(ng_named_nodes) static struct rwlock ng_idhash_lock; #define IDHASH_RLOCK() rw_rlock(&ng_idhash_lock) #define IDHASH_RUNLOCK() rw_runlock(&ng_idhash_lock) #define IDHASH_WLOCK() rw_wlock(&ng_idhash_lock) #define IDHASH_WUNLOCK() rw_wunlock(&ng_idhash_lock) /* Method to find a node.. used twice so do it here */ #define NG_IDHASH_FN(ID) ((ID) % (V_ng_ID_hmask + 1)) #define NG_IDHASH_FIND(ID, node) \ do { \ rw_assert(&ng_idhash_lock, RA_LOCKED); \ LIST_FOREACH(node, &V_ng_ID_hash[NG_IDHASH_FN(ID)], \ nd_idnodes) { \ if (NG_NODE_IS_VALID(node) \ && (NG_NODE_ID(node) == ID)) { \ break; \ } \ } \ } while (0) static struct rwlock ng_namehash_lock; #define NAMEHASH_RLOCK() rw_rlock(&ng_namehash_lock) #define NAMEHASH_RUNLOCK() rw_runlock(&ng_namehash_lock) #define NAMEHASH_WLOCK() rw_wlock(&ng_namehash_lock) #define NAMEHASH_WUNLOCK() rw_wunlock(&ng_namehash_lock) /* Internal functions */ static int ng_add_hook(node_p node, const char *name, hook_p * hookp); static int ng_generic_msg(node_p here, item_p item, hook_p lasthook); static ng_ID_t ng_decodeidname(const char *name); static int ngb_mod_event(module_t mod, int event, void *data); static void ng_worklist_add(node_p node); static void ngthread(void *); static int ng_apply_item(node_p node, item_p item, int rw); static void ng_flush_input_queue(node_p node); static node_p ng_ID2noderef(ng_ID_t ID); static int ng_con_nodes(item_p item, node_p node, const char *name, node_p node2, const char *name2); static int ng_con_part2(node_p node, item_p item, hook_p hook); static int ng_con_part3(node_p node, item_p item, hook_p hook); static int ng_mkpeer(node_p node, const char *name, const char *name2, char *type); static void ng_name_rehash(void); static void ng_ID_rehash(void); /* Imported, these used to be externally visible, some may go back. 
*/ void ng_destroy_hook(hook_p hook); int ng_path2noderef(node_p here, const char *path, node_p *dest, hook_p *lasthook); int ng_make_node(const char *type, node_p *nodepp); int ng_path_parse(char *addr, char **node, char **path, char **hook); void ng_rmnode(node_p node, hook_p dummy1, void *dummy2, int dummy3); void ng_unname(node_p node); /* Our own netgraph malloc type */ MALLOC_DEFINE(M_NETGRAPH, "netgraph", "netgraph structures and ctrl messages"); MALLOC_DEFINE(M_NETGRAPH_MSG, "netgraph_msg", "netgraph name storage"); static MALLOC_DEFINE(M_NETGRAPH_HOOK, "netgraph_hook", "netgraph hook structures"); static MALLOC_DEFINE(M_NETGRAPH_NODE, "netgraph_node", "netgraph node structures"); static MALLOC_DEFINE(M_NETGRAPH_ITEM, "netgraph_item", "netgraph item structures"); /* Should not be visible outside this file */ #define _NG_ALLOC_HOOK(hook) \ hook = malloc(sizeof(*hook), M_NETGRAPH_HOOK, M_NOWAIT | M_ZERO) #define _NG_ALLOC_NODE(node) \ node = malloc(sizeof(*node), M_NETGRAPH_NODE, M_NOWAIT | M_ZERO) #define NG_QUEUE_LOCK_INIT(n) \ mtx_init(&(n)->q_mtx, "ng_node", NULL, MTX_DEF) #define NG_QUEUE_LOCK(n) \ mtx_lock(&(n)->q_mtx) #define NG_QUEUE_UNLOCK(n) \ mtx_unlock(&(n)->q_mtx) #define NG_WORKLIST_LOCK_INIT() \ mtx_init(&ng_worklist_mtx, "ng_worklist", NULL, MTX_DEF) #define NG_WORKLIST_LOCK() \ mtx_lock(&ng_worklist_mtx) #define NG_WORKLIST_UNLOCK() \ mtx_unlock(&ng_worklist_mtx) #define NG_WORKLIST_SLEEP() \ mtx_sleep(&ng_worklist, &ng_worklist_mtx, PI_NET, "sleep", 0) #define NG_WORKLIST_WAKEUP() \ wakeup_one(&ng_worklist) #ifdef NETGRAPH_DEBUG /*----------------------------------------------*/ /* * In debug mode: * In an attempt to help track reference count screwups * we do not free objects back to the malloc system, but keep them * in a local cache where we can examine them and keep information safely * after they have been freed. * We use this scheme for nodes and hooks, and to some extent for items. 
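 * "Freed" nodes and hooks therefore stay on the ng_freenodes and
 * ng_freehooks lists with their magic number cleared, so a stale
 * reference can still be recognized from a debugger.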
*/ static __inline hook_p ng_alloc_hook(void) { hook_p hook; SLIST_ENTRY(ng_hook) temp; mtx_lock(&ng_nodelist_mtx); hook = LIST_FIRST(&ng_freehooks); if (hook) { LIST_REMOVE(hook, hk_hooks); bcopy(&hook->hk_all, &temp, sizeof(temp)); bzero(hook, sizeof(struct ng_hook)); bcopy(&temp, &hook->hk_all, sizeof(temp)); mtx_unlock(&ng_nodelist_mtx); hook->hk_magic = HK_MAGIC; } else { mtx_unlock(&ng_nodelist_mtx); _NG_ALLOC_HOOK(hook); if (hook) { hook->hk_magic = HK_MAGIC; mtx_lock(&ng_nodelist_mtx); SLIST_INSERT_HEAD(&ng_allhooks, hook, hk_all); mtx_unlock(&ng_nodelist_mtx); } } return (hook); } static __inline node_p ng_alloc_node(void) { node_p node; SLIST_ENTRY(ng_node) temp; mtx_lock(&ng_nodelist_mtx); node = LIST_FIRST(&ng_freenodes); if (node) { LIST_REMOVE(node, nd_nodes); bcopy(&node->nd_all, &temp, sizeof(temp)); bzero(node, sizeof(struct ng_node)); bcopy(&temp, &node->nd_all, sizeof(temp)); mtx_unlock(&ng_nodelist_mtx); node->nd_magic = ND_MAGIC; } else { mtx_unlock(&ng_nodelist_mtx); _NG_ALLOC_NODE(node); if (node) { node->nd_magic = ND_MAGIC; mtx_lock(&ng_nodelist_mtx); SLIST_INSERT_HEAD(&ng_allnodes, node, nd_all); mtx_unlock(&ng_nodelist_mtx); } } return (node); } #define NG_ALLOC_HOOK(hook) do { (hook) = ng_alloc_hook(); } while (0) #define NG_ALLOC_NODE(node) do { (node) = ng_alloc_node(); } while (0) #define NG_FREE_HOOK(hook) \ do { \ mtx_lock(&ng_nodelist_mtx); \ LIST_INSERT_HEAD(&ng_freehooks, hook, hk_hooks); \ hook->hk_magic = 0; \ mtx_unlock(&ng_nodelist_mtx); \ } while (0) #define NG_FREE_NODE(node) \ do { \ mtx_lock(&ng_nodelist_mtx); \ LIST_INSERT_HEAD(&ng_freenodes, node, nd_nodes); \ node->nd_magic = 0; \ mtx_unlock(&ng_nodelist_mtx); \ } while (0) #else /* NETGRAPH_DEBUG */ /*----------------------------------------------*/ #define NG_ALLOC_HOOK(hook) _NG_ALLOC_HOOK(hook) #define NG_ALLOC_NODE(node) _NG_ALLOC_NODE(node) #define NG_FREE_HOOK(hook) do { free((hook), M_NETGRAPH_HOOK); } while (0) #define NG_FREE_NODE(node) do { free((node), M_NETGRAPH_NODE); } while (0) #endif /* NETGRAPH_DEBUG */ /*----------------------------------------------*/ /* Set this to kdb_enter("X") to catch all errors as they occur */ #ifndef TRAP_ERROR #define TRAP_ERROR() #endif VNET_DEFINE_STATIC(ng_ID_t, nextID) = 1; #define V_nextID VNET(nextID) #ifdef INVARIANTS #define CHECK_DATA_MBUF(m) do { \ struct mbuf *n; \ int total; \ \ M_ASSERTPKTHDR(m); \ for (total = 0, n = (m); n != NULL; n = n->m_next) { \ total += n->m_len; \ if (n->m_nextpkt != NULL) \ panic("%s: m_nextpkt", __func__); \ } \ \ if ((m)->m_pkthdr.len != total) { \ panic("%s: %d != %d", \ __func__, (m)->m_pkthdr.len, total); \ } \ } while (0) #else #define CHECK_DATA_MBUF(m) #endif #define ERROUT(x) do { error = (x); goto done; } while (0) /************************************************************************ Parse type definitions for generic messages ************************************************************************/ /* Handy structure parse type defining macro */ #define DEFINE_PARSE_STRUCT_TYPE(lo, up, args) \ static const struct ng_parse_struct_field \ ng_ ## lo ## _type_fields[] = NG_GENERIC_ ## up ## _INFO args; \ static const struct ng_parse_type ng_generic_ ## lo ## _type = { \ &ng_parse_struct_type, \ &ng_ ## lo ## _type_fields \ } DEFINE_PARSE_STRUCT_TYPE(mkpeer, MKPEER, ()); DEFINE_PARSE_STRUCT_TYPE(connect, CONNECT, ()); DEFINE_PARSE_STRUCT_TYPE(name, NAME, ()); DEFINE_PARSE_STRUCT_TYPE(rmhook, RMHOOK, ()); DEFINE_PARSE_STRUCT_TYPE(nodeinfo, NODEINFO, ()); DEFINE_PARSE_STRUCT_TYPE(typeinfo, TYPEINFO, 
()); DEFINE_PARSE_STRUCT_TYPE(linkinfo, LINKINFO, (&ng_generic_nodeinfo_type)); /* Get length of an array when the length is stored as a 32 bit value immediately preceding the array -- as with struct namelist and struct typelist. */ static int ng_generic_list_getLength(const struct ng_parse_type *type, const u_char *start, const u_char *buf) { return *((const u_int32_t *)(buf - 4)); } /* Get length of the array of struct linkinfo inside a struct hooklist */ static int ng_generic_linkinfo_getLength(const struct ng_parse_type *type, const u_char *start, const u_char *buf) { const struct hooklist *hl = (const struct hooklist *)start; return hl->nodeinfo.hooks; } /* Array type for a variable length array of struct namelist */ static const struct ng_parse_array_info ng_nodeinfoarray_type_info = { &ng_generic_nodeinfo_type, &ng_generic_list_getLength }; static const struct ng_parse_type ng_generic_nodeinfoarray_type = { &ng_parse_array_type, &ng_nodeinfoarray_type_info }; /* Array type for a variable length array of struct typelist */ static const struct ng_parse_array_info ng_typeinfoarray_type_info = { &ng_generic_typeinfo_type, &ng_generic_list_getLength }; static const struct ng_parse_type ng_generic_typeinfoarray_type = { &ng_parse_array_type, &ng_typeinfoarray_type_info }; /* Array type for array of struct linkinfo in struct hooklist */ static const struct ng_parse_array_info ng_generic_linkinfo_array_type_info = { &ng_generic_linkinfo_type, &ng_generic_linkinfo_getLength }; static const struct ng_parse_type ng_generic_linkinfo_array_type = { &ng_parse_array_type, &ng_generic_linkinfo_array_type_info }; DEFINE_PARSE_STRUCT_TYPE(typelist, TYPELIST, (&ng_generic_typeinfoarray_type)); DEFINE_PARSE_STRUCT_TYPE(hooklist, HOOKLIST, (&ng_generic_nodeinfo_type, &ng_generic_linkinfo_array_type)); DEFINE_PARSE_STRUCT_TYPE(listnodes, LISTNODES, (&ng_generic_nodeinfoarray_type)); /* List of commands and how to convert arguments to/from ASCII */ static const struct ng_cmdlist ng_generic_cmds[] = { { NGM_GENERIC_COOKIE, NGM_SHUTDOWN, "shutdown", NULL, NULL }, { NGM_GENERIC_COOKIE, NGM_MKPEER, "mkpeer", &ng_generic_mkpeer_type, NULL }, { NGM_GENERIC_COOKIE, NGM_CONNECT, "connect", &ng_generic_connect_type, NULL }, { NGM_GENERIC_COOKIE, NGM_NAME, "name", &ng_generic_name_type, NULL }, { NGM_GENERIC_COOKIE, NGM_RMHOOK, "rmhook", &ng_generic_rmhook_type, NULL }, { NGM_GENERIC_COOKIE, NGM_NODEINFO, "nodeinfo", NULL, &ng_generic_nodeinfo_type }, { NGM_GENERIC_COOKIE, NGM_LISTHOOKS, "listhooks", NULL, &ng_generic_hooklist_type }, { NGM_GENERIC_COOKIE, NGM_LISTNAMES, "listnames", NULL, &ng_generic_listnodes_type /* same as NGM_LISTNODES */ }, { NGM_GENERIC_COOKIE, NGM_LISTNODES, "listnodes", NULL, &ng_generic_listnodes_type }, { NGM_GENERIC_COOKIE, NGM_LISTTYPES, "listtypes", NULL, &ng_generic_typelist_type }, { NGM_GENERIC_COOKIE, NGM_TEXT_CONFIG, "textconfig", NULL, &ng_parse_string_type }, { NGM_GENERIC_COOKIE, NGM_TEXT_STATUS, "textstatus", NULL, &ng_parse_string_type }, { NGM_GENERIC_COOKIE, NGM_ASCII2BINARY, "ascii2binary", &ng_parse_ng_mesg_type, &ng_parse_ng_mesg_type }, { NGM_GENERIC_COOKIE, NGM_BINARY2ASCII, "binary2ascii", &ng_parse_ng_mesg_type, &ng_parse_ng_mesg_type }, { 0 } }; /************************************************************************ Node routines ************************************************************************/ /* * Instantiate a node of the requested type */ int ng_make_node(const char *typename, node_p *nodepp) { struct ng_type *type; int error; /* Check that the type 
makes sense */
	if (typename == NULL) {
		TRAP_ERROR();
		return (EINVAL);
	}

	/*
	 * Locate the node type. If we fail we return. Do not try to
	 * load a module.
	 */
	if ((type = ng_findtype(typename)) == NULL)
		return (ENXIO);

	/*
	 * If we have a constructor, then make the node and
	 * call the constructor to do type specific initialisation.
	 */
	if (type->constructor != NULL) {
		if ((error = ng_make_node_common(type, nodepp)) == 0) {
			if ((error = ((*type->constructor)(*nodepp))) != 0) {
				NG_NODE_UNREF(*nodepp);
			}
		}
	} else {
		/*
		 * Node has no constructor. We cannot ask for one
		 * to be made. It must be brought into existence by
		 * some external agency. The external agency should
		 * call ng_make_node_common() directly to get the
		 * netgraph part initialised.
		 */
		TRAP_ERROR();
		error = EINVAL;
	}
	return (error);
}

/*
 * Generic node creation. Called by node initialisation for externally
 * instantiated nodes (e.g. hardware, sockets, etc.).
 * The returned node has a reference count of 1.
 */
int
ng_make_node_common(struct ng_type *type, node_p *nodepp)
{
	node_p node;

	/* Require the node type to have been already installed */
	if (ng_findtype(type->name) == NULL) {
		TRAP_ERROR();
		return (EINVAL);
	}

	/* Make a node and try to attach it to the type */
	NG_ALLOC_NODE(node);
	if (node == NULL) {
		TRAP_ERROR();
		return (ENOMEM);
	}
	node->nd_type = type;
#ifdef VIMAGE
	node->nd_vnet = curvnet;
#endif
	NG_NODE_REF(node);	/* note reference */
	type->refs++;

	NG_QUEUE_LOCK_INIT(&node->nd_input_queue);
	STAILQ_INIT(&node->nd_input_queue.queue);
	node->nd_input_queue.q_flags = 0;

	/* Initialize hook list for new node */
	LIST_INIT(&node->nd_hooks);

	/* Get an ID and put us in the hash chain. */
	IDHASH_WLOCK();
	for (;;) { /* wrap protection, even if silly */
		node_p node2 = NULL;

		node->nd_ID = V_nextID++; /* 137/sec for 1 year before wrap */

		/* Is there a problem with the new number? */
		NG_IDHASH_FIND(node->nd_ID, node2); /* already taken? */
		if ((node->nd_ID != 0) && (node2 == NULL)) {
			break;
		}
	}
	V_ng_nodes++;
	if (V_ng_nodes * 2 > V_ng_ID_hmask)
		ng_ID_rehash();
	LIST_INSERT_HEAD(&V_ng_ID_hash[NG_IDHASH_FN(node->nd_ID)], node,
	    nd_idnodes);
	IDHASH_WUNLOCK();

	/* Done */
	*nodepp = node;
	return (0);
}

/*
 * Forcibly start the shutdown process on a node. Either call
 * its shutdown method, or do the default shutdown if there is
 * no type-specific method.
 *
 * We can only be called from a shutdown message, so we know we have
 * a writer lock, and therefore exclusive access. It also means
 * that we should not be on the work queue, but we check anyhow.
 *
 * Persistent node types must have a type-specific method which either
 * allocates a new node (in which case this one is irretrievably going
 * away), or cleans up anything it needs and just makes the node valid
 * again (in which case we allow the node to survive).
 *
 * XXX We need to think of how to tell a persistent node that we
 * REALLY need to go away because the hardware has gone or we
 * are rebooting, etc.
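 *
 * For illustration (a hypothetical type, not part of this change), a
 * persistent type's shutdown method usually looks something like:
 *
 *	static int
 *	ng_example_shutdown(node_p node)
 *	{
 *		if ((node->nd_flags & NGF_REALLY_DIE) == 0) {
 *			NG_NODE_REVIVE(node);
 *			return (0);
 *		}
 *		NG_NODE_UNREF(node);
 *		return (0);
 *	}
 *
 * where NG_NODE_REVIVE() marks the node valid again so that the code
 * below lets it survive.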
 */
void
ng_rmnode(node_p node, hook_p dummy1, void *dummy2, int dummy3)
{
	hook_p hook;

	/* Check if it's already shutting down */
	if ((node->nd_flags & NGF_CLOSING) != 0)
		return;

	if (node == &ng_deadnode) {
		printf("shutdown called on deadnode\n");
		return;
	}

	/* Add an extra reference so it doesn't go away during this */
	NG_NODE_REF(node);

	/*
	 * Mark it invalid so any newcomers know not to try to use it.
	 * Also add our own mark so we can't recurse; note that
	 * NGF_INVALID alone does not do this, as it's also set during
	 * creation.
	 */
	node->nd_flags |= NGF_INVALID | NGF_CLOSING;

	/* If the node has a pre-shutdown method, call it first */
	if (node->nd_type && node->nd_type->close)
		(*node->nd_type->close)(node);

	/* Notify all remaining connected nodes to disconnect */
	while ((hook = LIST_FIRST(&node->nd_hooks)) != NULL)
		ng_destroy_hook(hook);

	/*
	 * Drain the input queue forcibly.
	 * It has no hooks, so what's it going to do, bleed on someone?
	 * Theoretically we came here from a queue entry that was added
	 * just before the queue was closed, so it should be empty anyway.
	 * Also removes us from worklist if needed.
	 */
	ng_flush_input_queue(node);

	/* Ask the type if it has anything to do in this case */
	if (node->nd_type && node->nd_type->shutdown) {
		(*node->nd_type->shutdown)(node);
		if (NG_NODE_IS_VALID(node)) {
			/*
			 * Well, blow me down if the node code hasn't declared
			 * that it doesn't want to die.
			 * Presumably it is a persistent node.
			 * If we REALLY want it to go away,
			 * e.g. hardware going away,
			 * our caller should set NGF_REALLY_DIE in nd_flags.
			 */
			node->nd_flags &= ~(NGF_INVALID | NGF_CLOSING);
			NG_NODE_UNREF(node); /* Assume they still have theirs */
			return;
		}
	} else {	/* do the default thing */
		NG_NODE_UNREF(node);
	}

	ng_unname(node); /* basically a NOP these days */

	/*
	 * Remove the extra reference, possibly the last.
	 * Other holders of references may include timeout callouts,
	 * but theoretically the node's supposed to have cancelled them.
	 * Possibly hardware dependencies may force a driver to 'linger'
	 * with a reference.
	 */
	NG_NODE_UNREF(node);
}

/*
 * Remove a reference to the node, possibly the last.
 * deadnode always acts as if it were the last.
 */
void
ng_unref_node(node_p node)
{

	if (node == &ng_deadnode)
		return;

	CURVNET_SET(node->nd_vnet);

	if (refcount_release(&node->nd_refs)) { /* we were the last */
		node->nd_type->refs--; /* XXX maybe should get types lock? */
		NAMEHASH_WLOCK();
		if (NG_NODE_HAS_NAME(node)) {
			V_ng_named_nodes--;
			LIST_REMOVE(node, nd_nodes);
		}
		NAMEHASH_WUNLOCK();

		IDHASH_WLOCK();
		V_ng_nodes--;
		LIST_REMOVE(node, nd_idnodes);
		IDHASH_WUNLOCK();

		mtx_destroy(&node->nd_input_queue.q_mtx);
		NG_FREE_NODE(node);
	}
	CURVNET_RESTORE();
}

/************************************************************************
			Node ID handling
************************************************************************/
static node_p
ng_ID2noderef(ng_ID_t ID)
{
	node_p node;

	IDHASH_RLOCK();
	NG_IDHASH_FIND(ID, node);
	if (node)
		NG_NODE_REF(node);
	IDHASH_RUNLOCK();
	return (node);
}

ng_ID_t
ng_node2ID(node_cp node)
{
	return (node ? NG_NODE_ID(node) : 0);
}

/************************************************************************
			Node name handling
************************************************************************/

/*
 * Assign a node a name.
 */
int
ng_name_node(node_p node, const char *name)
{
	uint32_t hash;
	node_p node2;
	int i;

	/* Rename without change is a noop */
	if (strcmp(NG_NODE_NAME(node), name) == 0)
		return (0);

	/* Check the name is valid */
	for (i = 0; i < NG_NODESIZ; i++) {
		if (name[i] == '\0' || name[i] == '.'
|| name[i] == ':') break; } if (i == 0 || name[i] != '\0') { TRAP_ERROR(); return (EINVAL); } if (ng_decodeidname(name) != 0) { /* valid IDs not allowed here */ TRAP_ERROR(); return (EINVAL); } NAMEHASH_WLOCK(); if (V_ng_named_nodes * 2 > V_ng_name_hmask) ng_name_rehash(); hash = hash32_str(name, HASHINIT) & V_ng_name_hmask; /* Check the name isn't already being used. */ LIST_FOREACH(node2, &V_ng_name_hash[hash], nd_nodes) if (NG_NODE_IS_VALID(node2) && (strcmp(NG_NODE_NAME(node2), name) == 0)) { NAMEHASH_WUNLOCK(); return (EADDRINUSE); } if (NG_NODE_HAS_NAME(node)) LIST_REMOVE(node, nd_nodes); else V_ng_named_nodes++; /* Copy it. */ strlcpy(NG_NODE_NAME(node), name, NG_NODESIZ); /* Update name hash. */ LIST_INSERT_HEAD(&V_ng_name_hash[hash], node, nd_nodes); NAMEHASH_WUNLOCK(); return (0); } /* * Find a node by absolute name. The name should NOT end with ':' * The name "." means "this node" and "[xxx]" means "the node * with ID (ie, at address) xxx". * * Returns the node if found, else NULL. * Eventually should add something faster than a sequential search. * Note it acquires a reference on the node so you can be sure it's still * there. */ node_p ng_name2noderef(node_p here, const char *name) { node_p node; ng_ID_t temp; int hash; /* "." means "this node" */ if (strcmp(name, ".") == 0) { NG_NODE_REF(here); return(here); } /* Check for name-by-ID */ if ((temp = ng_decodeidname(name)) != 0) { return (ng_ID2noderef(temp)); } /* Find node by name. */ hash = hash32_str(name, HASHINIT) & V_ng_name_hmask; NAMEHASH_RLOCK(); LIST_FOREACH(node, &V_ng_name_hash[hash], nd_nodes) if (NG_NODE_IS_VALID(node) && (strcmp(NG_NODE_NAME(node), name) == 0)) { NG_NODE_REF(node); break; } NAMEHASH_RUNLOCK(); return (node); } /* * Decode an ID name, eg. "[f03034de]". Returns 0 if the * string is not valid, otherwise returns the value. */ static ng_ID_t ng_decodeidname(const char *name) { const int len = strlen(name); char *eptr; u_long val; /* Check for proper length, brackets, no leading junk */ if ((len < 3) || (name[0] != '[') || (name[len - 1] != ']') || (!isxdigit(name[1]))) return ((ng_ID_t)0); /* Decode number */ val = strtoul(name + 1, &eptr, 16); if ((eptr - name != len - 1) || (val == ULONG_MAX) || (val == 0)) return ((ng_ID_t)0); return ((ng_ID_t)val); } /* * Remove a name from a node. This should only be called * when shutting down and removing the node. */ void ng_unname(node_p node) { } /* * Allocate a bigger name hash. */ static void ng_name_rehash(void) { struct nodehash *new; uint32_t hash; u_long hmask; node_p node, node2; int i; new = hashinit_flags((V_ng_name_hmask + 1) * 2, M_NETGRAPH_NODE, &hmask, HASH_NOWAIT); if (new == NULL) return; for (i = 0; i <= V_ng_name_hmask; i++) LIST_FOREACH_SAFE(node, &V_ng_name_hash[i], nd_nodes, node2) { #ifdef INVARIANTS LIST_REMOVE(node, nd_nodes); #endif hash = hash32_str(NG_NODE_NAME(node), HASHINIT) & hmask; LIST_INSERT_HEAD(&new[hash], node, nd_nodes); } hashdestroy(V_ng_name_hash, M_NETGRAPH_NODE, V_ng_name_hmask); V_ng_name_hash = new; V_ng_name_hmask = hmask; } /* * Allocate a bigger ID hash. 
*/ static void ng_ID_rehash(void) { struct nodehash *new; uint32_t hash; u_long hmask; node_p node, node2; int i; new = hashinit_flags((V_ng_ID_hmask + 1) * 2, M_NETGRAPH_NODE, &hmask, HASH_NOWAIT); if (new == NULL) return; for (i = 0; i <= V_ng_ID_hmask; i++) LIST_FOREACH_SAFE(node, &V_ng_ID_hash[i], nd_idnodes, node2) { #ifdef INVARIANTS LIST_REMOVE(node, nd_idnodes); #endif hash = (node->nd_ID % (hmask + 1)); LIST_INSERT_HEAD(&new[hash], node, nd_idnodes); } hashdestroy(V_ng_ID_hash, M_NETGRAPH_NODE, V_ng_name_hmask); V_ng_ID_hash = new; V_ng_ID_hmask = hmask; } /************************************************************************ Hook routines Names are not optional. Hooks are always connected, except for a brief moment within these routines. On invalidation or during creation they are connected to the 'dead' hook. ************************************************************************/ /* * Remove a hook reference */ void ng_unref_hook(hook_p hook) { if (hook == &ng_deadhook) return; if (refcount_release(&hook->hk_refs)) { /* we were the last */ if (_NG_HOOK_NODE(hook)) /* it'll probably be ng_deadnode */ _NG_NODE_UNREF((_NG_HOOK_NODE(hook))); NG_FREE_HOOK(hook); } } /* * Add an unconnected hook to a node. Only used internally. * Assumes node is locked. (XXX not yet true ) */ static int ng_add_hook(node_p node, const char *name, hook_p *hookp) { hook_p hook; int error = 0; /* Check that the given name is good */ if (name == NULL) { TRAP_ERROR(); return (EINVAL); } if (ng_findhook(node, name) != NULL) { TRAP_ERROR(); return (EEXIST); } /* Allocate the hook and link it up */ NG_ALLOC_HOOK(hook); if (hook == NULL) { TRAP_ERROR(); return (ENOMEM); } hook->hk_refs = 1; /* add a reference for us to return */ hook->hk_flags = HK_INVALID; hook->hk_peer = &ng_deadhook; /* start off this way */ hook->hk_node = node; NG_NODE_REF(node); /* each hook counts as a reference */ /* Set hook name */ strlcpy(NG_HOOK_NAME(hook), name, NG_HOOKSIZ); /* * Check if the node type code has something to say about it * If it fails, the unref of the hook will also unref the node. */ if (node->nd_type->newhook != NULL) { if ((error = (*node->nd_type->newhook)(node, hook, name))) { NG_HOOK_UNREF(hook); /* this frees the hook */ return (error); } } /* * The 'type' agrees so far, so go ahead and link it in. * We'll ask again later when we actually connect the hooks. */ LIST_INSERT_HEAD(&node->nd_hooks, hook, hk_hooks); node->nd_numhooks++; NG_HOOK_REF(hook); /* one for the node */ if (hookp) *hookp = hook; return (0); } /* * Find a hook * * Node types may supply their own optimized routines for finding * hooks. If none is supplied, we just do a linear search. * XXX Possibly we should add a reference to the hook? */ hook_p ng_findhook(node_p node, const char *name) { hook_p hook; if (node->nd_type->findhook != NULL) return (*node->nd_type->findhook)(node, name); LIST_FOREACH(hook, &node->nd_hooks, hk_hooks) { if (NG_HOOK_IS_VALID(hook) && (strcmp(NG_HOOK_NAME(hook), name) == 0)) return (hook); } return (NULL); } /* * Destroy a hook * * As hooks are always attached, this really destroys two hooks. * The one given, and the one attached to it. Disconnect the hooks * from each other first. We reconnect the peer hook to the 'dead' * hook so that it can still exist after we depart. We then * send the peer its own destroy message. This ensures that we only * interact with the peer's structures when it is locked processing that * message. 
We hold a reference to the peer hook so we are guaranteed that * the peer hook and node are still going to exist until * we are finished there as the hook holds a ref on the node. * We run this same code again on the peer hook, but that time it is already * attached to the 'dead' hook. * * This routine is called at all stages of hook creation * on error detection and must be able to handle any such stage. */ void ng_destroy_hook(hook_p hook) { hook_p peer; node_p node; if (hook == &ng_deadhook) { /* better safe than sorry */ printf("ng_destroy_hook called on deadhook\n"); return; } /* * Protect divorce process with mutex, to avoid races on * simultaneous disconnect. */ TOPOLOGY_WLOCK(); hook->hk_flags |= HK_INVALID; peer = NG_HOOK_PEER(hook); node = NG_HOOK_NODE(hook); if (peer && (peer != &ng_deadhook)) { /* * Set the peer to point to ng_deadhook; * from this moment on we are effectively independent of it. * Send it an rmhook message of its own. */ peer->hk_peer = &ng_deadhook; /* They no longer know us */ hook->hk_peer = &ng_deadhook; /* Nor us, them */ if (NG_HOOK_NODE(peer) == &ng_deadnode) { /* * If it's already divorced from a node, * just free it. */ TOPOLOGY_WUNLOCK(); } else { TOPOLOGY_WUNLOCK(); ng_rmhook_self(peer); /* Send it a surprise */ } NG_HOOK_UNREF(peer); /* account for peer link */ NG_HOOK_UNREF(hook); /* account for peer link */ } else TOPOLOGY_WUNLOCK(); TOPOLOGY_NOTOWNED(); /* * Remove the hook from the node's list to avoid possible recursion * in case the disconnection results in node shutdown. */ if (node == &ng_deadnode) { /* happens if called from ng_con_nodes() */ return; } LIST_REMOVE(hook, hk_hooks); node->nd_numhooks--; if (node->nd_type->disconnect) { /* * The type handler may elect to destroy the node, so don't * trust its existence after this point (except that we still * hold a reference on it, inherited from the hook we are * destroying). */ (*node->nd_type->disconnect) (hook); } /* * Note that because we will point to ng_deadnode, the original node * is not decremented automatically so we do that manually. */ _NG_HOOK_NODE(hook) = &ng_deadnode; NG_NODE_UNREF(node); /* We no longer point to it so adjust count */ NG_HOOK_UNREF(hook); /* Account for linkage (in list) to node */ } /* * Take two hooks on a node and merge the connection so that the given node * is effectively bypassed. */ int ng_bypass(hook_p hook1, hook_p hook2) { if (hook1->hk_node != hook2->hk_node) { TRAP_ERROR(); return (EINVAL); } TOPOLOGY_WLOCK(); if (NG_HOOK_NOT_VALID(hook1) || NG_HOOK_NOT_VALID(hook2)) { TOPOLOGY_WUNLOCK(); return (EINVAL); } hook1->hk_peer->hk_peer = hook2->hk_peer; hook2->hk_peer->hk_peer = hook1->hk_peer; hook1->hk_peer = &ng_deadhook; hook2->hk_peer = &ng_deadhook; TOPOLOGY_WUNLOCK(); NG_HOOK_UNREF(hook1); NG_HOOK_UNREF(hook2); /* XXX If we ever cache methods on hooks update them as well */ ng_destroy_hook(hook1); ng_destroy_hook(hook2); return (0); } /* * Install a new netgraph type */ int ng_newtype(struct ng_type *tp) { const size_t namelen = strlen(tp->name); /* Check version and type name fields */ if ((tp->version != NG_ABI_VERSION) || (namelen == 0) || (namelen >= NG_TYPESIZ)) { TRAP_ERROR(); if (tp->version != NG_ABI_VERSION) { printf("Netgraph: Node type rejected. ABI mismatch. 
" "Suggest recompile\n"); } return (EINVAL); } /* Check for name collision */ if (ng_findtype(tp->name) != NULL) { TRAP_ERROR(); return (EEXIST); } /* Link in new type */ TYPELIST_WLOCK(); LIST_INSERT_HEAD(&ng_typelist, tp, types); tp->refs = 1; /* first ref is linked list */ TYPELIST_WUNLOCK(); return (0); } /* * unlink a netgraph type * If no examples exist */ int ng_rmtype(struct ng_type *tp) { /* Check for name collision */ if (tp->refs != 1) { TRAP_ERROR(); return (EBUSY); } /* Unlink type */ TYPELIST_WLOCK(); LIST_REMOVE(tp, types); TYPELIST_WUNLOCK(); return (0); } /* * Look for a type of the name given */ struct ng_type * ng_findtype(const char *typename) { struct ng_type *type; TYPELIST_RLOCK(); LIST_FOREACH(type, &ng_typelist, types) { if (strcmp(type->name, typename) == 0) break; } TYPELIST_RUNLOCK(); return (type); } /************************************************************************ Composite routines ************************************************************************/ /* * Connect two nodes using the specified hooks, using queued functions. */ static int ng_con_part3(node_p node, item_p item, hook_p hook) { int error = 0; /* * When we run, we know that the node 'node' is locked for us. * Our caller has a reference on the hook. * Our caller has a reference on the node. * (In this case our caller is ng_apply_item() ). * The peer hook has a reference on the hook. * We are all set up except for the final call to the node, and * the clearing of the INVALID flag. */ if (NG_HOOK_NODE(hook) == &ng_deadnode) { /* * The node must have been freed again since we last visited * here. ng_destry_hook() has this effect but nothing else does. * We should just release our references and * free anything we can think of. * Since we know it's been destroyed, and it's our caller * that holds the references, just return. */ ERROUT(ENOENT); } if (hook->hk_node->nd_type->connect) { if ((error = (*hook->hk_node->nd_type->connect) (hook))) { ng_destroy_hook(hook); /* also zaps peer */ printf("failed in ng_con_part3()\n"); ERROUT(error); } } /* * XXX this is wrong for SMP. Possibly we need * to separate out 'create' and 'invalid' flags. * should only set flags on hooks we have locked under our node. */ hook->hk_flags &= ~HK_INVALID; done: NG_FREE_ITEM(item); return (error); } static int ng_con_part2(node_p node, item_p item, hook_p hook) { hook_p peer; int error = 0; /* * When we run, we know that the node 'node' is locked for us. * Our caller has a reference on the hook. * Our caller has a reference on the node. * (In this case our caller is ng_apply_item() ). * The peer hook has a reference on the hook. * our node pointer points to the 'dead' node. * First check the hook name is unique. * Should not happen because we checked before queueing this. */ if (ng_findhook(node, NG_HOOK_NAME(hook)) != NULL) { TRAP_ERROR(); ng_destroy_hook(hook); /* should destroy peer too */ printf("failed in ng_con_part2()\n"); ERROUT(EEXIST); } /* * Check if the node type code has something to say about it * If it fails, the unref of the hook will also unref the attached node, * however since that node is 'ng_deadnode' this will do nothing. * The peer hook will also be destroyed. */ if (node->nd_type->newhook != NULL) { if ((error = (*node->nd_type->newhook)(node, hook, hook->hk_name))) { ng_destroy_hook(hook); /* should destroy peer too */ printf("failed in ng_con_part2()\n"); ERROUT(error); } } /* * The 'type' agrees so far, so go ahead and link it in. * We'll ask again later when we actually connect the hooks. 
hook->hk_node = node; /* just overwrite ng_deadnode */ NG_NODE_REF(node); /* each hook counts as a reference */ LIST_INSERT_HEAD(&node->nd_hooks, hook, hk_hooks); node->nd_numhooks++; NG_HOOK_REF(hook); /* one for the node */ /* * We now have a symmetrical situation, where both hooks have been * linked to their nodes, the newhook methods have been called, * and the references are all correct. The hooks are still marked * as invalid, as we have not called the 'connect' methods * yet. * We can call the local one immediately as we have the * node locked, but we need to queue the remote one. */ if (hook->hk_node->nd_type->connect) { if ((error = (*hook->hk_node->nd_type->connect) (hook))) { ng_destroy_hook(hook); /* also zaps peer */ printf("failed in ng_con_part2(A)\n"); ERROUT(error); } } /* * Acquire topo mutex to avoid race with ng_destroy_hook(). */ TOPOLOGY_RLOCK(); peer = hook->hk_peer; if (peer == &ng_deadhook) { TOPOLOGY_RUNLOCK(); printf("failed in ng_con_part2(B)\n"); ng_destroy_hook(hook); ERROUT(ENOENT); } TOPOLOGY_RUNLOCK(); if ((error = ng_send_fn2(peer->hk_node, peer, item, &ng_con_part3, NULL, 0, NG_REUSE_ITEM))) { printf("failed in ng_con_part2(C)\n"); ng_destroy_hook(hook); /* also zaps peer */ return (error); /* item was consumed. */ } hook->hk_flags &= ~HK_INVALID; /* need both to be able to work */ return (0); /* item was consumed. */ done: NG_FREE_ITEM(item); return (error); } /* * Connect this node with another node. We assume that this node is * currently locked, as we are only called from an NGM_CONNECT message. */ static int ng_con_nodes(item_p item, node_p node, const char *name, node_p node2, const char *name2) { int error; hook_p hook; hook_p hook2; if (ng_findhook(node2, name2) != NULL) { return (EEXIST); } if ((error = ng_add_hook(node, name, &hook))) /* gives us a ref */ return (error); /* Allocate the other hook and link it up */ NG_ALLOC_HOOK(hook2); if (hook2 == NULL) { TRAP_ERROR(); ng_destroy_hook(hook); /* XXX check ref counts so far */ NG_HOOK_UNREF(hook); /* including our ref */ return (ENOMEM); } hook2->hk_refs = 1; /* start with a reference for us. */ hook2->hk_flags = HK_INVALID; hook2->hk_peer = hook; /* Link the two together */ hook->hk_peer = hook2; NG_HOOK_REF(hook); /* Add a ref for the peer to each */ NG_HOOK_REF(hook2); hook2->hk_node = &ng_deadnode; strlcpy(NG_HOOK_NAME(hook2), name2, NG_HOOKSIZ); /* * Queue the function above. * Processing continues in that function in the lock context of * the other node. */ if ((error = ng_send_fn2(node2, hook2, item, &ng_con_part2, NULL, 0, NG_NOFLAGS))) { printf("failed in ng_con_nodes(): %d\n", error); ng_destroy_hook(hook); /* also zaps peer */ } NG_HOOK_UNREF(hook); /* Let each hook go if it wants to */ NG_HOOK_UNREF(hook2); return (error); } /* * Make a peer and connect. * We assume that the local node is locked. * The new node probably doesn't need a lock until * it has a hook, because it cannot really have any work until then, * but we should think about it a bit more. * * The problem may come if the other node also fires up * some hardware or a timer or some other source of activation, * or it may already get a command msg via its ID. * * We could use the same method as ng_con_nodes() but we'd have * to add the ability to remove the node when failing. (Not hard, just * make arg1 point to the node to remove.) * Unless of course we just ignore failure to connect and leave * an unconnected node?
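 * For reference, a sketch of how such a request is typically built
 * before it arrives here via NGM_MKPEER (the type and hook names
 * below are hypothetical):
 */
#if 0
static int
example_request_mkpeer(void)
{
	struct ng_mesg *msg;
	struct ngm_mkpeer *mkp;

	NG_MKMESSAGE(msg, NGM_GENERIC_COOKIE, NGM_MKPEER,
	    sizeof(*mkp), M_NOWAIT);
	if (msg == NULL)
		return (ENOMEM);
	mkp = (struct ngm_mkpeer *)msg->data;
	strlcpy(mkp->type, "tee", sizeof(mkp->type));
	strlcpy(mkp->ourhook, "right", sizeof(mkp->ourhook));
	strlcpy(mkp->peerhook, "left", sizeof(mkp->peerhook));
	/* ...would then be delivered, e.g. with NG_SEND_MSG_ID()... */
	NG_FREE_MSG(msg);
	return (0);
}
#endif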
static int ng_mkpeer(node_p node, const char *name, const char *name2, char *type) { node_p node2; hook_p hook1, hook2; int error; if ((error = ng_make_node(type, &node2))) { return (error); } if ((error = ng_add_hook(node, name, &hook1))) { /* gives us a ref */ ng_rmnode(node2, NULL, NULL, 0); return (error); } if ((error = ng_add_hook(node2, name2, &hook2))) { ng_rmnode(node2, NULL, NULL, 0); ng_destroy_hook(hook1); NG_HOOK_UNREF(hook1); return (error); } /* * Actually link the two hooks together. */ hook1->hk_peer = hook2; hook2->hk_peer = hook1; /* Each hook is referenced by the other */ NG_HOOK_REF(hook1); NG_HOOK_REF(hook2); /* Give each node the opportunity to veto the pending connection */ if (hook1->hk_node->nd_type->connect) { error = (*hook1->hk_node->nd_type->connect) (hook1); } if ((error == 0) && hook2->hk_node->nd_type->connect) { error = (*hook2->hk_node->nd_type->connect) (hook2); } /* * Drop the references we were holding on the two hooks. */ if (error) { ng_destroy_hook(hook2); /* also zaps hook1 */ ng_rmnode(node2, NULL, NULL, 0); } else { /* As a last act, allow the hooks to be used */ hook1->hk_flags &= ~HK_INVALID; hook2->hk_flags &= ~HK_INVALID; } NG_HOOK_UNREF(hook1); NG_HOOK_UNREF(hook2); return (error); } /************************************************************************ Utility routines to send self messages ************************************************************************/ /* Shut this node down as soon as everyone is clear of it */ /* Should add arg "immediately" to jump the queue */ int ng_rmnode_self(node_p node) { int error; if (node == &ng_deadnode) return (0); node->nd_flags |= NGF_INVALID; if (node->nd_flags & NGF_CLOSING) return (0); error = ng_send_fn(node, NULL, &ng_rmnode, NULL, 0); return (error); } static void ng_rmhook_part2(node_p node, hook_p hook, void *arg1, int arg2) { ng_destroy_hook(hook); return; } int ng_rmhook_self(hook_p hook) { int error; node_p node = NG_HOOK_NODE(hook); if (node == &ng_deadnode) return (0); error = ng_send_fn(node, hook, &ng_rmhook_part2, NULL, 0); return (error); } /*********************************************************************** * Parse and verify a string of the form: <NODE:><PATH> * * Such a string can refer to a specific node or a specific hook * on a specific node, depending on how you look at it. In the * latter case, the PATH component must not end in a dot. * * Both <NODE:> and <PATH> are optional. The <PATH> is a string * of hook names separated by dots. This breaks out the original * string, setting *nodep to "NODE" (or NULL if none) and *pathp * to "PATH" (or NULL if degenerate). Also, *hookp will point to * the final hook component of <PATH>, if any, otherwise NULL. * * This returns -1 if the path is malformed. The char ** are optional. ***********************************************************************/ int ng_path_parse(char *addr, char **nodep, char **pathp, char **hookp) { char *node, *path, *hook; int k; /* * Extract absolute NODE, if any */ for (path = addr; *path && *path != ':'; path++); if (*path) { node = addr; /* Here's the NODE */ *path++ = '\0'; /* Here's the PATH */ /* Node name must not be empty */ if (!*node) return -1; /* A name of "." is OK; otherwise '.' 
not allowed */ if (strcmp(node, ".") != 0) { for (k = 0; node[k]; k++) if (node[k] == '.') return -1; } } else { node = NULL; /* No absolute NODE */ path = addr; /* Here's the PATH */ } /* Snoop for illegal characters in PATH */ for (k = 0; path[k]; k++) if (path[k] == ':') return -1; /* Check for no repeated dots in PATH */ for (k = 0; path[k]; k++) if (path[k] == '.' && path[k + 1] == '.') return -1; /* Remove extra (degenerate) dots from beginning or end of PATH */ if (path[0] == '.') path++; if (*path && path[strlen(path) - 1] == '.') path[strlen(path) - 1] = 0; /* If PATH has a dot, then we're not talking about a hook */ if (*path) { for (hook = path, k = 0; path[k]; k++) if (path[k] == '.') { hook = NULL; break; } } else path = hook = NULL; /* Done */ if (nodep) *nodep = node; if (pathp) *pathp = path; if (hookp) *hookp = hook; return (0); } /* * Given a path, which may be absolute or relative, and a starting node, * return the destination node. */ int ng_path2noderef(node_p here, const char *address, node_p *destp, hook_p *lasthook) { char fullpath[NG_PATHSIZ]; char *nodename, *path; node_p node, oldnode; /* Initialize */ if (destp == NULL) { TRAP_ERROR(); return EINVAL; } *destp = NULL; /* Make a writable copy of address for ng_path_parse() */ strncpy(fullpath, address, sizeof(fullpath) - 1); fullpath[sizeof(fullpath) - 1] = '\0'; /* Parse out node and sequence of hooks */ if (ng_path_parse(fullpath, &nodename, &path, NULL) < 0) { TRAP_ERROR(); return EINVAL; } /* * For an absolute address, jump to the starting node. * Note that this holds a reference on the node for us. * Don't forget to drop the reference if we don't need it. */ if (nodename) { node = ng_name2noderef(here, nodename); if (node == NULL) { TRAP_ERROR(); return (ENOENT); } } else { if (here == NULL) { TRAP_ERROR(); return (EINVAL); } node = here; NG_NODE_REF(node); } if (path == NULL) { if (lasthook != NULL) *lasthook = NULL; *destp = node; return (0); } /* * Now follow the sequence of hooks * * XXXGL: The path may demolish as we go the sequence, but if * we hold the topology mutex at critical places, then, I hope, * we would always have valid pointers in hand, although the * path behind us may no longer exist. */ for (;;) { hook_p hook; char *segment; /* * Break out the next path segment. Replace the dot we just * found with a NUL; "path" points to the next segment (or the * NUL at the end). */ for (segment = path; *path != '\0'; path++) { if (*path == '.') { *path++ = '\0'; break; } } /* We have a segment, so look for a hook by that name */ hook = ng_findhook(node, segment); TOPOLOGY_WLOCK(); /* Can't get there from here... */ if (hook == NULL || NG_HOOK_PEER(hook) == NULL || NG_HOOK_NOT_VALID(hook) || NG_HOOK_NOT_VALID(NG_HOOK_PEER(hook))) { TRAP_ERROR(); NG_NODE_UNREF(node); TOPOLOGY_WUNLOCK(); return (ENOENT); } /* * Hop on over to the next node * XXX * Big race conditions here as hooks and nodes go away * *** Idea.. store an ng_ID_t in each hook and use that * instead of the direct hook in this crawl? 
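 *
 * Worked example (hypothetical names): for the address "foo:a.b" we
 * start at node "foo", hop to the peer of its hook "a", then to the
 * peer of that node's hook "b"; *destp is then the final node and
 * *lasthook the hook on that node at the far end of "b".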
*/ oldnode = node; if ((node = NG_PEER_NODE(hook))) NG_NODE_REF(node); /* XXX RACE */ NG_NODE_UNREF(oldnode); /* XXX another race */ if (NG_NODE_NOT_VALID(node)) { NG_NODE_UNREF(node); /* XXX more races */ TOPOLOGY_WUNLOCK(); TRAP_ERROR(); return (ENXIO); } if (*path == '\0') { if (lasthook != NULL) { if (hook != NULL) { *lasthook = NG_HOOK_PEER(hook); NG_HOOK_REF(*lasthook); } else *lasthook = NULL; } TOPOLOGY_WUNLOCK(); *destp = node; return (0); } TOPOLOGY_WUNLOCK(); } } /***************************************************************\ * Input queue handling. * All activities are submitted to the node via the input queue * which implements a multiple-reader/single-writer gate. * Items which cannot be handled immediately are queued. * * read-write queue locking inline functions * \***************************************************************/ static __inline void ng_queue_rw(node_p node, item_p item, int rw); static __inline item_p ng_dequeue(node_p node, int *rw); static __inline item_p ng_acquire_read(node_p node, item_p item); static __inline item_p ng_acquire_write(node_p node, item_p item); static __inline void ng_leave_read(node_p node); static __inline void ng_leave_write(node_p node); /* * Definition of the bits fields in the ng_queue flag word. * Defined here rather than in netgraph.h because no-one should fiddle * with them. * * The ordering here may be important! don't shuffle these. */ /*- Safety Barrier--------+ (adjustable to suit taste) (not used yet) | V +-------+-------+-------+-------+-------+-------+-------+-------+ | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |A|c|t|i|v|e| |R|e|a|d|e|r| |C|o|u|n|t| | | | | | | | | |P|A| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |O|W| +-------+-------+-------+-------+-------+-------+-------+-------+ \___________________________ ____________________________/ | | V | | [active reader count] | | | | Operation Pending -------------------------------+ | | Active Writer ---------------------------------------+ Node queue has such semantics: - All flags modifications are atomic. - Reader count can be incremented only if there is no writer or pending flags. As soon as this can't be done with single operation, it is implemented with spin loop and atomic_cmpset(). - Writer flag can be set only if there is no any bits set. It is implemented with atomic_cmpset(). - Pending flag can be set any time, but to avoid collision on queue processing all queue fields are protected by the mutex. - Queue processing thread reads queue holding the mutex, but releases it while processing. When queue is empty pending flag is removed. */ #define WRITER_ACTIVE 0x00000001 #define OP_PENDING 0x00000002 #define READER_INCREMENT 0x00000004 #define READER_MASK 0xfffffffc /* Not valid if WRITER_ACTIVE is set */ #define SAFETY_BARRIER 0x00100000 /* 128K items queued should be enough */ /* Defines of more elaborate states on the queue */ /* Mask of bits a new read cares about */ #define NGQ_RMASK (WRITER_ACTIVE|OP_PENDING) /* Mask of bits a new write cares about */ #define NGQ_WMASK (NGQ_RMASK|READER_MASK) /* Test to decide if there is something on the queue. */ #define QUEUE_ACTIVE(QP) ((QP)->q_flags & OP_PENDING) /* How to decide what the next queued item is. */ #define HEAD_IS_READER(QP) NGI_QUEUED_READER(STAILQ_FIRST(&(QP)->queue)) #define HEAD_IS_WRITER(QP) NGI_QUEUED_WRITER(STAILQ_FIRST(&(QP)->queue)) /* notused */ /* Read the status to decide if the next item on the queue can now run. 
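 * Worked example with the values above: three active readers plus a
 * queued item give q_flags = 3 * READER_INCREMENT + OP_PENDING = 0x0e.
 * A new direct reader must then queue (the NGQ_RMASK test fails because
 * OP_PENDING is set), while the queued head reader may still proceed
 * below, since only OP_PENDING is set outside the reader count; a
 * queued writer may not, as the reader count lies within NGQ_WMASK.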
*/ #define QUEUED_READER_CAN_PROCEED(QP) \ (((QP)->q_flags & (NGQ_RMASK & ~OP_PENDING)) == 0) #define QUEUED_WRITER_CAN_PROCEED(QP) \ (((QP)->q_flags & (NGQ_WMASK & ~OP_PENDING)) == 0) /* Is there a chance of getting ANY work off the queue? */ #define NEXT_QUEUED_ITEM_CAN_PROCEED(QP) \ ((HEAD_IS_READER(QP)) ? QUEUED_READER_CAN_PROCEED(QP) : \ QUEUED_WRITER_CAN_PROCEED(QP)) #define NGQRW_R 0 #define NGQRW_W 1 #define NGQ2_WORKQ 0x00000001 /* * Taking into account the current state of the queue and node, possibly take * the next entry off the queue and return it. Return NULL if there was * nothing we could return, either because there really was nothing there, or * because the node was in a state where it cannot yet process the next item * on the queue. */ static __inline item_p ng_dequeue(node_p node, int *rw) { item_p item; struct ng_queue *ngq = &node->nd_input_queue; /* This MUST be called with the mutex held. */ mtx_assert(&ngq->q_mtx, MA_OWNED); /* If there is nothing queued, then just return. */ if (!QUEUE_ACTIVE(ngq)) { CTR4(KTR_NET, "%20s: node [%x] (%p) queue empty; " "queue flags 0x%lx", __func__, node->nd_ID, node, ngq->q_flags); return (NULL); } /* * From here, we can assume there is a head item. * We need to find out what it is and if it can be dequeued, given * the current state of the node. */ if (HEAD_IS_READER(ngq)) { while (1) { long t = ngq->q_flags; if (t & WRITER_ACTIVE) { /* There is writer, reader can't proceed. */ CTR4(KTR_NET, "%20s: node [%x] (%p) queued " "reader can't proceed; queue flags 0x%lx", __func__, node->nd_ID, node, t); return (NULL); } if (atomic_cmpset_acq_int(&ngq->q_flags, t, t + READER_INCREMENT)) break; cpu_spinwait(); } /* We have got reader lock for the node. */ *rw = NGQRW_R; } else if (atomic_cmpset_acq_int(&ngq->q_flags, OP_PENDING, OP_PENDING + WRITER_ACTIVE)) { /* We have got writer lock for the node. */ *rw = NGQRW_W; } else { /* There is somebody other, writer can't proceed. */ CTR4(KTR_NET, "%20s: node [%x] (%p) queued writer can't " "proceed; queue flags 0x%lx", __func__, node->nd_ID, node, ngq->q_flags); return (NULL); } /* * Now we dequeue the request (whatever it may be) and correct the * pending flags and the next and last pointers. */ item = STAILQ_FIRST(&ngq->queue); STAILQ_REMOVE_HEAD(&ngq->queue, el_next); if (STAILQ_EMPTY(&ngq->queue)) atomic_clear_int(&ngq->q_flags, OP_PENDING); CTR6(KTR_NET, "%20s: node [%x] (%p) returning item %p as %s; queue " "flags 0x%lx", __func__, node->nd_ID, node, item, *rw ? "WRITER" : "READER", ngq->q_flags); return (item); } /* * Queue a packet to be picked up later by someone else. * If the queue could be run now, add node to the queue handler's worklist. */ static __inline void ng_queue_rw(node_p node, item_p item, int rw) { struct ng_queue *ngq = &node->nd_input_queue; if (rw == NGQRW_W) NGI_SET_WRITER(item); else NGI_SET_READER(item); item->depth = 1; NG_QUEUE_LOCK(ngq); /* Set OP_PENDING flag and enqueue the item. */ atomic_set_int(&ngq->q_flags, OP_PENDING); STAILQ_INSERT_TAIL(&ngq->queue, item, el_next); CTR5(KTR_NET, "%20s: node [%x] (%p) queued item %p as %s", __func__, node->nd_ID, node, item, rw ? "WRITER" : "READER" ); /* * We can take the worklist lock with the node locked * BUT NOT THE REVERSE! */ if (NEXT_QUEUED_ITEM_CAN_PROCEED(ngq)) ng_worklist_add(node); NG_QUEUE_UNLOCK(ngq); } /* Acquire reader lock on node. If node is busy, queue the packet. 
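 * A userland analogue of the gate logic below (a sketch only, with
 * made-up EX_* constants, using C11 atomics in place of the kernel
 * atomic_* API):
 */
#if 0
#include <stdatomic.h>
#include <stdbool.h>

#define EX_WRITER	0x1u	/* cf. WRITER_ACTIVE */
#define EX_PENDING	0x2u	/* cf. OP_PENDING */
#define EX_READER	0x4u	/* cf. READER_INCREMENT */

static bool
example_acquire_read(_Atomic unsigned int *flags)
{
	unsigned int t = atomic_load(flags);

	while ((t & (EX_WRITER | EX_PENDING)) == 0) {
		/* On CAS failure 't' is reloaded and the gate re-tested. */
		if (atomic_compare_exchange_weak(flags, &t, t + EX_READER))
			return (true);
	}
	return (false);		/* busy: the caller must queue the item */
}
#endif
/*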
*/ static __inline item_p ng_acquire_read(node_p node, item_p item) { KASSERT(node != &ng_deadnode, ("%s: working on deadnode", __func__)); /* Reader needs node without writer and pending items. */ for (;;) { long t = node->nd_input_queue.q_flags; if (t & NGQ_RMASK) break; /* Node is not ready for reader. */ if (atomic_cmpset_acq_int(&node->nd_input_queue.q_flags, t, t + READER_INCREMENT)) { /* Successfully grabbed node */ CTR4(KTR_NET, "%20s: node [%x] (%p) acquired item %p", __func__, node->nd_ID, node, item); return (item); } cpu_spinwait(); } /* Queue the request for later. */ ng_queue_rw(node, item, NGQRW_R); return (NULL); } /* Acquire writer lock on node. If node is busy, queue the packet. */ static __inline item_p ng_acquire_write(node_p node, item_p item) { KASSERT(node != &ng_deadnode, ("%s: working on deadnode", __func__)); /* Writer needs completely idle node. */ if (atomic_cmpset_acq_int(&node->nd_input_queue.q_flags, 0, WRITER_ACTIVE)) { /* Successfully grabbed node */ CTR4(KTR_NET, "%20s: node [%x] (%p) acquired item %p", __func__, node->nd_ID, node, item); return (item); } /* Queue the request for later. */ ng_queue_rw(node, item, NGQRW_W); return (NULL); } #if 0 static __inline item_p ng_upgrade_write(node_p node, item_p item) { struct ng_queue *ngq = &node->nd_input_queue; KASSERT(node != &ng_deadnode, ("%s: working on deadnode", __func__)); NGI_SET_WRITER(item); NG_QUEUE_LOCK(ngq); /* * There will never be no readers as we are there ourselves. * Set the WRITER_ACTIVE flags ASAP to block out fast track readers. * The caller we are running from will call ng_leave_read() * soon, so we must account for that. We must leave again with the * READER lock. If we find other readers, then * queue the request for later. However "later" may be rignt now * if there are no readers. We don't really care if there are queued * items as we will bypass them anyhow. */ atomic_add_int(&ngq->q_flags, WRITER_ACTIVE - READER_INCREMENT); if ((ngq->q_flags & (NGQ_WMASK & ~OP_PENDING)) == WRITER_ACTIVE) { NG_QUEUE_UNLOCK(ngq); /* It's just us, act on the item. */ /* will NOT drop writer lock when done */ ng_apply_item(node, item, 0); /* * Having acted on the item, atomically * downgrade back to READER and finish up. */ atomic_add_int(&ngq->q_flags, READER_INCREMENT - WRITER_ACTIVE); /* Our caller will call ng_leave_read() */ return; } /* * It's not just us active, so queue us AT THE HEAD. * "Why?" I hear you ask. * Put us at the head of the queue as we've already been * through it once. If there is nothing else waiting, * set the correct flags. */ if (STAILQ_EMPTY(&ngq->queue)) { /* We've gone from, 0 to 1 item in the queue */ atomic_set_int(&ngq->q_flags, OP_PENDING); CTR3(KTR_NET, "%20s: node [%x] (%p) set OP_PENDING", __func__, node->nd_ID, node); }; STAILQ_INSERT_HEAD(&ngq->queue, item, el_next); CTR4(KTR_NET, "%20s: node [%x] (%p) requeued item %p as WRITER", __func__, node->nd_ID, node, item ); /* Reverse what we did above. That downgrades us back to reader */ atomic_add_int(&ngq->q_flags, READER_INCREMENT - WRITER_ACTIVE); if (QUEUE_ACTIVE(ngq) && NEXT_QUEUED_ITEM_CAN_PROCEED(ngq)) ng_worklist_add(node); NG_QUEUE_UNLOCK(ngq); return; } #endif /* Release reader lock. */ static __inline void ng_leave_read(node_p node) { atomic_subtract_rel_int(&node->nd_input_queue.q_flags, READER_INCREMENT); } /* Release writer lock. */ static __inline void ng_leave_write(node_p node) { atomic_clear_rel_int(&node->nd_input_queue.q_flags, WRITER_ACTIVE); } /* Purge node queue. Called on node shutdown. 
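 * Flushed items may carry an apply callback; its contract (a sketch,
 * with a hypothetical callback and setup helper) is that the last
 * refcount_release() of apply->refs runs apply->apply(context, error),
 * so a sender can be notified of completion:
 */
#if 0
static void
example_done(void *context, int error)
{
	printf("netgraph item finished: error %d\n", error);
}

static void
example_attach(item_p item, struct ng_apply_info *apply)
{
	apply->apply = example_done;
	apply->context = NULL;
	apply->error = 0;
	refcount_init(&apply->refs, 1);
	item->apply = apply;
}
#endif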
static void ng_flush_input_queue(node_p node) { struct ng_queue *ngq = &node->nd_input_queue; item_p item; NG_QUEUE_LOCK(ngq); while ((item = STAILQ_FIRST(&ngq->queue)) != NULL) { STAILQ_REMOVE_HEAD(&ngq->queue, el_next); if (STAILQ_EMPTY(&ngq->queue)) atomic_clear_int(&ngq->q_flags, OP_PENDING); NG_QUEUE_UNLOCK(ngq); /* If the item is supplying a callback, call it with an error */ if (item->apply != NULL) { if (item->depth == 1) item->apply->error = ENOENT; if (refcount_release(&item->apply->refs)) { (*item->apply->apply)(item->apply->context, item->apply->error); } } NG_FREE_ITEM(item); NG_QUEUE_LOCK(ngq); } NG_QUEUE_UNLOCK(ngq); } /*********************************************************************** * Externally visible method for sending or queueing messages or data. ***********************************************************************/ /* * The module code should have filled out the item correctly by this stage: * Common: * reference to destination node. * Reference to destination rcv hook if relevant. * apply pointer must be NULL or reference a valid struct ng_apply_info. * Data: * pointer to mbuf * Control_Message: * pointer to msg. * ID of original sender node. (return address) * Function: * Function pointer * void * argument * integer argument * * The nodes have several routines and macros to help with this task: */ int ng_snd_item(item_p item, int flags) { hook_p hook; node_p node; int queue, rw; struct ng_queue *ngq; int error = 0; /* We are sending the item, so it must be present! */ KASSERT(item != NULL, ("ng_snd_item: item is NULL")); #ifdef NETGRAPH_DEBUG _ngi_check(item, __FILE__, __LINE__); #endif /* Item was sent once more, postpone apply() call. */ if (item->apply) refcount_acquire(&item->apply->refs); node = NGI_NODE(item); /* Node is never optional. */ KASSERT(node != NULL, ("ng_snd_item: node is NULL")); hook = NGI_HOOK(item); /* Valid hook and mbuf are mandatory for data. */ if ((item->el_flags & NGQF_TYPE) == NGQF_DATA) { KASSERT(hook != NULL, ("ng_snd_item: hook for data is NULL")); if (NGI_M(item) == NULL) ERROUT(EINVAL); CHECK_DATA_MBUF(NGI_M(item)); } /* * If the item or the node specifies single threading, force * writer semantics. Similarly, the node may say one hook always * produces writers. These are overrides. */ if (((item->el_flags & NGQF_RW) == NGQF_WRITER) || (node->nd_flags & NGF_FORCE_WRITER) || (hook && (hook->hk_flags & HK_FORCE_WRITER))) { rw = NGQRW_W; } else { rw = NGQRW_R; } /* * If the sender or receiver requests queued delivery, or the call * graph loops back from the outbound to the inbound path, or the * stack usage level is dangerous - enqueue the message. */ if ((flags & NG_QUEUE) || (hook && (hook->hk_flags & HK_QUEUE))) { queue = 1; } else if (hook && (hook->hk_flags & HK_TO_INBOUND) && curthread->td_ng_outbound) { queue = 1; } else { queue = 0; /* * Most netgraph nodes have small stack consumption and * for them 25% of free stack space is more than enough. * Nodes/hooks with higher stack usage should be marked as * HI_STACK. For them 50% of the stack will be guaranteed. * XXX: Values 25% and 50% are completely empirical. */ size_t st, su, sl; GET_STACK_USAGE(st, su); sl = st - su; if ((sl * 4 < st) || ((sl * 2 < st) && ((node->nd_flags & NGF_HI_STACK) || (hook && (hook->hk_flags & HK_HI_STACK))))) queue = 1; } if (queue) { /* Put it on the queue for that node */ ng_queue_rw(node, item, rw); return ((flags & NG_PROGRESS) ? EINPROGRESS : 0); } /* * We already decided how we will be queued or treated. * Try to get the appropriate operating permission.
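 * (Worked example of the stack check above: with a 16 kB kernel stack,
 * st = 16384; at 13 kB used, sl = 3072 and sl * 4 < st, so any node
 * queues; at 10 kB used, sl = 6144, so a normal node proceeds, but a
 * HI_STACK node or hook still queues since sl * 2 < st.)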
*/ if (rw == NGQRW_R) item = ng_acquire_read(node, item); else item = ng_acquire_write(node, item); /* Item was queued while trying to get permission. */ if (item == NULL) return ((flags & NG_PROGRESS) ? EINPROGRESS : 0); NGI_GET_NODE(item, node); /* zaps stored node */ item->depth++; error = ng_apply_item(node, item, rw); /* drops r/w lock when done */ /* If something is waiting on queue and ready, schedule it. */ ngq = &node->nd_input_queue; if (QUEUE_ACTIVE(ngq)) { NG_QUEUE_LOCK(ngq); if (QUEUE_ACTIVE(ngq) && NEXT_QUEUED_ITEM_CAN_PROCEED(ngq)) ng_worklist_add(node); NG_QUEUE_UNLOCK(ngq); } /* * Node may go away as soon as we remove the reference. * Whatever we do, DO NOT access the node again! */ NG_NODE_UNREF(node); return (error); done: /* If was not sent, apply callback here. */ if (item->apply != NULL) { if (item->depth == 0 && error != 0) item->apply->error = error; if (refcount_release(&item->apply->refs)) { (*item->apply->apply)(item->apply->context, item->apply->error); } } NG_FREE_ITEM(item); return (error); } /* * We have an item that was possibly queued somewhere. * It should contain all the information needed * to run it on the appropriate node/hook. * If there is apply pointer and we own the last reference, call apply(). */ static int ng_apply_item(node_p node, item_p item, int rw) { hook_p hook; ng_rcvdata_t *rcvdata; ng_rcvmsg_t *rcvmsg; struct ng_apply_info *apply; int error = 0, depth; /* Node and item are never optional. */ KASSERT(node != NULL, ("ng_apply_item: node is NULL")); KASSERT(item != NULL, ("ng_apply_item: item is NULL")); NGI_GET_HOOK(item, hook); /* clears stored hook */ #ifdef NETGRAPH_DEBUG _ngi_check(item, __FILE__, __LINE__); #endif apply = item->apply; depth = item->depth; switch (item->el_flags & NGQF_TYPE) { case NGQF_DATA: /* * Check things are still ok as when we were queued. */ KASSERT(hook != NULL, ("ng_apply_item: hook for data is NULL")); if (NG_HOOK_NOT_VALID(hook) || NG_NODE_NOT_VALID(node)) { error = EIO; NG_FREE_ITEM(item); break; } /* * If no receive method, just silently drop it. * Give preference to the hook over-ride method. */ if ((!(rcvdata = hook->hk_rcvdata)) && (!(rcvdata = NG_HOOK_NODE(hook)->nd_type->rcvdata))) { error = 0; NG_FREE_ITEM(item); break; } error = (*rcvdata)(hook, item); break; case NGQF_MESG: if (hook && NG_HOOK_NOT_VALID(hook)) { /* * The hook has been zapped then we can't use it. * Immediately drop its reference. * The message may not need it. */ NG_HOOK_UNREF(hook); hook = NULL; } /* * Similarly, if the node is a zombie there is * nothing we can do with it, drop everything. */ if (NG_NODE_NOT_VALID(node)) { TRAP_ERROR(); error = EINVAL; NG_FREE_ITEM(item); break; } /* * Call the appropriate message handler for the object. * It is up to the message handler to free the message. * If it's a generic message, handle it generically, * otherwise call the type's message handler (if it exists). * XXX (race). Remember that a queued message may * reference a node or hook that has just been * invalidated. It will exist as the queue code * is holding a reference, but.. 
*/ if ((NGI_MSG(item)->header.typecookie == NGM_GENERIC_COOKIE) && ((NGI_MSG(item)->header.flags & NGF_RESP) == 0)) { error = ng_generic_msg(node, item, hook); break; } if (((!hook) || (!(rcvmsg = hook->hk_rcvmsg))) && (!(rcvmsg = node->nd_type->rcvmsg))) { TRAP_ERROR(); error = 0; NG_FREE_ITEM(item); break; } error = (*rcvmsg)(node, item, hook); break; case NGQF_FN: case NGQF_FN2: /* * In the case of the shutdown message we allow it to hit * even if the node is invalid. */ if (NG_NODE_NOT_VALID(node) && NGI_FN(item) != &ng_rmnode) { TRAP_ERROR(); error = EINVAL; NG_FREE_ITEM(item); break; } /* Same is about some internal functions and invalid hook. */ if (hook && NG_HOOK_NOT_VALID(hook) && NGI_FN2(item) != &ng_con_part2 && NGI_FN2(item) != &ng_con_part3 && NGI_FN(item) != &ng_rmhook_part2) { TRAP_ERROR(); error = EINVAL; NG_FREE_ITEM(item); break; } if ((item->el_flags & NGQF_TYPE) == NGQF_FN) { (*NGI_FN(item))(node, hook, NGI_ARG1(item), NGI_ARG2(item)); NG_FREE_ITEM(item); } else /* it is NGQF_FN2 */ error = (*NGI_FN2(item))(node, item, hook); break; } /* * We held references on some of the resources * that we took from the item. Now that we have * finished doing everything, drop those references. */ if (hook) NG_HOOK_UNREF(hook); if (rw == NGQRW_R) ng_leave_read(node); else ng_leave_write(node); /* Apply callback. */ if (apply != NULL) { if (depth == 1 && error != 0) apply->error = error; if (refcount_release(&apply->refs)) (*apply->apply)(apply->context, apply->error); } return (error); } /*********************************************************************** * Implement the 'generic' control messages ***********************************************************************/ static int ng_generic_msg(node_p here, item_p item, hook_p lasthook) { int error = 0; struct ng_mesg *msg; struct ng_mesg *resp = NULL; NGI_GET_MSG(item, msg); if (msg->header.typecookie != NGM_GENERIC_COOKIE) { TRAP_ERROR(); error = EINVAL; goto out; } switch (msg->header.cmd) { case NGM_SHUTDOWN: ng_rmnode(here, NULL, NULL, 0); break; case NGM_MKPEER: { struct ngm_mkpeer *const mkp = (struct ngm_mkpeer *) msg->data; if (msg->header.arglen != sizeof(*mkp)) { TRAP_ERROR(); error = EINVAL; break; } mkp->type[sizeof(mkp->type) - 1] = '\0'; mkp->ourhook[sizeof(mkp->ourhook) - 1] = '\0'; mkp->peerhook[sizeof(mkp->peerhook) - 1] = '\0'; error = ng_mkpeer(here, mkp->ourhook, mkp->peerhook, mkp->type); break; } case NGM_CONNECT: { struct ngm_connect *const con = (struct ngm_connect *) msg->data; node_p node2; if (msg->header.arglen != sizeof(*con)) { TRAP_ERROR(); error = EINVAL; break; } con->path[sizeof(con->path) - 1] = '\0'; con->ourhook[sizeof(con->ourhook) - 1] = '\0'; con->peerhook[sizeof(con->peerhook) - 1] = '\0'; /* Don't forget we get a reference.. 
*/ error = ng_path2noderef(here, con->path, &node2, NULL); if (error) break; error = ng_con_nodes(item, here, con->ourhook, node2, con->peerhook); NG_NODE_UNREF(node2); break; } case NGM_NAME: { struct ngm_name *const nam = (struct ngm_name *) msg->data; if (msg->header.arglen != sizeof(*nam)) { TRAP_ERROR(); error = EINVAL; break; } nam->name[sizeof(nam->name) - 1] = '\0'; error = ng_name_node(here, nam->name); break; } case NGM_RMHOOK: { struct ngm_rmhook *const rmh = (struct ngm_rmhook *) msg->data; hook_p hook; if (msg->header.arglen != sizeof(*rmh)) { TRAP_ERROR(); error = EINVAL; break; } rmh->ourhook[sizeof(rmh->ourhook) - 1] = '\0'; if ((hook = ng_findhook(here, rmh->ourhook)) != NULL) ng_destroy_hook(hook); break; } case NGM_NODEINFO: { struct nodeinfo *ni; NG_MKRESPONSE(resp, msg, sizeof(*ni), M_NOWAIT); if (resp == NULL) { error = ENOMEM; break; } /* Fill in node info */ ni = (struct nodeinfo *) resp->data; if (NG_NODE_HAS_NAME(here)) strcpy(ni->name, NG_NODE_NAME(here)); strcpy(ni->type, here->nd_type->name); ni->id = ng_node2ID(here); ni->hooks = here->nd_numhooks; break; } case NGM_LISTHOOKS: { const int nhooks = here->nd_numhooks; struct hooklist *hl; struct nodeinfo *ni; hook_p hook; /* Get response struct */ NG_MKRESPONSE(resp, msg, sizeof(*hl) + (nhooks * sizeof(struct linkinfo)), M_NOWAIT); if (resp == NULL) { error = ENOMEM; break; } hl = (struct hooklist *) resp->data; ni = &hl->nodeinfo; /* Fill in node info */ if (NG_NODE_HAS_NAME(here)) strcpy(ni->name, NG_NODE_NAME(here)); strcpy(ni->type, here->nd_type->name); ni->id = ng_node2ID(here); /* Cycle through the linked list of hooks */ ni->hooks = 0; LIST_FOREACH(hook, &here->nd_hooks, hk_hooks) { struct linkinfo *const link = &hl->link[ni->hooks]; if (ni->hooks >= nhooks) { log(LOG_ERR, "%s: number of %s changed\n", __func__, "hooks"); break; } if (NG_HOOK_NOT_VALID(hook)) continue; strcpy(link->ourhook, NG_HOOK_NAME(hook)); strcpy(link->peerhook, NG_PEER_HOOK_NAME(hook)); if (NG_PEER_NODE_NAME(hook)[0] != '\0') strcpy(link->nodeinfo.name, NG_PEER_NODE_NAME(hook)); strcpy(link->nodeinfo.type, NG_PEER_NODE(hook)->nd_type->name); link->nodeinfo.id = ng_node2ID(NG_PEER_NODE(hook)); link->nodeinfo.hooks = NG_PEER_NODE(hook)->nd_numhooks; ni->hooks++; } break; } case NGM_LISTNODES: { struct namelist *nl; node_p node; int i; IDHASH_RLOCK(); /* Get response struct. */ NG_MKRESPONSE(resp, msg, sizeof(*nl) + (V_ng_nodes * sizeof(struct nodeinfo)), M_NOWAIT); if (resp == NULL) { IDHASH_RUNLOCK(); error = ENOMEM; break; } nl = (struct namelist *) resp->data; /* Cycle through the lists of nodes. */ nl->numnames = 0; for (i = 0; i <= V_ng_ID_hmask; i++) { LIST_FOREACH(node, &V_ng_ID_hash[i], nd_idnodes) { struct nodeinfo *const np = &nl->nodeinfo[nl->numnames]; if (NG_NODE_NOT_VALID(node)) continue; if (NG_NODE_HAS_NAME(node)) strcpy(np->name, NG_NODE_NAME(node)); strcpy(np->type, node->nd_type->name); np->id = ng_node2ID(node); np->hooks = node->nd_numhooks; KASSERT(nl->numnames < V_ng_nodes, ("%s: no space", __func__)); nl->numnames++; } } IDHASH_RUNLOCK(); break; } case NGM_LISTNAMES: { struct namelist *nl; node_p node; int i; NAMEHASH_RLOCK(); /* Get response struct. */ NG_MKRESPONSE(resp, msg, sizeof(*nl) + (V_ng_named_nodes * sizeof(struct nodeinfo)), M_NOWAIT); if (resp == NULL) { NAMEHASH_RUNLOCK(); error = ENOMEM; break; } nl = (struct namelist *) resp->data; /* Cycle through the lists of nodes. 
*/ nl->numnames = 0; for (i = 0; i <= V_ng_name_hmask; i++) { LIST_FOREACH(node, &V_ng_name_hash[i], nd_nodes) { struct nodeinfo *const np = &nl->nodeinfo[nl->numnames]; if (NG_NODE_NOT_VALID(node)) continue; strcpy(np->name, NG_NODE_NAME(node)); strcpy(np->type, node->nd_type->name); np->id = ng_node2ID(node); np->hooks = node->nd_numhooks; KASSERT(nl->numnames < V_ng_named_nodes, ("%s: no space", __func__)); nl->numnames++; } } NAMEHASH_RUNLOCK(); break; } case NGM_LISTTYPES: { struct typelist *tl; struct ng_type *type; int num = 0; TYPELIST_RLOCK(); /* Count number of types */ LIST_FOREACH(type, &ng_typelist, types) num++; /* Get response struct */ NG_MKRESPONSE(resp, msg, sizeof(*tl) + (num * sizeof(struct typeinfo)), M_NOWAIT); if (resp == NULL) { TYPELIST_RUNLOCK(); error = ENOMEM; break; } tl = (struct typelist *) resp->data; /* Cycle through the linked list of types */ tl->numtypes = 0; LIST_FOREACH(type, &ng_typelist, types) { struct typeinfo *const tp = &tl->typeinfo[tl->numtypes]; strcpy(tp->type_name, type->name); tp->numnodes = type->refs - 1; /* don't count list */ KASSERT(tl->numtypes < num, ("%s: no space", __func__)); tl->numtypes++; } TYPELIST_RUNLOCK(); break; } case NGM_BINARY2ASCII: { int bufSize = 1024; const struct ng_parse_type *argstype; const struct ng_cmdlist *c; struct ng_mesg *binary, *ascii; /* Data area must contain a valid netgraph message */ binary = (struct ng_mesg *)msg->data; if (msg->header.arglen < sizeof(struct ng_mesg) || (msg->header.arglen - sizeof(struct ng_mesg) < binary->header.arglen)) { TRAP_ERROR(); error = EINVAL; break; } retry_b2a: /* Get a response message with lots of room */ NG_MKRESPONSE(resp, msg, sizeof(*ascii) + bufSize, M_NOWAIT); if (resp == NULL) { error = ENOMEM; break; } ascii = (struct ng_mesg *)resp->data; /* Copy binary message header to response message payload */ bcopy(binary, ascii, sizeof(*binary)); /* Find command by matching typecookie and command number */ for (c = here->nd_type->cmdlist; c != NULL && c->name != NULL; c++) { if (binary->header.typecookie == c->cookie && binary->header.cmd == c->cmd) break; } if (c == NULL || c->name == NULL) { for (c = ng_generic_cmds; c->name != NULL; c++) { if (binary->header.typecookie == c->cookie && binary->header.cmd == c->cmd) break; } if (c->name == NULL) { NG_FREE_MSG(resp); error = ENOSYS; break; } } /* Convert command name to ASCII */ snprintf(ascii->header.cmdstr, sizeof(ascii->header.cmdstr), "%s", c->name); /* Convert command arguments to ASCII */ argstype = (binary->header.flags & NGF_RESP) ? 
c->respType : c->mesgType; if (argstype == NULL) { *ascii->data = '\0'; } else { error = ng_unparse(argstype, (u_char *)binary->data, ascii->data, bufSize); if (error == ERANGE) { NG_FREE_MSG(resp); bufSize *= 2; goto retry_b2a; } else if (error) { NG_FREE_MSG(resp); break; } } /* Return the result as struct ng_mesg plus ASCII string */ bufSize = strlen(ascii->data) + 1; ascii->header.arglen = bufSize; resp->header.arglen = sizeof(*ascii) + bufSize; break; } case NGM_ASCII2BINARY: { int bufSize = 20 * 1024; /* XXX hard coded constant */ const struct ng_cmdlist *c; const struct ng_parse_type *argstype; struct ng_mesg *ascii, *binary; int off = 0; /* Data area must contain at least a struct ng_mesg + '\0' */ ascii = (struct ng_mesg *)msg->data; if ((msg->header.arglen < sizeof(*ascii) + 1) || (ascii->header.arglen < 1) || (msg->header.arglen < sizeof(*ascii) + ascii->header.arglen)) { TRAP_ERROR(); error = EINVAL; break; } ascii->data[ascii->header.arglen - 1] = '\0'; /* Get a response message with lots of room */ NG_MKRESPONSE(resp, msg, sizeof(*binary) + bufSize, M_NOWAIT); if (resp == NULL) { error = ENOMEM; break; } binary = (struct ng_mesg *)resp->data; /* Copy ASCII message header to response message payload */ bcopy(ascii, binary, sizeof(*ascii)); /* Find command by matching ASCII command string */ for (c = here->nd_type->cmdlist; c != NULL && c->name != NULL; c++) { if (strcmp(ascii->header.cmdstr, c->name) == 0) break; } if (c == NULL || c->name == NULL) { for (c = ng_generic_cmds; c->name != NULL; c++) { if (strcmp(ascii->header.cmdstr, c->name) == 0) break; } if (c->name == NULL) { NG_FREE_MSG(resp); error = ENOSYS; break; } } /* Convert command name to binary */ binary->header.cmd = c->cmd; binary->header.typecookie = c->cookie; /* Convert command arguments to binary */ argstype = (binary->header.flags & NGF_RESP) ? c->respType : c->mesgType; if (argstype == NULL) { bufSize = 0; } else { if ((error = ng_parse(argstype, ascii->data, &off, (u_char *)binary->data, &bufSize)) != 0) { NG_FREE_MSG(resp); break; } } /* Return the result */ binary->header.arglen = bufSize; resp->header.arglen = sizeof(*binary) + bufSize; break; } case NGM_TEXT_CONFIG: case NGM_TEXT_STATUS: /* * This one is tricky as it passes the command down to the * actual node, even though it is a generic type command. * This means we must assume that the item/msg is already freed * when control passes back to us. */ if (here->nd_type->rcvmsg != NULL) { NGI_MSG(item) = msg; /* put it back as we found it */ return ((*here->nd_type->rcvmsg)(here, item, lasthook)); } /* Fall through if rcvmsg not supported */ default: TRAP_ERROR(); error = EINVAL; } /* * Sometimes a generic message may be statically allocated * to avoid problems with allocating when in tight memory situations. * Don't free it if that is so. * I break them apart here because an error may cause the item to be * freed, in which case we'd be doing it twice; they are kept together * above to simplify freeing.
*/ out: NG_RESPOND_MSG(error, here, item, resp); NG_FREE_MSG(msg); return (error); } /************************************************************************ Queue element get/free routines ************************************************************************/ uma_zone_t ng_qzone; uma_zone_t ng_qdzone; static int numthreads = 0; /* number of queue threads */ static int maxalloc = 4096;/* limit the damage of a leak */ static int maxdata = 4096; /* limit the damage of a DoS */ SYSCTL_INT(_net_graph, OID_AUTO, threads, CTLFLAG_RDTUN, &numthreads, 0, "Number of queue processing threads"); SYSCTL_INT(_net_graph, OID_AUTO, maxalloc, CTLFLAG_RDTUN, &maxalloc, 0, "Maximum number of non-data queue items to allocate"); SYSCTL_INT(_net_graph, OID_AUTO, maxdata, CTLFLAG_RDTUN, &maxdata, 0, "Maximum number of data queue items to allocate"); #ifdef NETGRAPH_DEBUG static TAILQ_HEAD(, ng_item) ng_itemlist = TAILQ_HEAD_INITIALIZER(ng_itemlist); static int allocated; /* number of items malloc'd */ #endif /* * Get a queue entry. * This is usually called when a packet first enters netgraph. * By definition, this is usually from an interrupt, or from a user. * Users are not so important, but try be quick for the times that it's * an interrupt. */ static __inline item_p ng_alloc_item(int type, int flags) { item_p item; KASSERT(((type & ~NGQF_TYPE) == 0), ("%s: incorrect item type: %d", __func__, type)); item = uma_zalloc((type == NGQF_DATA) ? ng_qdzone : ng_qzone, ((flags & NG_WAITOK) ? M_WAITOK : M_NOWAIT) | M_ZERO); if (item) { item->el_flags = type; #ifdef NETGRAPH_DEBUG mtx_lock(&ngq_mtx); TAILQ_INSERT_TAIL(&ng_itemlist, item, all); allocated++; mtx_unlock(&ngq_mtx); #endif } return (item); } /* * Release a queue entry */ void ng_free_item(item_p item) { /* * The item may hold resources on its own. We need to free * these before we can free the item. What they are depends upon * what kind of item it is. it is important that nodes zero * out pointers to resources that they remove from the item * or we release them again here. */ switch (item->el_flags & NGQF_TYPE) { case NGQF_DATA: /* If we have an mbuf still attached.. */ NG_FREE_M(_NGI_M(item)); break; case NGQF_MESG: _NGI_RETADDR(item) = 0; NG_FREE_MSG(_NGI_MSG(item)); break; case NGQF_FN: case NGQF_FN2: /* nothing to free really, */ _NGI_FN(item) = NULL; _NGI_ARG1(item) = NULL; _NGI_ARG2(item) = 0; break; } /* If we still have a node or hook referenced... */ _NGI_CLR_NODE(item); _NGI_CLR_HOOK(item); #ifdef NETGRAPH_DEBUG mtx_lock(&ngq_mtx); TAILQ_REMOVE(&ng_itemlist, item, all); allocated--; mtx_unlock(&ngq_mtx); #endif uma_zfree(((item->el_flags & NGQF_TYPE) == NGQF_DATA) ? ng_qdzone : ng_qzone, item); } /* * Change type of the queue entry. * Possibly reallocates it from another UMA zone. */ static __inline item_p ng_realloc_item(item_p pitem, int type, int flags) { item_p item; int from, to; KASSERT((pitem != NULL), ("%s: can't reallocate NULL", __func__)); KASSERT(((type & ~NGQF_TYPE) == 0), ("%s: incorrect item type: %d", __func__, type)); from = ((pitem->el_flags & NGQF_TYPE) == NGQF_DATA); to = (type == NGQF_DATA); if (from != to) { /* If reallocation is required do it and copy item. 
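 * (For example, turning a queued NGQF_FN item into NGQF_FN2 stays
 * within ng_qzone, so only the type bits are rewritten, while any
 * conversion to or from NGQF_DATA must move the item between
 * ng_qzone and ng_qdzone.)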
*/ if ((item = ng_alloc_item(type, flags)) == NULL) { ng_free_item(pitem); return (NULL); } *item = *pitem; ng_free_item(pitem); } else item = pitem; item->el_flags = (item->el_flags & ~NGQF_TYPE) | type; return (item); } /************************************************************************ Module routines ************************************************************************/ /* * Handle the loading/unloading of a netgraph node type module */ int ng_mod_event(module_t mod, int event, void *data) { struct ng_type *const type = data; int error = 0; switch (event) { case MOD_LOAD: /* Register new netgraph node type */ if ((error = ng_newtype(type)) != 0) break; /* Call type specific code */ if (type->mod_event != NULL) if ((error = (*type->mod_event)(mod, event, data))) { TYPELIST_WLOCK(); type->refs--; /* undo it */ LIST_REMOVE(type, types); TYPELIST_WUNLOCK(); } break; case MOD_UNLOAD: if (type->refs > 1) { /* make sure no nodes exist! */ error = EBUSY; } else { if (type->refs == 0) /* failed load, nothing to undo */ break; if (type->mod_event != NULL) { /* check with type */ error = (*type->mod_event)(mod, event, data); if (error != 0) /* type refuses.. */ break; } TYPELIST_WLOCK(); LIST_REMOVE(type, types); TYPELIST_WUNLOCK(); } break; default: if (type->mod_event != NULL) error = (*type->mod_event)(mod, event, data); else error = EOPNOTSUPP; /* XXX ? */ break; } return (error); } static void vnet_netgraph_init(const void *unused __unused) { /* We start with small hashes, but they can grow. */ V_ng_ID_hash = hashinit(16, M_NETGRAPH_NODE, &V_ng_ID_hmask); V_ng_name_hash = hashinit(16, M_NETGRAPH_NODE, &V_ng_name_hmask); } VNET_SYSINIT(vnet_netgraph_init, SI_SUB_NETGRAPH, SI_ORDER_FIRST, vnet_netgraph_init, NULL); #ifdef VIMAGE static void vnet_netgraph_uninit(const void *unused __unused) { node_p node = NULL, last_killed = NULL; int i; do { /* Find a node to kill */ IDHASH_RLOCK(); for (i = 0; i <= V_ng_ID_hmask; i++) { LIST_FOREACH(node, &V_ng_ID_hash[i], nd_idnodes) { if (node != &ng_deadnode) { NG_NODE_REF(node); break; } } if (node != NULL) break; } IDHASH_RUNLOCK(); /* Attempt to kill it only if it is a regular node */ if (node != NULL) { if (node == last_killed) { if (node->nd_flags & NGF_REALLY_DIE) panic("ng node %s won't die", node->nd_name); /* The node persisted itself. Try again. */ node->nd_flags |= NGF_REALLY_DIE; } ng_rmnode(node, NULL, NULL, 0); NG_NODE_UNREF(node); last_killed = node; } } while (node != NULL); hashdestroy(V_ng_name_hash, M_NETGRAPH_NODE, V_ng_name_hmask); hashdestroy(V_ng_ID_hash, M_NETGRAPH_NODE, V_ng_ID_hmask); } VNET_SYSUNINIT(vnet_netgraph_uninit, SI_SUB_NETGRAPH, SI_ORDER_FIRST, vnet_netgraph_uninit, NULL); #endif /* VIMAGE */ /* * Handle loading and unloading for this code. * The only thing we need to link into is the NETISR structure. */ static int ngb_mod_event(module_t mod, int event, void *data) { struct proc *p; struct thread *td; int i, error = 0; switch (event) { case MOD_LOAD: /* Initialize everything. 
*/ NG_WORKLIST_LOCK_INIT(); rw_init(&ng_typelist_lock, "netgraph types"); rw_init(&ng_idhash_lock, "netgraph idhash"); rw_init(&ng_namehash_lock, "netgraph namehash"); rw_init(&ng_topo_lock, "netgraph topology mutex"); #ifdef NETGRAPH_DEBUG mtx_init(&ng_nodelist_mtx, "netgraph nodelist mutex", NULL, MTX_DEF); mtx_init(&ngq_mtx, "netgraph item list mutex", NULL, MTX_DEF); #endif ng_qzone = uma_zcreate("NetGraph items", sizeof(struct ng_item), NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); uma_zone_set_max(ng_qzone, maxalloc); ng_qdzone = uma_zcreate("NetGraph data items", sizeof(struct ng_item), NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); uma_zone_set_max(ng_qdzone, maxdata); /* Autoconfigure number of threads. */ if (numthreads <= 0) numthreads = mp_ncpus; /* Create threads. */ p = NULL; /* start with no process */ for (i = 0; i < numthreads; i++) { if (kproc_kthread_add(ngthread, NULL, &p, &td, RFHIGHPID, 0, "ng_queue", "ng_queue%d", i)) { numthreads = i; break; } } break; case MOD_UNLOAD: /* You can't unload it because an interface may be using it. */ error = EBUSY; break; default: error = EOPNOTSUPP; break; } return (error); } static moduledata_t netgraph_mod = { "netgraph", ngb_mod_event, (NULL) }; DECLARE_MODULE(netgraph, netgraph_mod, SI_SUB_NETGRAPH, SI_ORDER_FIRST); SYSCTL_NODE(_net, OID_AUTO, graph, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "netgraph Family"); SYSCTL_INT(_net_graph, OID_AUTO, abi_version, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, NG_ABI_VERSION,""); SYSCTL_INT(_net_graph, OID_AUTO, msg_version, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, NG_VERSION, ""); #ifdef NETGRAPH_DEBUG void dumphook (hook_p hook, char *file, int line) { printf("hook: name %s, %d refs, Last touched:\n", _NG_HOOK_NAME(hook), hook->hk_refs); printf(" Last active @ %s, line %d\n", hook->lastfile, hook->lastline); if (line) { printf(" problem discovered at file %s, line %d\n", file, line); #ifdef KDB kdb_backtrace(); #endif } } void dumpnode(node_p node, char *file, int line) { printf("node: ID [%x]: type '%s', %d hooks, flags 0x%x, %d refs, %s:\n", _NG_NODE_ID(node), node->nd_type->name, node->nd_numhooks, node->nd_flags, node->nd_refs, node->nd_name); printf(" Last active @ %s, line %d\n", node->lastfile, node->lastline); if (line) { printf(" problem discovered at file %s, line %d\n", file, line); #ifdef KDB kdb_backtrace(); #endif } } void dumpitem(item_p item, char *file, int line) { printf(" ACTIVE item, last used at %s, line %d", item->lastfile, item->lastline); switch(item->el_flags & NGQF_TYPE) { case NGQF_DATA: printf(" - [data]\n"); break; case NGQF_MESG: printf(" - retaddr[%d]:\n", _NGI_RETADDR(item)); break; case NGQF_FN: printf(" - fn@%p (%p, %p, %p, %d (%x))\n", _NGI_FN(item), _NGI_NODE(item), _NGI_HOOK(item), item->body.fn.fn_arg1, item->body.fn.fn_arg2, item->body.fn.fn_arg2); break; case NGQF_FN2: printf(" - fn2@%p (%p, %p, %p, %d (%x))\n", _NGI_FN2(item), _NGI_NODE(item), _NGI_HOOK(item), item->body.fn.fn_arg1, item->body.fn.fn_arg2, item->body.fn.fn_arg2); break; } if (line) { printf(" problem discovered at file %s, line %d\n", file, line); if (_NGI_NODE(item)) { printf("node %p ([%x])\n", _NGI_NODE(item), ng_node2ID(_NGI_NODE(item))); } } } static void ng_dumpitems(void) { item_p item; int i = 1; TAILQ_FOREACH(item, &ng_itemlist, all) { printf("[%d] ", i++); dumpitem(item, NULL, 0); } } static void ng_dumpnodes(void) { node_p node; int i = 1; mtx_lock(&ng_nodelist_mtx); SLIST_FOREACH(node, &ng_allnodes, nd_all) { printf("[%d] ", i++); dumpnode(node, NULL, 0); } mtx_unlock(&ng_nodelist_mtx); } static void 
ng_dumphooks(void) { hook_p hook; int i = 1; mtx_lock(&ng_nodelist_mtx); SLIST_FOREACH(hook, &ng_allhooks, hk_all) { printf("[%d] ", i++); dumphook(hook, NULL, 0); } mtx_unlock(&ng_nodelist_mtx); } static int sysctl_debug_ng_dump_items(SYSCTL_HANDLER_ARGS) { int error; int val; val = allocated; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (val == 42) { ng_dumpitems(); ng_dumpnodes(); ng_dumphooks(); } return (0); } SYSCTL_PROC(_debug, OID_AUTO, ng_dump_items, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, sizeof(int), sysctl_debug_ng_dump_items, "I", "Number of allocated items"); #endif /* NETGRAPH_DEBUG */ /*********************************************************************** * Worklist routines **********************************************************************/ /* * Pick a node off the list of nodes with work, * try get an item to process off it. Remove the node from the list. */ static void ngthread(void *arg) { for (;;) { struct epoch_tracker et; node_p node; /* Get node from the worklist. */ NG_WORKLIST_LOCK(); while ((node = STAILQ_FIRST(&ng_worklist)) == NULL) NG_WORKLIST_SLEEP(); STAILQ_REMOVE_HEAD(&ng_worklist, nd_input_queue.q_work); NG_WORKLIST_UNLOCK(); CURVNET_SET(node->nd_vnet); CTR3(KTR_NET, "%20s: node [%x] (%p) taken off worklist", __func__, node->nd_ID, node); /* * We have the node. We also take over the reference * that the list had on it. * Now process as much as you can, until it won't * let you have another item off the queue. * All this time, keep the reference * that lets us be sure that the node still exists. * Let the reference go at the last minute. */ NET_EPOCH_ENTER(et); for (;;) { item_p item; int rw; NG_QUEUE_LOCK(&node->nd_input_queue); item = ng_dequeue(node, &rw); if (item == NULL) { node->nd_input_queue.q_flags2 &= ~NGQ2_WORKQ; NG_QUEUE_UNLOCK(&node->nd_input_queue); break; /* go look for another node */ } else { NG_QUEUE_UNLOCK(&node->nd_input_queue); NGI_GET_NODE(item, node); /* zaps stored node */ if ((item->el_flags & NGQF_TYPE) == NGQF_MESG) { /* * NGQF_MESG items should never be processed in * NET_EPOCH context. So, temporary exit from EPOCH. */ NET_EPOCH_EXIT(et); ng_apply_item(node, item, rw); NET_EPOCH_ENTER(et); } else { ng_apply_item(node, item, rw); } NG_NODE_UNREF(node); } } NET_EPOCH_EXIT(et); NG_NODE_UNREF(node); CURVNET_RESTORE(); } } /* * XXX * It's possible that a debugging NG_NODE_REF may need * to be outside the mutex zone */ static void ng_worklist_add(node_p node) { mtx_assert(&node->nd_input_queue.q_mtx, MA_OWNED); if ((node->nd_input_queue.q_flags2 & NGQ2_WORKQ) == 0) { /* * If we are not already on the work queue, * then put us on. */ node->nd_input_queue.q_flags2 |= NGQ2_WORKQ; NG_NODE_REF(node); /* XXX safe in mutex? 
*/ NG_WORKLIST_LOCK(); STAILQ_INSERT_TAIL(&ng_worklist, node, nd_input_queue.q_work); NG_WORKLIST_UNLOCK(); CTR3(KTR_NET, "%20s: node [%x] (%p) put on worklist", __func__, node->nd_ID, node); NG_WORKLIST_WAKEUP(); } else { CTR3(KTR_NET, "%20s: node [%x] (%p) already on worklist", __func__, node->nd_ID, node); } } /*********************************************************************** * Externally usable functions to set up a queue item ready for sending ***********************************************************************/ #ifdef NETGRAPH_DEBUG #define ITEM_DEBUG_CHECKS \ do { \ if (NGI_NODE(item) ) { \ printf("item already has node"); \ kdb_enter(KDB_WHY_NETGRAPH, "has node"); \ NGI_CLR_NODE(item); \ } \ if (NGI_HOOK(item) ) { \ printf("item already has hook"); \ kdb_enter(KDB_WHY_NETGRAPH, "has hook"); \ NGI_CLR_HOOK(item); \ } \ } while (0) #else #define ITEM_DEBUG_CHECKS #endif /* * Put an mbuf into the item. * Hook and node references will be removed when the item is dequeued. * (or equivalent) * (XXX) Unsafe because no reference is held by the peer on the remote node; * the remote node might go away in this timescale. * We know the hooks can't go away because that would require getting * a writer item on both nodes and we must have at least a reader * here to be able to do this. * Note that the hook loaded is the REMOTE hook. * * This is possibly in the critical path for new data. */ item_p ng_package_data(struct mbuf *m, int flags) { item_p item; if ((item = ng_alloc_item(NGQF_DATA, flags)) == NULL) { NG_FREE_M(m); return (NULL); } ITEM_DEBUG_CHECKS; item->el_flags |= NGQF_READER; NGI_M(item) = m; return (item); } /* * Allocate a queue item and put the message into it. * Evaluate the address as this will be needed to queue it and * to work out what some of the fields should be. * Hook and node references will be removed when the item is dequeued. * (or equivalent) */ item_p ng_package_msg(struct ng_mesg *msg, int flags) { item_p item; if ((item = ng_alloc_item(NGQF_MESG, flags)) == NULL) { NG_FREE_MSG(msg); return (NULL); } ITEM_DEBUG_CHECKS; /* Message items count as writers unless explicitly exempted. */ if (msg->header.cmd & NGM_READONLY) item->el_flags |= NGQF_READER; else item->el_flags |= NGQF_WRITER; /* * Set the current lasthook into the queue item. */ NGI_MSG(item) = msg; NGI_RETADDR(item) = 0; return (item); } #define SET_RETADDR(item, here, retaddr) \ do { /* Data or fn items don't have retaddrs */ \ if ((item->el_flags & NGQF_TYPE) == NGQF_MESG) { \ if (retaddr) { \ NGI_RETADDR(item) = retaddr; \ } else { \ /* \ * The old return address should be ok. \ * If there isn't one, use the address \ * here. \ */ \ if (NGI_RETADDR(item) == 0) { \ NGI_RETADDR(item) \ = ng_node2ID(here); \ } \ } \ } \ } while (0) int ng_address_hook(node_p here, item_p item, hook_p hook, ng_ID_t retaddr) { hook_p peer; node_p peernode; ITEM_DEBUG_CHECKS; /* * Quick sanity check. * Since a hook holds a reference on its node, once we know * that the peer is still connected (even if invalid), we know * that the peer node is present, though maybe invalid. */ TOPOLOGY_RLOCK(); if ((hook == NULL) || NG_HOOK_NOT_VALID(hook) || NG_HOOK_NOT_VALID(peer = NG_HOOK_PEER(hook)) || NG_NODE_NOT_VALID(peernode = NG_PEER_NODE(hook))) { NG_FREE_ITEM(item); TRAP_ERROR(); TOPOLOGY_RUNLOCK(); return (ENETDOWN); } /* * Transfer our interest to the other (peer) end.
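	 * The item takes over one reference on the peer hook and one on
	 * the peer node, both acquired below while the topology read
	 * lock guarantees that neither can be torn down; the references
	 * are dropped again when the item is applied or freed.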
*/ NG_HOOK_REF(peer); NG_NODE_REF(peernode); NGI_SET_HOOK(item, peer); NGI_SET_NODE(item, peernode); SET_RETADDR(item, here, retaddr); TOPOLOGY_RUNLOCK(); return (0); } int ng_address_path(node_p here, item_p item, const char *address, ng_ID_t retaddr) { node_p dest = NULL; hook_p hook = NULL; int error; ITEM_DEBUG_CHECKS; /* * Note that ng_path2noderef increments the reference count * on the node for us if it finds one. So we don't have to. */ error = ng_path2noderef(here, address, &dest, &hook); if (error) { NG_FREE_ITEM(item); return (error); } NGI_SET_NODE(item, dest); if (hook) NGI_SET_HOOK(item, hook); SET_RETADDR(item, here, retaddr); return (0); } int ng_address_ID(node_p here, item_p item, ng_ID_t ID, ng_ID_t retaddr) { node_p dest; ITEM_DEBUG_CHECKS; /* * Find the target node. */ dest = ng_ID2noderef(ID); /* GETS REFERENCE! */ if (dest == NULL) { NG_FREE_ITEM(item); TRAP_ERROR(); return (EINVAL); } /* Fill out the contents */ NGI_SET_NODE(item, dest); NGI_CLR_HOOK(item); SET_RETADDR(item, here, retaddr); return (0); } /* * Special case: send a message to self (e.g. destroy node). * Possibly indicate an arrival hook too. * Useful for removing that hook :-) */ item_p ng_package_msg_self(node_p here, hook_p hook, struct ng_mesg *msg) { item_p item; /* * Find the target node. * If there is a HOOK argument, then use that in preference * to the address. */ if ((item = ng_alloc_item(NGQF_MESG, NG_NOFLAGS)) == NULL) { NG_FREE_MSG(msg); return (NULL); } /* Fill out the contents */ item->el_flags |= NGQF_WRITER; NG_NODE_REF(here); NGI_SET_NODE(item, here); if (hook) { NG_HOOK_REF(hook); NGI_SET_HOOK(item, hook); } NGI_MSG(item) = msg; NGI_RETADDR(item) = ng_node2ID(here); return (item); } /* * Send an ng_item_fn function call to the specified node. */ int ng_send_fn(node_p node, hook_p hook, ng_item_fn *fn, void * arg1, int arg2) { return (ng_send_fn1(node, hook, fn, arg1, arg2, NG_NOFLAGS)); } int ng_send_fn1(node_p node, hook_p hook, ng_item_fn *fn, void * arg1, int arg2, int flags) { item_p item; if ((item = ng_alloc_item(NGQF_FN, flags)) == NULL) { return (ENOMEM); } item->el_flags |= NGQF_WRITER; NG_NODE_REF(node); /* and one for the item */ NGI_SET_NODE(item, node); if (hook) { NG_HOOK_REF(hook); NGI_SET_HOOK(item, hook); } NGI_FN(item) = fn; NGI_ARG1(item) = arg1; NGI_ARG2(item) = arg2; return (ng_snd_item(item, flags)); } /* * Send an ng_item_fn2 function call to the specified node. * * If the optional pitem parameter is supplied, its apply * callback will be copied to the new item. If the NG_REUSE_ITEM * flag is also set, no new item will be allocated, but pitem will * be used. */ int ng_send_fn2(node_p node, hook_p hook, item_p pitem, ng_item_fn2 *fn, void *arg1, int arg2, int flags) { item_p item; KASSERT((pitem != NULL || (flags & NG_REUSE_ITEM) == 0), ("%s: NG_REUSE_ITEM but no pitem", __func__)); /* * Allocate a new item if none was supplied, or * if we can't reuse the supplied one. */ if (pitem == NULL || (flags & NG_REUSE_ITEM) == 0) { if ((item = ng_alloc_item(NGQF_FN2, flags)) == NULL) return (ENOMEM); if (pitem != NULL) item->apply = pitem->apply; } else { if ((item = ng_realloc_item(pitem, NGQF_FN2, flags)) == NULL) return (ENOMEM); } item->el_flags = (item->el_flags & ~NGQF_RW) | NGQF_WRITER; NG_NODE_REF(node); /* and one for the item */ NGI_SET_NODE(item, node); if (hook) { NG_HOOK_REF(hook); NGI_SET_HOOK(item, hook); } NGI_FN2(item) = fn; NGI_ARG1(item) = arg1; NGI_ARG2(item) = arg2; return (ng_snd_item(item, flags)); } /* * Official timeout routines for Netgraph nodes.
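 *
 * A minimal usage sketch (hypothetical node type; the "priv" softc,
 * its "timer" member and "ng_foo_tick" are illustrative names, not
 * part of this API).  The callout is assumed to have been set up
 * with ng_callout_init():
 *
 *	static void
 *	ng_foo_tick(node_p node, hook_p hook, void *arg1, int arg2)
 *	{
 *		struct ng_foo_priv *priv = NG_NODE_PRIVATE(node);
 *
 *		... do the periodic work, then re-arm for one second ...
 *		ng_callout(&priv->timer, node, NULL, hz, ng_foo_tick,
 *		    NULL, 0);
 *	}
 *
 * The node's shutdown method would cancel a pending invocation with
 * ng_uncallout(&priv->timer, node).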
*/ static void ng_callout_trampoline(void *arg) { struct epoch_tracker et; item_p item = arg; NET_EPOCH_ENTER(et); CURVNET_SET(NGI_NODE(item)->nd_vnet); ng_snd_item(item, 0); CURVNET_RESTORE(); NET_EPOCH_EXIT(et); } int ng_callout(struct callout *c, node_p node, hook_p hook, int ticks, ng_item_fn *fn, void * arg1, int arg2) { item_p item, oitem; if ((item = ng_alloc_item(NGQF_FN, NG_NOFLAGS)) == NULL) return (ENOMEM); item->el_flags |= NGQF_WRITER; NG_NODE_REF(node); /* and one for the item */ NGI_SET_NODE(item, node); if (hook) { NG_HOOK_REF(hook); NGI_SET_HOOK(item, hook); } NGI_FN(item) = fn; NGI_ARG1(item) = arg1; NGI_ARG2(item) = arg2; oitem = c->c_arg; if (callout_reset(c, ticks, &ng_callout_trampoline, item) == 1 && oitem != NULL) NG_FREE_ITEM(oitem); return (0); } /* * Free references and item if callout_stop/callout_drain returned 1, * meaning that callout was successfully stopped and now references * belong to us. */ static void ng_uncallout_internal(struct callout *c, node_p node) { item_p item; item = c->c_arg; if ((c->c_func == &ng_callout_trampoline) && (item != NULL) && (NGI_NODE(item) == node)) { /* * We successfully removed it from the queue before it ran * So now we need to unreference everything that was * given extra references. (NG_FREE_ITEM does this). */ NG_FREE_ITEM(item); } c->c_arg = NULL; } /* A special modified version of callout_stop() */ int ng_uncallout(struct callout *c, node_p node) { int rval; rval = callout_stop(c); if (rval > 0) /* * XXXGL: in case if callout is already running and next * invocation is scheduled at the same time, callout_stop() * returns 0. See d153eeee97d. In this case netgraph(4) would * leak resources. However, no nodes are known to induce such * behavior. */ ng_uncallout_internal(c, node); return (rval); } /* A special modified version of callout_drain() */ int ng_uncallout_drain(struct callout *c, node_p node) { int rval; rval = callout_drain(c); if (rval > 0) ng_uncallout_internal(c, node); return (rval); } /* * Set the address, if none given, give the node here. */ void ng_replace_retaddr(node_p here, item_p item, ng_ID_t retaddr) { if (retaddr) { NGI_RETADDR(item) = retaddr; } else { /* * The old return address should be ok. * If there isn't one, use the address here. */ NGI_RETADDR(item) = ng_node2ID(here); } } diff --git a/sys/powerpc/include/proc.h b/sys/powerpc/include/proc.h index 2c6a00536b8a..0f8d36bfe856 100644 --- a/sys/powerpc/include/proc.h +++ b/sys/powerpc/include/proc.h @@ -1,74 +1,62 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: proc.h,v 1.2 1997/04/16 22:57:48 thorpej Exp $ * $FreeBSD$ */ #ifndef _MACHINE_PROC_H_ #define _MACHINE_PROC_H_ /* * Machine-dependent part of the proc structure */ struct mdthread { int md_spinlock_count; /* (k) */ register_t md_saved_msr; /* (k) */ }; struct mdproc { /* * Avoid empty structs because they are undefined behavior. */ long md_spare; }; #ifdef __powerpc64__ #define KINFO_PROC_SIZE 1088 #define KINFO_PROC32_SIZE 816 #else #define KINFO_PROC_SIZE 816 #endif -#ifdef _KERNEL - -#include - -/* Get the current kernel thread stack usage. */ -#define GET_STACK_USAGE(total, used) do { \ - struct thread *td = curthread; \ - (total) = td->td_kstack_pages * PAGE_SIZE - sizeof(struct pcb); \ - (used) = td->td_kstack + (total) - (vm_offset_t)&td; \ -} while (0) -#endif - #endif /* !_MACHINE_PROC_H_ */ diff --git a/sys/powerpc/include/stack.h b/sys/powerpc/include/stack.h index c433a9fe09eb..953afd6f0aa4 100644 --- a/sys/powerpc/include/stack.h +++ b/sys/powerpc/include/stack.h @@ -1,36 +1,55 @@ /*- * Mach Operating System * Copyright (c) 1992 Carnegie Mellon University * All Rights Reserved. * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie Mellon * the rights to redistribute these changes. * * $FreeBSD$ */ #ifndef _MACHINE_STACK_H_ #define _MACHINE_STACK_H_ extern int trapexit[]; extern int asttrapexit[]; extern int end[]; +#ifdef _SYS_PROC_H_ + +#include + +/* Get the current kernel thread stack usage. 
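+ *
+ * The "used" figure works because td is a local variable in the
+ * macro expansion, so &td lies close to the current stack depth; on
+ * this platform the pcb kept at the top of the stack is excluded
+ * from "total".  A caller that wants to hand deep work off to a
+ * thread with a fresh stack might check headroom like this (a sketch
+ * only; the 3/4 threshold is an illustrative policy, not part of the
+ * interface):
+ *
+ *	int total, used;
+ *
+ *	GET_STACK_USAGE(total, used);
+ *	if (used > total * 3 / 4)
+ *		return (EAGAIN);
+ *
+ * kstack_contains() below deliberately tests va + len >= va so that
+ * a range whose length overflows the address is rejected.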
*/ +#define GET_STACK_USAGE(total, used) do { \ + struct thread *td = curthread; \ + (total) = td->td_kstack_pages * PAGE_SIZE - sizeof(struct pcb); \ + (used) = td->td_kstack + (total) - (vm_offset_t)&td; \ +} while (0) + +static __inline bool +kstack_contains(struct thread *td, vm_offset_t va, size_t len) +{ + return (va >= td->td_kstack && va + len >= va && + va + len <= td->td_kstack + td->td_kstack_pages * PAGE_SIZE); +} +#endif /* _SYS_PROC_H_ */ + #endif /* !_MACHINE_STACK_H_ */ diff --git a/sys/riscv/include/proc.h b/sys/riscv/include/proc.h index 648c529f4322..ce0a62675308 100644 --- a/sys/riscv/include/proc.h +++ b/sys/riscv/include/proc.h @@ -1,59 +1,48 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)proc.h 7.1 (Berkeley) 5/15/91 * from: FreeBSD: src/sys/i386/include/proc.h,v 1.11 2001/06/29 * $FreeBSD$ */ #ifndef _MACHINE_PROC_H_ #define _MACHINE_PROC_H_ struct mdthread { int md_spinlock_count; /* (k) */ register_t md_saved_sstatus_ie; /* (k) */ }; struct mdproc { int dummy; }; #define KINFO_PROC_SIZE 1088 -#ifdef _KERNEL -#include - -/* Get the current kernel thread stack usage. */ -#define GET_STACK_USAGE(total, used) do { \ - struct thread *td = curthread; \ - (total) = td->td_kstack_pages * PAGE_SIZE - sizeof(struct pcb); \ - (used) = td->td_kstack + (total) - (vm_offset_t)&td; \ -} while (0) - -#endif /* _KERNEL */ #endif /* !_MACHINE_PROC_H_ */ diff --git a/sys/riscv/include/stack.h b/sys/riscv/include/stack.h index 82f851096d7b..566081c3ebd0 100644 --- a/sys/riscv/include/stack.h +++ b/sys/riscv/include/stack.h @@ -1,51 +1,70 @@ /*- * Copyright (c) 2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. 
* * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_STACK_H_ #define _MACHINE_STACK_H_ #define INKERNEL(va) ((va) >= VM_MIN_KERNEL_ADDRESS && \ (va) <= VM_MAX_KERNEL_ADDRESS) struct unwind_state { uintptr_t fp; uintptr_t sp; uintptr_t pc; }; bool unwind_frame(struct thread *, struct unwind_state *); +#ifdef _SYS_PROC_H_ + +#include + +/* Get the current kernel thread stack usage. */ +#define GET_STACK_USAGE(total, used) do { \ + struct thread *td = curthread; \ + (total) = td->td_kstack_pages * PAGE_SIZE - sizeof(struct pcb); \ + (used) = td->td_kstack + (total) - (vm_offset_t)&td; \ +} while (0) + +static __inline bool +kstack_contains(struct thread *td, vm_offset_t va, size_t len) +{ + return (va >= td->td_kstack && va + len >= va && + va + len <= td->td_kstack + td->td_kstack_pages * PAGE_SIZE); +} +#endif /* _SYS_PROC_H_ */ + #endif /* !_MACHINE_STACK_H_ */ diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 2da5d8edee6d..2ad4505405c8 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -1,1364 +1,1357 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)proc.h 8.15 (Berkeley) 5/19/95 * $FreeBSD$ */ #ifndef _SYS_PROC_H_ #define _SYS_PROC_H_ #include /* For struct callout. */ #include /* For struct klist. */ #ifdef _KERNEL #include #endif #include #ifndef _KERNEL #include #endif #include #include #include #include #include #include #include /* XXX. */ #include #include #include #include #include #ifndef _KERNEL #include /* For structs itimerval, timeval. */ #else #include #include #endif #include #include #include #include #include /* Machine-dependent proc substruct. */ #ifdef _KERNEL #include #endif /* * One structure allocated per session. * * List of locks * (m) locked by s_mtx mtx * (e) locked by proctree_lock sx * (c) const until freeing */ struct session { u_int s_count; /* Ref cnt; pgrps in session - atomic. */ struct proc *s_leader; /* (m + e) Session leader. */ struct vnode *s_ttyvp; /* (m) Vnode of controlling tty. */ struct cdev_priv *s_ttydp; /* (m) Device of controlling tty. */ struct tty *s_ttyp; /* (e) Controlling tty. */ pid_t s_sid; /* (c) Session ID. */ /* (m) Setlogin() name: */ char s_login[roundup(MAXLOGNAME, sizeof(long))]; struct mtx s_mtx; /* Mutex to protect members. */ }; /* * One structure allocated per process group. * * List of locks * (m) locked by pg_mtx mtx * (e) locked by proctree_lock sx * (c) const until freeing */ struct pgrp { LIST_ENTRY(pgrp) pg_hash; /* (e) Hash chain. */ LIST_HEAD(, proc) pg_members; /* (m + e) Pointer to pgrp members. */ struct session *pg_session; /* (c) Pointer to session. */ struct sigiolst pg_sigiolst; /* (m) List of sigio sources. */ pid_t pg_id; /* (c) Process group id. */ struct mtx pg_mtx; /* Mutex to protect members */ int pg_flags; /* (m) PGRP_ flags */ }; #define PGRP_ORPHANED 0x00000001 /* Group is orphaned */ /* * pargs, used to hold a copy of the command line, if it had a sane length. */ struct pargs { u_int ar_ref; /* Reference count. */ u_int ar_length; /* Length. */ u_char ar_args[1]; /* Arguments. */ }; /*- * Description of a process. * * This structure contains the information needed to manage a thread of * control, known in UN*X as a process; it has references to substructures * containing descriptions of things that the process uses, but may share * with related processes. The process structure and the substructures * are always addressable except for those marked "(CPU)" below, * which might be addressable only on a processor on which the process * is running. * * Below is a key of locks used to protect each member of struct proc. 
The * lock is indicated by a reference to a specific character in parens in the * associated comment. * * - not yet protected * a - only touched by curproc or parent during fork/wait * b - created at fork, never changes * (exception aiods switch vmspaces, but they are also * marked 'P_SYSTEM' so hopefully it will be left alone) * c - locked by proc mtx * d - locked by allproc_lock lock * e - locked by proctree_lock lock * f - session mtx * g - process group mtx * h - callout_lock mtx * i - by curproc or the master session mtx * j - locked by proc slock * k - only accessed by curthread * k*- only accessed by curthread and from an interrupt * kx- only accessed by curthread and by debugger * l - the attaching proc or attaching proc parent * n - not locked, lazy * o - ktrace lock * q - td_contested lock * r - p_peers lock * s - see sleepq_switch(), sleeping_on_old_rtc(), and sleep(9) * t - thread lock * u - process stat lock * w - process timer lock * x - created at fork, only changes during single threading in exec * y - created at first aio, doesn't change until exit or exec at which * point we are single-threaded and only curthread changes it * * If the locking key specifies two identifiers (for example, p_pptr) then * either lock is sufficient for read access, but both locks must be held * for write access. */ struct cpuset; struct filecaps; struct filemon; struct kaioinfo; struct kaudit_record; struct kcov_info; struct kdtrace_proc; struct kdtrace_thread; struct kmsan_td; struct kq_timer_cb_data; struct mqueue_notifier; struct p_sched; struct proc; struct procdesc; struct racct; struct sbuf; struct sleepqueue; struct socket; struct td_sched; struct thread; struct trapframe; struct turnstile; struct vm_map; struct vm_map_entry; struct epoch_tracker; struct syscall_args { u_int code; u_int original_code; struct sysent *callp; register_t args[8]; }; /* * XXX: Does this belong in resource.h or resourcevar.h instead? * Resource usage extension. The times in rusage structs in the kernel are * never up to date. The actual times are kept as runtimes and tick counts * (with control info in the "previous" times), and are converted when * userland asks for rusage info. Backwards compatibility prevents putting * this directly in the user-visible rusage struct. * * Locking for p_rux: (cu) means (u) for p_rux and (c) for p_crux. * Locking for td_rux: (t) for all fields. */ struct rusage_ext { uint64_t rux_runtime; /* (cu) Real time. */ uint64_t rux_uticks; /* (cu) Statclock hits in user mode. */ uint64_t rux_sticks; /* (cu) Statclock hits in sys mode. */ uint64_t rux_iticks; /* (cu) Statclock hits in intr mode. */ uint64_t rux_uu; /* (c) Previous user time in usec. */ uint64_t rux_su; /* (c) Previous sys time in usec. */ uint64_t rux_tu; /* (c) Previous total time in usec. */ }; /* * Kernel runnable context (thread). * This is what is put to sleep and reactivated. * Thread context. Processes may have multiple threads. */ struct thread { struct mtx *volatile td_lock; /* replaces sched lock */ struct proc *td_proc; /* (*) Associated process. */ TAILQ_ENTRY(thread) td_plist; /* (*) All threads in this proc. */ TAILQ_ENTRY(thread) td_runq; /* (t) Run queue. */ union { TAILQ_ENTRY(thread) td_slpq; /* (t) Sleep queue. */ struct thread *td_zombie; /* Zombie list linkage */ }; TAILQ_ENTRY(thread) td_lockq; /* (t) Lock queue. */ LIST_ENTRY(thread) td_hash; /* (d) Hash chain. */ struct cpuset *td_cpuset; /* (t) CPU affinity mask. 
*/ struct domainset_ref td_domain; /* (a) NUMA policy */ struct seltd *td_sel; /* Select queue/channel. */ struct sleepqueue *td_sleepqueue; /* (k) Associated sleep queue. */ struct turnstile *td_turnstile; /* (k) Associated turnstile. */ struct rl_q_entry *td_rlqe; /* (k) Associated range lock entry. */ struct umtx_q *td_umtxq; /* (c?) Link for when we're blocked. */ lwpid_t td_tid; /* (b) Thread ID. */ sigqueue_t td_sigqueue; /* (c) Sigs arrived, not delivered. */ #define td_siglist td_sigqueue.sq_signals u_char td_lend_user_pri; /* (t) Lend user pri. */ u_char td_allocdomain; /* (b) NUMA domain backing this struct thread. */ u_char td_base_ithread_pri; /* (t) Base ithread pri */ struct kmsan_td *td_kmsan; /* (k) KMSAN state */ /* Cleared during fork1() */ #define td_startzero td_flags int td_flags; /* (t) TDF_* flags. */ int td_ast; /* (t) TDA_* indicators */ int td_inhibitors; /* (t) Why can not run. */ int td_pflags; /* (k) Private thread (TDP_*) flags. */ int td_pflags2; /* (k) Private thread (TDP2_*) flags. */ int td_dupfd; /* (k) Ret value from fdopen. XXX */ int td_sqqueue; /* (t) Sleepqueue queue blocked on. */ const void *td_wchan; /* (t) Sleep address. */ const char *td_wmesg; /* (t) Reason for sleep. */ volatile u_char td_owepreempt; /* (k*) Preempt on last critical_exit */ u_char td_tsqueue; /* (t) Turnstile queue blocked on. */ short td_locks; /* (k) Debug: count of non-spin locks */ short td_rw_rlocks; /* (k) Count of rwlock read locks. */ short td_sx_slocks; /* (k) Count of sx shared locks. */ short td_lk_slocks; /* (k) Count of lockmgr shared locks. */ short td_stopsched; /* (k) Scheduler stopped. */ struct turnstile *td_blocked; /* (t) Lock thread is blocked on. */ const char *td_lockname; /* (t) Name of lock blocked on. */ LIST_HEAD(, turnstile) td_contested; /* (q) Contested locks. */ struct lock_list_entry *td_sleeplocks; /* (k) Held sleep locks. */ int td_intr_nesting_level; /* (k) Interrupt recursion. */ int td_pinned; /* (k) Temporary cpu pin count. */ struct ucred *td_realucred; /* (k) Reference to credentials. */ struct ucred *td_ucred; /* (k) Used credentials, temporarily switchable. */ struct plimit *td_limit; /* (k) Resource limits. */ int td_slptick; /* (t) Time at sleep. */ int td_blktick; /* (t) Time spent blocked. */ int td_swvoltick; /* (t) Time at last SW_VOL switch. */ int td_swinvoltick; /* (t) Time at last SW_INVOL switch. */ u_int td_cow; /* (*) Number of copy-on-write faults */ struct rusage td_ru; /* (t) rusage information. */ struct rusage_ext td_rux; /* (t) Internal rusage information. */ uint64_t td_incruntime; /* (t) Cpu ticks to transfer to proc. */ uint64_t td_runtime; /* (t) How many cpu ticks we've run. */ u_int td_pticks; /* (t) Statclock hits for profiling */ u_int td_sticks; /* (t) Statclock hits in system mode. */ u_int td_iticks; /* (t) Statclock hits in intr mode. */ u_int td_uticks; /* (t) Statclock hits in user mode. */ int td_intrval; /* (t) Return value for sleepq. */ sigset_t td_oldsigmask; /* (k) Saved mask from pre sigpause. */ volatile u_int td_generation; /* (k) For detection of preemption */ stack_t td_sigstk; /* (k) Stack ptr and on-stack flag. */ int td_xsig; /* (c) Signal for ptrace */ u_long td_profil_addr; /* (k) Temporary addr until AST. */ u_int td_profil_ticks; /* (k) Temporary ticks until AST. */ char td_name[MAXCOMLEN + 1]; /* (*) Thread name. 
*/ struct file *td_fpop; /* (k) file referencing cdev under op */ int td_dbgflags; /* (c) Userland debugger flags */ siginfo_t td_si; /* (c) For debugger or core file */ int td_ng_outbound; /* (k) Thread entered ng from above. */ struct osd td_osd; /* (k) Object specific data. */ struct vm_map_entry *td_map_def_user; /* (k) Deferred entries. */ pid_t td_dbg_forked; /* (c) Child pid for debugger. */ struct vnode *td_vp_reserved;/* (k) Preallocated vnode. */ u_int td_no_sleeping; /* (k) Sleeping disabled count. */ void *td_su; /* (k) FFS SU private */ sbintime_t td_sleeptimo; /* (t) Sleep timeout. */ int td_rtcgen; /* (s) rtc_generation of abs. sleep */ int td_errno; /* (k) Error from last syscall. */ size_t td_vslock_sz; /* (k) amount of vslock-ed space */ struct kcov_info *td_kcov_info; /* (*) Kernel code coverage data */ u_int td_ucredref; /* (k) references on td_realucred */ #define td_endzero td_sigmask /* Copied during fork1() or create_thread(). */ #define td_startcopy td_endzero sigset_t td_sigmask; /* (c) Current signal mask. */ u_char td_rqindex; /* (t) Run queue index. */ u_char td_base_pri; /* (t) Thread base kernel priority. */ u_char td_priority; /* (t) Thread active priority. */ u_char td_pri_class; /* (t) Scheduling class. */ u_char td_user_pri; /* (t) User pri from estcpu and nice. */ u_char td_base_user_pri; /* (t) Base user pri */ uintptr_t td_rb_list; /* (k) Robust list head. */ uintptr_t td_rbp_list; /* (k) Robust priv list head. */ uintptr_t td_rb_inact; /* (k) Current in-action mutex loc. */ struct syscall_args td_sa; /* (kx) Syscall parameters. Copied on fork for child tracing. */ void *td_sigblock_ptr; /* (k) uptr for fast sigblock. */ uint32_t td_sigblock_val; /* (k) fast sigblock value read at td_sigblock_ptr on kern entry */ #define td_endcopy td_pcb /* * Fields that must be manually set in fork1() or create_thread() * or already have been set in the allocator, constructor, etc. */ struct pcb *td_pcb; /* (k) Kernel VA of pcb and kstack. */ enum td_states { TDS_INACTIVE = 0x0, TDS_INHIBITED, TDS_CAN_RUN, TDS_RUNQ, TDS_RUNNING } td_state; /* (t) thread state */ /* Note: td_state must be accessed using TD_{GET,SET}_STATE(). */ union { syscallarg_t tdu_retval[2]; off_t tdu_off; } td_uretoff; /* (k) Syscall aux returns. */ #define td_retval td_uretoff.tdu_retval u_int td_cowgen; /* (k) Generation of COW pointers. */ /* LP64 hole */ struct callout td_slpcallout; /* (h) Callout for sleep. */ struct trapframe *td_frame; /* (k) */ vm_offset_t td_kstack; /* (a) Kernel VA of kstack. */ int td_kstack_pages; /* (a) Size of the kstack. */ volatile u_int td_critnest; /* (k*) Critical section nest level. */ struct mdthread td_md; /* (k) Any machine-dependent fields. */ struct kaudit_record *td_ar; /* (k) Active audit record, if any. */ struct lpohead td_lprof[2]; /* (a) lock profiling objects. */ struct kdtrace_thread *td_dtrace; /* (*) DTrace-specific data. */ struct vnet *td_vnet; /* (k) Effective vnet. */ const char *td_vnet_lpush; /* (k) Debugging vnet push / pop. */ struct trapframe *td_intr_frame;/* (k) Frame of the current irq */ struct proc *td_rfppwait_p; /* (k) The vforked child */ struct vm_page **td_ma; /* (k) uio pages held */ int td_ma_cnt; /* (k) size of *td_ma */ /* LP64 hole */ void *td_emuldata; /* Emulator state data */ int td_lastcpu; /* (t) Last cpu we were on. */ int td_oncpu; /* (t) Which cpu we are on. */ void *td_lkpi_task; /* LinuxKPI task struct pointer */ int td_pmcpend; void *td_remotereq; /* (c) dbg remote request. 
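					   (Presumably the payload for the
					   TDB_COREDUMPREQ and TDB_SCREMOTEREQ
					   requests serviced while the thread
					   is stopped in ptracestop().)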
*/ off_t td_ktr_io_lim; /* (k) limit for ktrace file size */ #ifdef EPOCH_TRACE SLIST_HEAD(, epoch_tracker) td_epochs; #endif }; struct thread0_storage { struct thread t0st_thread; uint64_t t0st_sched[10]; }; struct mtx *thread_lock_block(struct thread *); void thread_lock_block_wait(struct thread *); void thread_lock_set(struct thread *, struct mtx *); void thread_lock_unblock(struct thread *, struct mtx *); #define THREAD_LOCK_ASSERT(td, type) \ mtx_assert((td)->td_lock, (type)) #define THREAD_LOCK_BLOCKED_ASSERT(td, type) \ do { \ struct mtx *__m = (td)->td_lock; \ if (__m != &blocked_lock) \ mtx_assert(__m, (type)); \ } while (0) #ifdef INVARIANTS #define THREAD_LOCKPTR_ASSERT(td, lock) \ do { \ struct mtx *__m; \ __m = (td)->td_lock; \ KASSERT(__m == (lock), \ ("Thread %p lock %p does not match %p", td, __m, (lock))); \ } while (0) #define THREAD_LOCKPTR_BLOCKED_ASSERT(td, lock) \ do { \ struct mtx *__m; \ __m = (td)->td_lock; \ KASSERT(__m == (lock) || __m == &blocked_lock, \ ("Thread %p lock %p does not match %p", td, __m, (lock))); \ } while (0) #define TD_LOCKS_INC(td) ((td)->td_locks++) #define TD_LOCKS_DEC(td) do { \ KASSERT(SCHEDULER_STOPPED_TD(td) || (td)->td_locks > 0, \ ("thread %p owns no locks", (td))); \ (td)->td_locks--; \ } while (0) #else #define THREAD_LOCKPTR_ASSERT(td, lock) #define THREAD_LOCKPTR_BLOCKED_ASSERT(td, lock) #define TD_LOCKS_INC(td) #define TD_LOCKS_DEC(td) #endif /* * Flags kept in td_flags: * To change these you MUST have the scheduler lock. */ #define TDF_BORROWING 0x00000001 /* Thread is borrowing pri from another. */ #define TDF_INPANIC 0x00000002 /* Caused a panic, let it drive crashdump. */ #define TDF_INMEM 0x00000004 /* Thread's stack is in memory. */ #define TDF_SINTR 0x00000008 /* Sleep is interruptible. */ #define TDF_TIMEOUT 0x00000010 /* Timing out during sleep. */ #define TDF_IDLETD 0x00000020 /* This is a per-CPU idle thread. */ #define TDF_CANSWAP 0x00000040 /* Thread can be swapped. */ #define TDF_SIGWAIT 0x00000080 /* Ignore ignored signals */ #define TDF_KTH_SUSP 0x00000100 /* kthread is suspended */ #define TDF_ALLPROCSUSP 0x00000200 /* suspended by SINGLE_ALLPROC */ #define TDF_BOUNDARY 0x00000400 /* Thread suspended at user boundary */ #define TDF_UNUSED1 0x00000800 /* Available */ #define TDF_UNUSED2 0x00001000 /* Available */ #define TDF_SBDRY 0x00002000 /* Stop only on usermode boundary. */ #define TDF_UPIBLOCKED 0x00004000 /* Thread blocked on user PI mutex. */ #define TDF_UNUSED3 0x00008000 /* Available */ #define TDF_UNUSED4 0x00010000 /* Available */ #define TDF_UNUSED5 0x00020000 /* Available */ #define TDF_NOLOAD 0x00040000 /* Ignore during load avg calculations. */ #define TDF_SERESTART 0x00080000 /* ERESTART on stop attempts. */ #define TDF_THRWAKEUP 0x00100000 /* Libthr thread must not suspend itself. */ #define TDF_SEINTR 0x00200000 /* EINTR on stop attempts. */ #define TDF_SWAPINREQ 0x00400000 /* Swapin request due to wakeup. 
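					(Presumably set when a wakeup finds the
					thread's stack swapped out, so that the
					swapper knows to fault it back in.)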
*/ #define TDF_UNUSED6 0x00800000 /* Available */ #define TDF_SCHED0 0x01000000 /* Reserved for scheduler private use */ #define TDF_SCHED1 0x02000000 /* Reserved for scheduler private use */ #define TDF_SCHED2 0x04000000 /* Reserved for scheduler private use */ #define TDF_SCHED3 0x08000000 /* Reserved for scheduler private use */ #define TDF_UNUSED7 0x10000000 /* Available */ #define TDF_UNUSED8 0x20000000 /* Available */ #define TDF_UNUSED9 0x40000000 /* Available */ #define TDF_UNUSED10 0x80000000 /* Available */ enum { TDA_AST = 0, /* Special: call all non-flagged AST handlers */ TDA_OWEUPC, TDA_HWPMC, TDA_VFORK, TDA_ALRM, TDA_PROF, TDA_MAC, TDA_SCHED, TDA_UFS, TDA_GEOM, TDA_KQUEUE, TDA_RACCT, TDA_MOD1, /* For third party use, before signals are */ TDA_MOD2, /* processed .. */ TDA_SIG, TDA_KTRACE, TDA_SUSPEND, TDA_SIGSUSPEND, TDA_MOD3, /* .. and after */ TDA_MOD4, TDA_MAX, }; #define TDAI(tda) (1U << (tda)) #define td_ast_pending(td, tda) ((td->td_ast & TDAI(tda)) != 0) /* Userland debug flags */ #define TDB_SUSPEND 0x00000001 /* Thread is suspended by debugger */ #define TDB_XSIG 0x00000002 /* Thread is exchanging signal under trace */ #define TDB_USERWR 0x00000004 /* Debugger modified memory or registers */ #define TDB_SCE 0x00000008 /* Thread performs syscall enter */ #define TDB_SCX 0x00000010 /* Thread performs syscall exit */ #define TDB_EXEC 0x00000020 /* TDB_SCX from exec(2) family */ #define TDB_FORK 0x00000040 /* TDB_SCX from fork(2) that created new process */ #define TDB_STOPATFORK 0x00000080 /* Stop at the return from fork (child only) */ #define TDB_CHILD 0x00000100 /* New child indicator for ptrace() */ #define TDB_BORN 0x00000200 /* New LWP indicator for ptrace() */ #define TDB_EXIT 0x00000400 /* Exiting LWP indicator for ptrace() */ #define TDB_VFORK 0x00000800 /* vfork indicator for ptrace() */ #define TDB_FSTP 0x00001000 /* The thread is PT_ATTACH leader */ #define TDB_STEP 0x00002000 /* (x86) PSL_T set for PT_STEP */ #define TDB_SSWITCH 0x00004000 /* Suspended in ptracestop */ #define TDB_BOUNDARY 0x00008000 /* ptracestop() at boundary */ #define TDB_COREDUMPREQ 0x00010000 /* Coredump request */ #define TDB_SCREMOTEREQ 0x00020000 /* Remote syscall request */ /* * "Private" flags kept in td_pflags: * These are only written by curthread and thus need no locking. */ #define TDP_OLDMASK 0x00000001 /* Need to restore mask after suspend. */ #define TDP_INKTR 0x00000002 /* Thread is currently in KTR code. */ #define TDP_INKTRACE 0x00000004 /* Thread is currently in KTRACE code. */ #define TDP_BUFNEED 0x00000008 /* Do not recurse into the buf flush */ #define TDP_COWINPROGRESS 0x00000010 /* Snapshot copy-on-write in progress. */ #define TDP_ALTSTACK 0x00000020 /* Have alternate signal stack. */ #define TDP_DEADLKTREAT 0x00000040 /* Lock acquisition - deadlock treatment. */ #define TDP_NOFAULTING 0x00000080 /* Do not handle page faults. */ #define TDP_SIGFASTBLOCK 0x00000100 /* Fast sigblock active */ #define TDP_OWEUPC 0x00000200 /* Call addupc() at next AST. */ #define TDP_ITHREAD 0x00000400 /* Thread is an interrupt thread. */ #define TDP_SYNCIO 0x00000800 /* Local override, disable async i/o.
*/ #define TDP_SCHED1 0x00001000 /* Reserved for scheduler private use */ #define TDP_SCHED2 0x00002000 /* Reserved for scheduler private use */ #define TDP_SCHED3 0x00004000 /* Reserved for scheduler private use */ #define TDP_SCHED4 0x00008000 /* Reserved for scheduler private use */ #define TDP_GEOM 0x00010000 /* Settle GEOM before finishing syscall */ #define TDP_SOFTDEP 0x00020000 /* Stuck processing softdep worklist */ #define TDP_NORUNNINGBUF 0x00040000 /* Ignore runningbufspace check */ #define TDP_WAKEUP 0x00080000 /* Don't sleep in umtx cond_wait */ #define TDP_INBDFLUSH 0x00100000 /* Already in BO_BDFLUSH, do not recurse */ #define TDP_KTHREAD 0x00200000 /* This is an official kernel thread */ #define TDP_CALLCHAIN 0x00400000 /* Capture thread's callchain */ #define TDP_IGNSUSP 0x00800000 /* Permission to ignore the MNTK_SUSPEND* */ #define TDP_AUDITREC 0x01000000 /* Audit record pending on thread */ #define TDP_RFPPWAIT 0x02000000 /* Handle RFPPWAIT on syscall exit */ #define TDP_RESETSPUR 0x04000000 /* Reset spurious page fault history. */ #define TDP_NERRNO 0x08000000 /* Last errno is already in td_errno */ #define TDP_UIOHELD 0x10000000 /* Current uio has pages held in td_ma */ #define TDP_INTCPCALLOUT 0x20000000 /* used by netinet/tcp_timer.c */ #define TDP_EXECVMSPC 0x40000000 /* Execve destroyed old vmspace */ #define TDP_SIGFASTPENDING 0x80000000 /* Pending signal due to sigfastblock */ #define TDP2_SBPAGES 0x00000001 /* Owns sbusy on some pages */ #define TDP2_COMPAT32RB 0x00000002 /* compat32 ABI for robust lists */ #define TDP2_ACCT 0x00000004 /* Doing accounting */ /* * Reasons that the current thread can not be run yet. * More than one may apply. */ #define TDI_SUSPENDED 0x0001 /* On suspension queue. */ #define TDI_SLEEPING 0x0002 /* Actually asleep! (tricky). */ #define TDI_SWAPPED 0x0004 /* Stack not in mem. Bad juju if run. */ #define TDI_LOCK 0x0008 /* Stopped on a lock. */ #define TDI_IWAIT 0x0010 /* Awaiting interrupt. */ #define TD_IS_SLEEPING(td) ((td)->td_inhibitors & TDI_SLEEPING) #define TD_ON_SLEEPQ(td) ((td)->td_wchan != NULL) #define TD_IS_SUSPENDED(td) ((td)->td_inhibitors & TDI_SUSPENDED) #define TD_IS_SWAPPED(td) ((td)->td_inhibitors & TDI_SWAPPED) #define TD_ON_LOCK(td) ((td)->td_inhibitors & TDI_LOCK) #define TD_AWAITING_INTR(td) ((td)->td_inhibitors & TDI_IWAIT) #ifdef _KERNEL #define TD_GET_STATE(td) atomic_load_int(&(td)->td_state) #else #define TD_GET_STATE(td) ((td)->td_state) #endif #define TD_IS_RUNNING(td) (TD_GET_STATE(td) == TDS_RUNNING) #define TD_ON_RUNQ(td) (TD_GET_STATE(td) == TDS_RUNQ) #define TD_CAN_RUN(td) (TD_GET_STATE(td) == TDS_CAN_RUN) #define TD_IS_INHIBITED(td) (TD_GET_STATE(td) == TDS_INHIBITED) #define TD_ON_UPILOCK(td) ((td)->td_flags & TDF_UPIBLOCKED) #define TD_IS_IDLETHREAD(td) ((td)->td_flags & TDF_IDLETD) #define TD_CAN_ABORT(td) (TD_ON_SLEEPQ((td)) && \ ((td)->td_flags & TDF_SINTR) != 0) #define KTDSTATE(td) \ (((td)->td_inhibitors & TDI_SLEEPING) != 0 ? "sleep" : \ ((td)->td_inhibitors & TDI_SUSPENDED) != 0 ? "suspended" : \ ((td)->td_inhibitors & TDI_SWAPPED) != 0 ? "swapped" : \ ((td)->td_inhibitors & TDI_LOCK) != 0 ? "blocked" : \ ((td)->td_inhibitors & TDI_IWAIT) != 0 ? 
"iwait" : "yielding") #define TD_SET_INHIB(td, inhib) do { \ TD_SET_STATE(td, TDS_INHIBITED); \ (td)->td_inhibitors |= (inhib); \ } while (0) #define TD_CLR_INHIB(td, inhib) do { \ if (((td)->td_inhibitors & (inhib)) && \ (((td)->td_inhibitors &= ~(inhib)) == 0)) \ TD_SET_STATE(td, TDS_CAN_RUN); \ } while (0) #define TD_SET_SLEEPING(td) TD_SET_INHIB((td), TDI_SLEEPING) #define TD_SET_SWAPPED(td) TD_SET_INHIB((td), TDI_SWAPPED) #define TD_SET_LOCK(td) TD_SET_INHIB((td), TDI_LOCK) #define TD_SET_SUSPENDED(td) TD_SET_INHIB((td), TDI_SUSPENDED) #define TD_SET_IWAIT(td) TD_SET_INHIB((td), TDI_IWAIT) #define TD_SET_EXITING(td) TD_SET_INHIB((td), TDI_EXITING) #define TD_CLR_SLEEPING(td) TD_CLR_INHIB((td), TDI_SLEEPING) #define TD_CLR_SWAPPED(td) TD_CLR_INHIB((td), TDI_SWAPPED) #define TD_CLR_LOCK(td) TD_CLR_INHIB((td), TDI_LOCK) #define TD_CLR_SUSPENDED(td) TD_CLR_INHIB((td), TDI_SUSPENDED) #define TD_CLR_IWAIT(td) TD_CLR_INHIB((td), TDI_IWAIT) #ifdef _KERNEL #define TD_SET_STATE(td, state) atomic_store_int(&(td)->td_state, state) #else #define TD_SET_STATE(td, state) (td)->td_state = state #endif #define TD_SET_RUNNING(td) TD_SET_STATE(td, TDS_RUNNING) #define TD_SET_RUNQ(td) TD_SET_STATE(td, TDS_RUNQ) #define TD_SET_CAN_RUN(td) TD_SET_STATE(td, TDS_CAN_RUN) #define TD_SBDRY_INTR(td) \ (((td)->td_flags & (TDF_SEINTR | TDF_SERESTART)) != 0) #define TD_SBDRY_ERRNO(td) \ (((td)->td_flags & TDF_SEINTR) != 0 ? EINTR : ERESTART) /* * Process structure. */ struct proc { LIST_ENTRY(proc) p_list; /* (d) List of all processes. */ TAILQ_HEAD(, thread) p_threads; /* (c) all threads. */ struct mtx p_slock; /* process spin lock */ struct ucred *p_ucred; /* (c) Process owner's identity. */ struct filedesc *p_fd; /* (b) Open files. */ struct filedesc_to_leader *p_fdtol; /* (b) Tracking node */ struct pwddesc *p_pd; /* (b) Cwd, chroot, jail, umask */ struct pstats *p_stats; /* (b) Accounting/statistics (CPU). */ struct plimit *p_limit; /* (c) Resource limits. */ struct callout p_limco; /* (c) Limit callout handle */ struct sigacts *p_sigacts; /* (x) Signal actions, state (CPU). */ int p_flag; /* (c) P_* flags. */ int p_flag2; /* (c) P2_* flags. */ enum p_states { PRS_NEW = 0, /* In creation */ PRS_NORMAL, /* threads can be run. */ PRS_ZOMBIE } p_state; /* (j/c) Process status. */ pid_t p_pid; /* (b) Process identifier. */ LIST_ENTRY(proc) p_hash; /* (d) Hash chain. */ LIST_ENTRY(proc) p_pglist; /* (g + e) List of processes in pgrp. */ struct proc *p_pptr; /* (c + e) Pointer to parent process. */ LIST_ENTRY(proc) p_sibling; /* (e) List of sibling processes. */ LIST_HEAD(, proc) p_children; /* (e) Pointer to list of children. */ struct proc *p_reaper; /* (e) My reaper. */ LIST_HEAD(, proc) p_reaplist; /* (e) List of my descendants (if I am reaper). */ LIST_ENTRY(proc) p_reapsibling; /* (e) List of siblings - descendants of the same reaper. */ struct mtx p_mtx; /* (n) Lock for this struct. */ struct mtx p_statmtx; /* Lock for the stats */ struct mtx p_itimmtx; /* Lock for the virt/prof timers */ struct mtx p_profmtx; /* Lock for the profiling */ struct ksiginfo *p_ksi; /* Locked by parent proc lock */ sigqueue_t p_sigqueue; /* (c) Sigs not delivered to a td. */ #define p_siglist p_sigqueue.sq_signals pid_t p_oppid; /* (c + e) Real parent pid. */ /* The following fields are all zeroed upon creation in fork. */ #define p_startzero p_vmspace struct vmspace *p_vmspace; /* (b) Address space. */ u_int p_swtick; /* (c) Tick when swapped in or out. */ u_int p_cowgen; /* (c) Generation of COW pointers. 
*/ struct itimerval p_realtimer; /* (c) Alarm timer. */ struct rusage p_ru; /* (a) Exit information. */ struct rusage_ext p_rux; /* (cu) Internal resource usage. */ struct rusage_ext p_crux; /* (c) Internal child resource usage. */ int p_profthreads; /* (c) Num threads in addupc_task. */ volatile int p_exitthreads; /* (j) Number of threads exiting */ int p_traceflag; /* (o) Kernel trace points. */ struct ktr_io_params *p_ktrioparms; /* (c + o) Params for ktrace. */ struct vnode *p_textvp; /* (b) Vnode of executable. */ struct vnode *p_textdvp; /* (b) Dir containing textvp. */ char *p_binname; /* (b) Binary hardlink name. */ u_int p_lock; /* (c) Proclock (prevent swap) count. */ struct sigiolst p_sigiolst; /* (c) List of sigio sources. */ int p_sigparent; /* (c) Signal to parent on exit. */ int p_sig; /* (n) For core dump/debugger XXX. */ u_int p_ptevents; /* (c + e) ptrace() event mask. */ struct kaioinfo *p_aioinfo; /* (y) ASYNC I/O info. */ struct thread *p_singlethread;/* (c + j) If single threading this is it */ int p_suspcount; /* (j) Num threads in suspended mode. */ struct thread *p_xthread; /* (c) Trap thread */ int p_boundary_count;/* (j) Num threads at user boundary */ int p_pendingcnt; /* how many signals are pending */ struct itimers *p_itimers; /* (c) POSIX interval timers. */ struct procdesc *p_procdesc; /* (e) Process descriptor, if any. */ u_int p_treeflag; /* (e) P_TREE flags */ int p_pendingexits; /* (c) Count of pending thread exits. */ struct filemon *p_filemon; /* (c) filemon-specific data. */ int p_pdeathsig; /* (c) Signal from parent on exit. */ /* End area that is zeroed on creation. */ #define p_endzero p_magic /* The following fields are all copied upon creation in fork. */ #define p_startcopy p_endzero u_int p_magic; /* (b) Magic number. */ int p_osrel; /* (x) osreldate for the binary (from ELF note, if any) */ uint32_t p_fctl0; /* (x) ABI feature control, ELF note */ char p_comm[MAXCOMLEN + 1]; /* (x) Process name. */ struct sysentvec *p_sysent; /* (b) Syscall dispatch info. */ struct pargs *p_args; /* (c) Process arguments. */ rlim_t p_cpulimit; /* (c) Current CPU limit in seconds. */ signed char p_nice; /* (c) Process "nice" value. */ int p_fibnum; /* in this routing domain XXX MRT */ pid_t p_reapsubtree; /* (e) Pid of the direct child of the reaper which spawned our subtree. */ uint64_t p_elf_flags; /* (x) ELF flags */ void *p_elf_brandinfo; /* (x) Elf_Brandinfo, NULL for non ELF binaries. */ /* End area that is copied on creation. */ #define p_endcopy p_xexit u_int p_xexit; /* (c) Exit code. */ u_int p_xsig; /* (c) Stop/kill sig. */ struct pgrp *p_pgrp; /* (c + e) Pointer to process group. */ struct knlist *p_klist; /* (c) Knotes attached to this proc. */ int p_numthreads; /* (c) Number of threads. */ struct mdproc p_md; /* Any machine-dependent fields. */ struct callout p_itcallout; /* (h + c) Interval timer callout. */ u_short p_acflag; /* (c) Accounting flags. */ struct proc *p_peers; /* (r) */ struct proc *p_leader; /* (b) */ void *p_emuldata; /* (c) Emulator state data. */ struct label *p_label; /* (*) Proc (not subject) MAC label. */ STAILQ_HEAD(, ktr_request) p_ktr; /* (o) KTR event queue. */ LIST_HEAD(, mqueue_notifier) p_mqnotifier; /* (c) mqueue notifiers.*/ struct kdtrace_proc *p_dtrace; /* (*) DTrace-specific data. */ struct cv p_pwait; /* (*) wait cv for exit/exec. */ uint64_t p_prev_runtime; /* (c) Resource usage accounting. */ struct racct *p_racct; /* (b) Resource accounting. 
*/ int p_throttled; /* (c) Flag for racct pcpu throttling */ /* * An orphan is the child that has been re-parented to the * debugger as a result of attaching to it. Need to keep * track of them for parent to be able to collect the exit * status of what used to be children. */ LIST_ENTRY(proc) p_orphan; /* (e) List of orphan processes. */ LIST_HEAD(, proc) p_orphans; /* (e) Pointer to list of orphans. */ TAILQ_HEAD(, kq_timer_cb_data) p_kqtim_stop; /* (c) */ LIST_ENTRY(proc) p_jaillist; /* (d) Jail process linkage. */ }; #define p_session p_pgrp->pg_session #define p_pgid p_pgrp->pg_id #define NOCPU (-1) /* For when we aren't on a CPU. */ #define NOCPU_OLD (255) #define MAXCPU_OLD (254) #define PROC_SLOCK(p) mtx_lock_spin(&(p)->p_slock) #define PROC_SUNLOCK(p) mtx_unlock_spin(&(p)->p_slock) #define PROC_SLOCK_ASSERT(p, type) mtx_assert(&(p)->p_slock, (type)) #define PROC_STATLOCK(p) mtx_lock_spin(&(p)->p_statmtx) #define PROC_STATUNLOCK(p) mtx_unlock_spin(&(p)->p_statmtx) #define PROC_STATLOCK_ASSERT(p, type) mtx_assert(&(p)->p_statmtx, (type)) #define PROC_ITIMLOCK(p) mtx_lock_spin(&(p)->p_itimmtx) #define PROC_ITIMUNLOCK(p) mtx_unlock_spin(&(p)->p_itimmtx) #define PROC_ITIMLOCK_ASSERT(p, type) mtx_assert(&(p)->p_itimmtx, (type)) #define PROC_PROFLOCK(p) mtx_lock_spin(&(p)->p_profmtx) #define PROC_PROFUNLOCK(p) mtx_unlock_spin(&(p)->p_profmtx) #define PROC_PROFLOCK_ASSERT(p, type) mtx_assert(&(p)->p_profmtx, (type)) /* These flags are kept in p_flag. */ #define P_ADVLOCK 0x00000001 /* Process may hold a POSIX advisory lock. */ #define P_CONTROLT 0x00000002 /* Has a controlling terminal. */ #define P_KPROC 0x00000004 /* Kernel process. */ #define P_UNUSED3 0x00000008 /* --available-- */ #define P_PPWAIT 0x00000010 /* Parent is waiting for child to exec/exit. */ #define P_PROFIL 0x00000020 /* Has started profiling. */ #define P_STOPPROF 0x00000040 /* Has thread requesting to stop profiling. */ #define P_HADTHREADS 0x00000080 /* Has had threads (no cleanup shortcuts) */ #define P_SUGID 0x00000100 /* Had set id privileges since last exec. */ #define P_SYSTEM 0x00000200 /* System proc: no sigs, stats or swapping. */ #define P_SINGLE_EXIT 0x00000400 /* Threads suspending should exit, not wait. */ #define P_TRACED 0x00000800 /* Debugged process being traced. */ #define P_WAITED 0x00001000 /* Someone is waiting for us. */ #define P_WEXIT 0x00002000 /* Working on exiting. */ #define P_EXEC 0x00004000 /* Process called exec. */ #define P_WKILLED 0x00008000 /* Killed, go to kernel/user boundary ASAP. */ #define P_CONTINUED 0x00010000 /* Proc has continued from a stopped state. */ #define P_STOPPED_SIG 0x00020000 /* Stopped due to SIGSTOP/SIGTSTP. */ #define P_STOPPED_TRACE 0x00040000 /* Stopped because of tracing. */ #define P_STOPPED_SINGLE 0x00080000 /* Only 1 thread can continue (not to user). */ #define P_PROTECTED 0x00100000 /* Do not kill on memory overcommit. */ #define P_SIGEVENT 0x00200000 /* Process pending signals changed. */ #define P_SINGLE_BOUNDARY 0x00400000 /* Threads should suspend at user boundary. */ #define P_HWPMC 0x00800000 /* Process is using HWPMCs */ #define P_JAILED 0x01000000 /* Process is in jail. */ #define P_TOTAL_STOP 0x02000000 /* Stopped in stop_all_proc. */ #define P_INEXEC 0x04000000 /* Process is in execve(). */ #define P_STATCHILD 0x08000000 /* Child process stopped or exited. */ #define P_INMEM 0x10000000 /* Loaded into memory. */ #define P_SWAPPINGOUT 0x20000000 /* Process is being swapped out. */ #define P_SWAPPINGIN 0x40000000 /* Process is being swapped in. 
*/ #define P_PPTRACE 0x80000000 /* PT_TRACEME by vforked child. */ #define P_STOPPED (P_STOPPED_SIG|P_STOPPED_SINGLE|P_STOPPED_TRACE) #define P_SHOULDSTOP(p) ((p)->p_flag & P_STOPPED) #define P_KILLED(p) ((p)->p_flag & P_WKILLED) /* These flags are kept in p_flag2. */ #define P2_INHERIT_PROTECTED 0x00000001 /* New children get P_PROTECTED. */ #define P2_NOTRACE 0x00000002 /* No ptrace(2) attach or coredumps. */ #define P2_NOTRACE_EXEC 0x00000004 /* Keep P2_NOPTRACE on exec(2). */ #define P2_AST_SU 0x00000008 /* Handles SU ast for kthreads. */ #define P2_PTRACE_FSTP 0x00000010 /* SIGSTOP from PT_ATTACH not yet handled. */ #define P2_TRAPCAP 0x00000020 /* SIGTRAP on ENOTCAPABLE */ #define P2_ASLR_ENABLE 0x00000040 /* Force enable ASLR. */ #define P2_ASLR_DISABLE 0x00000080 /* Force disable ASLR. */ #define P2_ASLR_IGNSTART 0x00000100 /* Enable ASLR to consume sbrk area. */ #define P2_PROTMAX_ENABLE 0x00000200 /* Force enable implied PROT_MAX. */ #define P2_PROTMAX_DISABLE 0x00000400 /* Force disable implied PROT_MAX. */ #define P2_STKGAP_DISABLE 0x00000800 /* Disable stack gap for MAP_STACK */ #define P2_STKGAP_DISABLE_EXEC 0x00001000 /* Stack gap disabled after exec */ #define P2_ITSTOPPED 0x00002000 #define P2_PTRACEREQ 0x00004000 /* Active ptrace req */ #define P2_NO_NEW_PRIVS 0x00008000 /* Ignore setuid */ #define P2_WXORX_DISABLE 0x00010000 /* WX mappings enabled */ #define P2_WXORX_ENABLE_EXEC 0x00020000 /* WXORX enabled after exec */ #define P2_WEXIT 0x00040000 /* exit just started, no external thread_single() is permitted */ /* Flags protected by proctree_lock, kept in p_treeflags. */ #define P_TREE_ORPHANED 0x00000001 /* Reparented, on orphan list */ #define P_TREE_FIRST_ORPHAN 0x00000002 /* First element of orphan list */ #define P_TREE_REAPER 0x00000004 /* Reaper of subtree */ #define P_TREE_GRPEXITED 0x00000008 /* exit1() done with job ctl */ /* * These were process status values (p_stat), now they are only used in * legacy conversion code. */ #define SIDL 1 /* Process being created by fork. */ #define SRUN 2 /* Currently runnable. */ #define SSLEEP 3 /* Sleeping on an address. */ #define SSTOP 4 /* Process debugging or suspension. */ #define SZOMB 5 /* Awaiting collection by parent. */ #define SWAIT 6 /* Waiting for interrupt. */ #define SLOCK 7 /* Blocked on a lock. */ #define P_MAGIC 0xbeefface #ifdef _KERNEL /* Types and flags for mi_switch(). */ #define SW_TYPE_MASK 0xff /* First 8 bits are switch type */ #define SWT_NONE 0 /* Unspecified switch. */ #define SWT_PREEMPT 1 /* Switching due to preemption. */ #define SWT_OWEPREEMPT 2 /* Switching due to owepreempt. */ #define SWT_TURNSTILE 3 /* Turnstile contention. */ #define SWT_SLEEPQ 4 /* Sleepq wait. */ #define SWT_SLEEPQTIMO 5 /* Sleepq timeout wait. */ #define SWT_RELINQUISH 6 /* yield call. */ #define SWT_NEEDRESCHED 7 /* NEEDRESCHED was set. */ #define SWT_IDLE 8 /* Switching from the idle thread. */ #define SWT_IWAIT 9 /* Waiting for interrupts. */ #define SWT_SUSPEND 10 /* Thread suspended. */ #define SWT_REMOTEPREEMPT 11 /* Remote processor preempted. */ #define SWT_REMOTEWAKEIDLE 12 /* Remote processor preempted idle. */ #define SWT_COUNT 13 /* Number of switch types. */ /* Flags */ #define SW_VOL 0x0100 /* Voluntary switch. */ #define SW_INVOL 0x0200 /* Involuntary switch. */ #define SW_PREEMPT 0x0400 /* The invol switch is a preemption */ /* How values for thread_single(). 
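 * The "how" argument selects what the other threads are made to do:
 * SINGLE_NO_EXIT suspends them wherever they are, SINGLE_EXIT makes
 * them exit, SINGLE_BOUNDARY parks them at the kernel/user boundary,
 * and SINGLE_ALLPROC is the variant that may act on a process other
 * than curproc (used by stop_all_proc()).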
/* "how" values for thread_single(). */
#define	SINGLE_NO_EXIT	0
#define	SINGLE_EXIT	1
#define	SINGLE_BOUNDARY	2
#define	SINGLE_ALLPROC	3

#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_PARGS);
MALLOC_DECLARE(M_SESSION);
MALLOC_DECLARE(M_SUBPROC);
#endif

#define	FOREACH_PROC_IN_SYSTEM(p)					\
	LIST_FOREACH((p), &allproc, p_list)
#define	FOREACH_THREAD_IN_PROC(p, td)					\
	TAILQ_FOREACH((td), &(p)->p_threads, td_plist)
#define	FIRST_THREAD_IN_PROC(p)	TAILQ_FIRST(&(p)->p_threads)

/*
 * We use process IDs <= pid_max <= PID_MAX; PID_MAX + 1 must also fit
 * in a pid_t, as it is used to represent "no process group".
 */
#define	PID_MAX		99999
#define	NO_PID		100000
#define	THREAD0_TID	NO_PID
extern pid_t pid_max;

#define	SESS_LEADER(p)	((p)->p_session->s_leader == (p))

/* Lock and unlock a process. */
#define	PROC_LOCK(p)	mtx_lock(&(p)->p_mtx)
#define	PROC_TRYLOCK(p)	mtx_trylock(&(p)->p_mtx)
#define	PROC_UNLOCK(p)	mtx_unlock(&(p)->p_mtx)
#define	PROC_LOCKED(p)	mtx_owned(&(p)->p_mtx)
#define	PROC_WAIT_UNLOCKED(p)	mtx_wait_unlocked(&(p)->p_mtx)
#define	PROC_LOCK_ASSERT(p, type)	mtx_assert(&(p)->p_mtx, (type))

/* Lock and unlock a process group. */
#define	PGRP_LOCK(pg)	mtx_lock(&(pg)->pg_mtx)
#define	PGRP_UNLOCK(pg)	mtx_unlock(&(pg)->pg_mtx)
#define	PGRP_LOCKED(pg)	mtx_owned(&(pg)->pg_mtx)
#define	PGRP_LOCK_ASSERT(pg, type)	mtx_assert(&(pg)->pg_mtx, (type))

#define	PGRP_LOCK_PGSIGNAL(pg) do {					\
	if ((pg) != NULL)						\
		PGRP_LOCK(pg);						\
} while (0)
#define	PGRP_UNLOCK_PGSIGNAL(pg) do {					\
	if ((pg) != NULL)						\
		PGRP_UNLOCK(pg);					\
} while (0)

/* Lock and unlock a session. */
#define	SESS_LOCK(s)	mtx_lock(&(s)->s_mtx)
#define	SESS_UNLOCK(s)	mtx_unlock(&(s)->s_mtx)
#define	SESS_LOCKED(s)	mtx_owned(&(s)->s_mtx)
#define	SESS_LOCK_ASSERT(s, type)	mtx_assert(&(s)->s_mtx, (type))

/*
 * A non-zero p_lock ensures that:
 * - exit1() is not performed until p_lock reaches zero;
 * - the process's thread stacks are not swapped out if they are
 *   currently resident (P_INMEM).
 *
 * PHOLD() asserts that the process (other than the current process) is
 * not exiting, increments p_lock, and swaps the thread stacks into
 * memory if needed.
 * _PHOLD() is the same as PHOLD(), except that it expects the process
 * to already be locked.
 * _PHOLD_LITE() also expects the process to be locked but, compared
 * with _PHOLD(), only guarantees that exit1() is not executed;
 * faultin() is not called.  (An illustrative usage sketch follows the
 * macro definitions below.)
 */
#define	PHOLD(p) do {							\
	PROC_LOCK(p);							\
	_PHOLD(p);							\
	PROC_UNLOCK(p);							\
} while (0)
#define	_PHOLD(p) do {							\
	PROC_LOCK_ASSERT((p), MA_OWNED);				\
	KASSERT(!((p)->p_flag & P_WEXIT) || (p) == curproc,		\
	    ("PHOLD of exiting process %p", p));			\
	(p)->p_lock++;							\
	if (((p)->p_flag & P_INMEM) == 0)				\
		faultin((p));						\
} while (0)
#define	_PHOLD_LITE(p) do {						\
	PROC_LOCK_ASSERT((p), MA_OWNED);				\
	KASSERT(!((p)->p_flag & P_WEXIT) || (p) == curproc,		\
	    ("PHOLD of exiting process %p", p));			\
	(p)->p_lock++;							\
} while (0)
#define	PROC_ASSERT_HELD(p) do {					\
	KASSERT((p)->p_lock > 0, ("process %p not held", p));		\
} while (0)

#define	PRELE(p) do {							\
	PROC_LOCK((p));							\
	_PRELE((p));							\
	PROC_UNLOCK((p));						\
} while (0)
#define	_PRELE(p) do {							\
	PROC_LOCK_ASSERT((p), MA_OWNED);				\
	PROC_ASSERT_HELD(p);						\
	(--(p)->p_lock);						\
	if (((p)->p_flag & P_WEXIT) && (p)->p_lock == 0)		\
		wakeup(&(p)->p_lock);					\
} while (0)
#define	PROC_ASSERT_NOT_HELD(p) do {					\
	KASSERT((p)->p_lock == 0, ("process %p held", p));		\
} while (0)

#define	PROC_UPDATE_COW(p) do {						\
	struct proc *_p = (p);						\
	PROC_LOCK_ASSERT((_p), MA_OWNED);				\
	atomic_store_int(&_p->p_cowgen, _p->p_cowgen + 1);		\
} while (0)

#define	PROC_COW_CHANGECOUNT(td, p) ({					\
	struct thread *_td = (td);					\
	struct proc *_p = (p);						\
	MPASS(_td == curthread);					\
	PROC_LOCK_ASSERT(_p, MA_OWNED);					\
	_p->p_cowgen - _td->td_cowgen;					\
})

/* Check whether a thread is safe to be swapped out. */
#define	thread_safetoswapout(td)	((td)->td_flags & TDF_CANSWAP)

/* Control whether or not it is safe for curthread to sleep. */
#define	THREAD_NO_SLEEPING() do {					\
	curthread->td_no_sleeping++;					\
	MPASS(curthread->td_no_sleeping > 0);				\
} while (0)

#define	THREAD_SLEEPING_OK() do {					\
	MPASS(curthread->td_no_sleeping > 0);				\
	curthread->td_no_sleeping--;					\
} while (0)

#define	THREAD_CAN_SLEEP()	((curthread)->td_no_sleeping == 0)

#define	PIDHASH(pid)	(&pidhashtbl[(pid) & pidhash])
#define	PIDHASHLOCK(pid) (&pidhashtbl_lock[((pid) & pidhashlock)])
extern LIST_HEAD(pidhashhead, proc) *pidhashtbl;
extern struct sx *pidhashtbl_lock;
extern u_long pidhash;
extern u_long pidhashlock;

#define	PGRPHASH(pgid)	(&pgrphashtbl[(pgid) & pgrphash])
extern LIST_HEAD(pgrphashhead, pgrp) *pgrphashtbl;
extern u_long pgrphash;

extern struct sx allproc_lock;
extern int allproc_gen;
extern struct sx proctree_lock;
extern struct mtx ppeers_lock;
extern struct mtx procid_lock;
extern struct proc proc0;		/* Process slot for swapper. */
extern struct thread0_storage thread0_st;	/* Primary thread in proc0. */
#define	thread0 (thread0_st.t0st_thread)
extern struct vmspace vmspace0;		/* VM space for proc0. */
extern int hogticks;			/* Limit on kernel cpu hogs. */
extern int lastpid;
extern int nprocs, maxproc;		/* Current and max number of procs. */
extern int maxprocperuid;		/* Max procs per uid. */
extern u_long ps_arg_cache_limit;

LIST_HEAD(proclist, proc);
TAILQ_HEAD(procqueue, proc);
TAILQ_HEAD(threadqueue, thread);
extern struct proclist allproc;		/* List of all processes. */
extern struct proc *initproc, *pageproc; /* Process slots for init, pager. */

extern struct uma_zone *proc_zone;
extern struct uma_zone *pgrp_zone;

struct	proc *pfind(pid_t);		/* Find process by id. */
struct	proc *pfind_any(pid_t);		/* Find (zombie) process by id. */
struct	proc *pfind_any_locked(pid_t pid); /* Find process by id, locked. */
struct	pgrp *pgfind(pid_t);		/* Find process group by id. */
void	pidhash_slockall(void);		/* Shared lock all pid hash lists. */
void	pidhash_sunlockall(void);	/* Shared unlock all pid hash lists. */
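/*
 * Editor's illustrative sketch (not part of the patch): the usual
 * hold/release bracket.  PHOLD() prevents the process from exiting and
 * keeps its thread stacks resident; PRELE() drops the hold and wakes an
 * exiting process waiting for p_lock to reach zero.  The function is
 * hypothetical and assumes p is known not to be exiting, per the
 * KASSERT in _PHOLD().
 */
static void
example_inspect_proc(struct proc *p)
{

	PHOLD(p);
	/* ... p may be examined without it exiting underneath us ... */
	PRELE(p);
}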
struct fork_req {
	int		fr_flags;
	int		fr_pages;
	int		*fr_pidp;
	struct proc	**fr_procp;
	int		*fr_pd_fd;
	int		fr_pd_flags;
	struct filecaps	*fr_pd_fcaps;
	int		fr_flags2;
#define	FR2_DROPSIG_CAUGHT	0x00000001 /* Drop caught non-DFL signals. */
#define	FR2_SHARE_PATHS		0x00000002 /* Invert sense of RFFDG for paths. */
#define	FR2_KPROC		0x00000004 /* Create a kernel process. */
};

/*
 * pget() flags.
 */
#define	PGET_HOLD	0x00001	/* Hold the process. */
#define	PGET_CANSEE	0x00002	/* Check against p_cansee(). */
#define	PGET_CANDEBUG	0x00004	/* Check against p_candebug(). */
#define	PGET_ISCURRENT	0x00008	/* Check that the found process is current. */
#define	PGET_NOTWEXIT	0x00010	/* Check that the process is not in P_WEXIT. */
#define	PGET_NOTINEXEC	0x00020	/* Check that the process is not in P_INEXEC. */
#define	PGET_NOTID	0x00040	/* Do not assume tid if pid > PID_MAX. */

#define	PGET_WANTREAD	(PGET_HOLD | PGET_CANDEBUG | PGET_NOTWEXIT)

int	pget(pid_t pid, int flags, struct proc **pp);

/* ast_register() flags */
#define	ASTR_ASTF_REQUIRED	0x0001	/* td_ast TDAI(TDA_X) flag set is
					   required for call. */
#define	ASTR_TDP		0x0002	/* td_pflags flag set is required. */
#define	ASTR_KCLEAR		0x0004	/* Call me on ast_kclear(). */
#define	ASTR_UNCOND		0x0008	/* Call me always. */

void	ast(struct trapframe *framep);
void	ast_kclear(struct thread *td);
void	ast_register(int ast, int ast_flags, int tdp,
	    void (*f)(struct thread *td, int asts));
void	ast_deregister(int tda);
void	ast_sched_locked(struct thread *td, int tda);
void	ast_sched_mask(struct thread *td, int ast);
void	ast_sched(struct thread *td, int tda);
void	ast_unsched_locked(struct thread *td, int tda);

struct	thread *choosethread(void);
int	cr_cansee(struct ucred *u1, struct ucred *u2);
int	cr_canseesocket(struct ucred *cred, struct socket *so);
int	cr_canseeothergids(struct ucred *u1, struct ucred *u2);
int	cr_canseeotheruids(struct ucred *u1, struct ucred *u2);
int	cr_canseejailproc(struct ucred *u1, struct ucred *u2);
int	cr_cansignal(struct ucred *cred, struct proc *proc, int signum);
int	enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp,
	    struct session *sess);
int	enterthispgrp(struct proc *p, struct pgrp *pgrp);
void	faultin(struct proc *p);
int	fork1(struct thread *, struct fork_req *);
void	fork_exit(void (*)(void *, struct trapframe *), void *,
	    struct trapframe *);
void	fork_return(struct thread *, struct trapframe *);
int	inferior(struct proc *p);
void	itimer_proc_continue(struct proc *p);
void	kqtimer_proc_continue(struct proc *p);
void	kern_proc_vmmap_resident(struct vm_map *map,
	    struct vm_map_entry *entry, int *resident_count, bool *super);
void	kern_yield(int);
void	kick_proc0(void);
void	killjobc(void);
int	leavepgrp(struct proc *p);
int	maybe_preempt(struct thread *td);
void	maybe_yield(void);
void	mi_switch(int flags);
int	p_candebug(struct thread *td, struct proc *p);
int	p_cansee(struct thread *td, struct proc *p);
int	p_cansched(struct thread *td, struct proc *p);
int	p_cansignal(struct thread *td, struct proc *p, int signum);
int	p_canwait(struct thread *td, struct proc *p);
struct	pargs *pargs_alloc(int len);
void	pargs_drop(struct pargs *pa);
void	pargs_hold(struct pargs *pa);
void	proc_add_orphan(struct proc *child, struct proc *parent);
int	proc_get_binpath(struct proc *p, char *binname, char **fullpath,
	    char **freepath);
int	proc_getargv(struct thread *td, struct proc *p, struct sbuf *sb);
int	proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb);
int	proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb);
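/*
 * Editor's illustrative sketch (not part of the patch): a typical
 * pget() lookup for debugger-style read access.  PGET_WANTREAD
 * includes PGET_HOLD, and in the stock implementation a held process
 * is returned unlocked, so only PRELE() is needed afterwards; treat
 * that detail, and the function name, as assumptions of this sketch.
 */
static int
example_pget_read(pid_t pid)
{
	struct proc *p;
	int error;

	error = pget(pid, PGET_WANTREAD, &p);
	if (error != 0)
		return (error);
	/* ... read state from p ... */
	PRELE(p);
	return (0);
}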
void	procinit(void);
int	proc_iterate(int (*cb)(struct proc *, void *), void *cbarg);
void	proc_linkup0(struct proc *p, struct thread *td);
void	proc_linkup(struct proc *p, struct thread *td);
struct	proc *proc_realparent(struct proc *child);
void	proc_reap(struct thread *td, struct proc *p, int *status, int options);
void	proc_reparent(struct proc *child, struct proc *newparent,
	    bool set_oppid);
void	proc_set_p2_wexit(struct proc *p);
void	proc_set_traced(struct proc *p, bool stop);
void	proc_wkilled(struct proc *p);
struct	pstats *pstats_alloc(void);
void	pstats_fork(struct pstats *src, struct pstats *dst);
void	pstats_free(struct pstats *ps);
void	proc_clear_orphan(struct proc *p);
void	reaper_abandon_children(struct proc *p, bool exiting);
int	securelevel_ge(struct ucred *cr, int level);
int	securelevel_gt(struct ucred *cr, int level);
void	sess_hold(struct session *);
void	sess_release(struct session *);
int	setrunnable(struct thread *, int);
void	setsugid(struct proc *p);
int	should_yield(void);
int	sigonstack(size_t sp);
void	stopevent(struct proc *, u_int, u_int);
struct	thread *tdfind(lwpid_t, pid_t);
void	threadinit(void);
void	tidhash_add(struct thread *);
void	tidhash_remove(struct thread *);
void	cpu_idle(int);
int	cpu_idle_wakeup(int);
extern	void (*cpu_idle_hook)(sbintime_t);	/* Hook to machdep CPU idler. */
void	cpu_switch(struct thread *, struct thread *, struct mtx *);
void	cpu_throw(struct thread *, struct thread *) __dead2;
bool	curproc_sigkilled(void);
void	userret(struct thread *, struct trapframe *);

void	cpu_exit(struct thread *);
void	exit1(struct thread *, int, int) __dead2;
void	cpu_copy_thread(struct thread *td, struct thread *td0);
bool	cpu_exec_vmspace_reuse(struct proc *p, struct vm_map *map);
int	cpu_fetch_syscall_args(struct thread *td);
void	cpu_fork(struct thread *, struct proc *, struct thread *, int);
void	cpu_fork_kthread_handler(struct thread *, void (*)(void *), void *);
int	cpu_procctl(struct thread *td, int idtype, id_t id, int com,
	    void *data);
void	cpu_set_syscall_retval(struct thread *, int);
void	cpu_set_upcall(struct thread *, void (*)(void *), void *, stack_t *);
int	cpu_set_user_tls(struct thread *, void *tls_base);
void	cpu_thread_alloc(struct thread *);
void	cpu_thread_clean(struct thread *);
void	cpu_thread_exit(struct thread *);
void	cpu_thread_free(struct thread *);
void	cpu_thread_swapin(struct thread *);
void	cpu_thread_swapout(struct thread *);
struct	thread *thread_alloc(int pages);
int	thread_alloc_stack(struct thread *, int pages);
int	thread_check_susp(struct thread *td, bool sleep);
void	thread_cow_get_proc(struct thread *newtd, struct proc *p);
void	thread_cow_get(struct thread *newtd, struct thread *td);
void	thread_cow_free(struct thread *td);
void	thread_cow_update(struct thread *td);
void	thread_cow_synced(struct thread *td);
int	thread_create(struct thread *td, struct rtprio *rtp,
	    int (*initialize_thread)(struct thread *, void *), void *thunk);
void	thread_exit(void) __dead2;
void	thread_free(struct thread *td);
void	thread_link(struct thread *td, struct proc *p);
void	thread_reap_barrier(void);
int	thread_single(struct proc *p, int how);
void	thread_single_end(struct proc *p, int how);
void	thread_stash(struct thread *td);
void	thread_stopped(struct proc *p);
void	childproc_stopped(struct proc *child, int reason);
void	childproc_continued(struct proc *child);
void	childproc_exited(struct proc *child);
void	thread_run_flash(struct thread *td);
int	thread_suspend_check(int how);
bool	thread_suspend_check_needed(void);
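/*
 * Editor's illustrative sketch (not part of the patch): the
 * single-threading bracket used around operations that must not race
 * other threads of the process, in the style of execve().  p is
 * assumed to be curproc, as thread_single() requires for non-ALLPROC
 * modes; the function name is hypothetical.
 */
static int
example_single_threaded_op(struct proc *p)
{

	PROC_LOCK(p);
	if (thread_single(p, SINGLE_BOUNDARY) != 0) {
		PROC_UNLOCK(p);
		return (ERESTART);
	}
	/* ... other threads are parked at the user boundary here ... */
	thread_single_end(p, SINGLE_BOUNDARY);
	PROC_UNLOCK(p);
	return (0);
}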
void	thread_suspend_switch(struct thread *, struct proc *p);
void	thread_suspend_one(struct thread *td);
void	thread_unlink(struct thread *td);
void	thread_unsuspend(struct proc *p);
void	thread_wait(struct proc *p);

bool	stop_all_proc_block(void);
void	stop_all_proc_unblock(void);
void	stop_all_proc(void);
void	resume_all_proc(void);

static __inline int
curthread_pflags_set(int flags)
{
	struct thread *td;
	int save;

	td = curthread;
	save = ~flags | (td->td_pflags & flags);
	td->td_pflags |= flags;
	return (save);
}

static __inline void
curthread_pflags_restore(int save)
{

	curthread->td_pflags &= save;
}

static __inline int
curthread_pflags2_set(int flags)
{
	struct thread *td;
	int save;

	td = curthread;
	save = ~flags | (td->td_pflags2 & flags);
	td->td_pflags2 |= flags;
	return (save);
}

static __inline void
curthread_pflags2_restore(int save)
{

	curthread->td_pflags2 &= save;
}

-static __inline bool
-kstack_contains(struct thread *td, vm_offset_t va, size_t len)
-{
-	return (va >= td->td_kstack && va + len >= va &&
-	    va + len <= td->td_kstack + td->td_kstack_pages * PAGE_SIZE);
-}
-
static __inline __pure2 struct td_sched *
td_get_sched(struct thread *td)
{

	return ((struct td_sched *)&td[1]);
}

#define	PROC_ID_PID	0
#define	PROC_ID_GROUP	1
#define	PROC_ID_SESSION	2
#define	PROC_ID_REAP	3
void	proc_id_set(int type, pid_t id);
void	proc_id_set_cond(int type, pid_t id);
void	proc_id_clear(int type, pid_t id);

EVENTHANDLER_LIST_DECLARE(process_ctor);
EVENTHANDLER_LIST_DECLARE(process_dtor);
EVENTHANDLER_LIST_DECLARE(process_init);
EVENTHANDLER_LIST_DECLARE(process_fini);
EVENTHANDLER_LIST_DECLARE(process_exit);
EVENTHANDLER_LIST_DECLARE(process_fork);
EVENTHANDLER_LIST_DECLARE(process_exec);

EVENTHANDLER_LIST_DECLARE(thread_ctor);
EVENTHANDLER_LIST_DECLARE(thread_dtor);
EVENTHANDLER_LIST_DECLARE(thread_init);

#endif /* _KERNEL */

#endif /* !_SYS_PROC_H_ */
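/*
 * Editor's illustrative sketch (not part of the patch): the
 * curthread_pflags_set()/curthread_pflags_restore() pair nests safely
 * because the returned cookie records which of the requested bits were
 * already set, so the restore clears only the bits this caller set.
 * TDP_NOFAULTING is used purely as an example flag; the function is
 * hypothetical.
 */
static void
example_no_faulting_section(void)
{
	int save;

	save = curthread_pflags_set(TDP_NOFAULTING);
	/* ... code that must not page fault ... */
	curthread_pflags_restore(save);
}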
diff --git a/sys/x86/x86/stack_machdep.c b/sys/x86/x86/stack_machdep.c
index 1243137d2ea0..5d7dfd251b0d 100644
--- a/sys/x86/x86/stack_machdep.c
+++ b/sys/x86/x86/stack_machdep.c
@@ -1,176 +1,176 @@
/*-
 * Copyright (c) 2015 EMC Corporation
 * Copyright (c) 2005 Antoine Brodin
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include
__FBSDID("$FreeBSD$");

#include "opt_stack.h"

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
-#include
+#include

#ifdef __i386__
#define	PCB_FP(pcb)	((pcb)->pcb_ebp)
#define	TF_FLAGS(tf)	((tf)->tf_eflags)
#define	TF_FP(tf)	((tf)->tf_ebp)
#define	TF_PC(tf)	((tf)->tf_eip)

typedef struct i386_frame *x86_frame_t;
#else
#define	PCB_FP(pcb)	((pcb)->pcb_rbp)
#define	TF_FLAGS(tf)	((tf)->tf_rflags)
#define	TF_FP(tf)	((tf)->tf_rbp)
#define	TF_PC(tf)	((tf)->tf_rip)

typedef struct amd64_frame *x86_frame_t;
#endif

#ifdef SMP
static struct stack *stack_intr_stack;
static struct thread *stack_intr_td;
static struct mtx intr_lock;
MTX_SYSINIT(intr_lock, &intr_lock, "stack intr", MTX_DEF);
#endif

static void __nosanitizeaddress __nosanitizememory
stack_capture(struct thread *td, struct stack *st, register_t fp)
{
	x86_frame_t frame;
	vm_offset_t callpc;

	stack_zero(st);
	frame = (x86_frame_t)fp;
	while (1) {
		if (!kstack_contains(td, (vm_offset_t)frame, sizeof(*frame)))
			break;
		callpc = frame->f_retaddr;
		if (!INKERNEL(callpc))
			break;
		if (stack_put(st, callpc) == -1)
			break;
		if (frame->f_frame <= frame)
			break;
		frame = frame->f_frame;
	}
}

#ifdef SMP
void
stack_capture_intr(void)
{
	struct thread *td;

	td = curthread;
	stack_capture(td, stack_intr_stack, TF_FP(td->td_intr_frame));
	atomic_store_rel_ptr((void *)&stack_intr_td, (uintptr_t)td);
}
#endif

int
stack_save_td(struct stack *st, struct thread *td)
{
	int cpuid, error;
	bool done;

	THREAD_LOCK_ASSERT(td, MA_OWNED);
	KASSERT(!TD_IS_SWAPPED(td),
	    ("stack_save_td: thread %p is swapped", td));
	if (TD_IS_RUNNING(td) && td != curthread)
		PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);

	if (td == curthread) {
		stack_save(st);
		return (0);
	}

	for (done = false, error = 0; !done;) {
		if (!TD_IS_RUNNING(td)) {
			/*
			 * The thread will not start running so long as
			 * we hold its lock.
			 */
			stack_capture(td, st, PCB_FP(td->td_pcb));
			error = 0;
			break;
		}

#ifdef SMP
		thread_unlock(td);
		cpuid = atomic_load_int(&td->td_oncpu);
		if (cpuid == NOCPU) {
			cpu_spinwait();
		} else {
			mtx_lock(&intr_lock);
			stack_intr_td = NULL;
			stack_intr_stack = st;
			ipi_cpu(cpuid, IPI_TRACE);
			while (atomic_load_acq_ptr((void *)&stack_intr_td) ==
			    (uintptr_t)NULL)
				cpu_spinwait();
			if (stack_intr_td == td) {
				done = true;
				error = st->depth > 0 ? 0 : EBUSY;
			}
			stack_intr_td = NULL;
			mtx_unlock(&intr_lock);
		}
		thread_lock(td);
#else
		(void)cpuid;
		KASSERT(0, ("%s: multiple running threads", __func__));
#endif
	}
	return (error);
}

void
stack_save(struct stack *st)
{
	register_t fp;

#ifdef __i386__
	__asm __volatile("movl %%ebp,%0" : "=g" (fp));
#else
	__asm __volatile("movq %%rbp,%0" : "=g" (fp));
#endif
	stack_capture(curthread, st, fp);
}
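/*
 * Editor's illustrative sketch (not part of the patch): with
 * GET_STACK_USAGE() now living in machine/stack.h (visible only after
 * sys/proc.h, per the new _SYS_PROC_H_ guard), a consumer might use it
 * to decide whether enough kernel stack remains to keep recursing.
 * The function name and the 3/4 threshold are assumptions made for
 * illustration only.
 */
static bool
example_stack_has_headroom(void)
{
	int total, used;

	GET_STACK_USAGE(total, used);
	/* Recurse only while less than 3/4 of the stack is in use. */
	return (used < total - total / 4);
}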