Index: head/contrib/elftoolchain/libelf/gelf_mips64el.c =================================================================== --- head/contrib/elftoolchain/libelf/gelf_mips64el.c (revision 344854) +++ head/contrib/elftoolchain/libelf/gelf_mips64el.c (revision 344855) @@ -1,82 +1,81 @@ /*- * Copyright (c) 2018 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "_libelf.h" ELFTC_VCSID("$Id$"); int _libelf_is_mips64el(Elf *e) { return (e->e_kind == ELF_K_ELF && e->e_class == ELFCLASS64 && e->e_u.e_elf.e_ehdr.e_ehdr64->e_machine == EM_MIPS && e->e_u.e_elf.e_ehdr.e_ehdr64->e_ident[EI_DATA] == ELFDATA2LSB); } /* * For MIPS64, the r_info field is actually stored as a 32-bit symbol * index (r_sym) followed by four single-byte fields (r_ssym, r_type3, * r_type2, and r_type). The byte-swap for the little-endian case * jumbles this incorrectly so compensate. */ Elf64_Xword _libelf_mips64el_r_info_tof(Elf64_Xword r_info) { Elf64_Xword new_info; uint8_t ssym, type3, type2, type; ssym = r_info >> 24; type3 = r_info >> 16; type2 = r_info >> 8; type = r_info; new_info = r_info >> 32; new_info |= (Elf64_Xword)ssym << 32; new_info |= (Elf64_Xword)type3 << 40; new_info |= (Elf64_Xword)type2 << 48; new_info |= (Elf64_Xword)type << 56; return (new_info); } Elf64_Xword _libelf_mips64el_r_info_tom(Elf64_Xword r_info) { Elf64_Xword new_info; uint8_t ssym, type3, type2, type; ssym = r_info >> 32; type3 = r_info >> 40; type2 = r_info >> 48; type = r_info >> 56; new_info = (r_info & 0xffffffff) << 32; new_info |= (Elf64_Xword)ssym << 24; new_info |= (Elf64_Xword)type3 << 16; new_info |= (Elf64_Xword)type2 << 8; new_info |= (Elf64_Xword)type; return (new_info); } Index: head/lib/libc/tests/gen/makecontext_test.c =================================================================== --- head/lib/libc/tests/gen/makecontext_test.c (revision 344854) +++ head/lib/libc/tests/gen/makecontext_test.c (revision 344855) @@ -1,189 +1,188 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2018 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
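[Illustrative aside, not part of this commit.] The jumbled r_info layout above is easiest to see with concrete bytes. The sketch below reimplements the two helpers under local names and checks a round-trip for r_sym 0x12345678, r_ssym 0xaa, r_type3 0xbb, r_type2 0xcc, r_type 0xdd; the "file" value is the 64-bit slot read as a little-endian integer, the "memory" value is the GElf layout with r_sym in the high 32 bits.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t
r_info_tom(uint64_t r_info)     /* raw little-endian value -> GElf layout */
{
        uint8_t ssym = r_info >> 32, type3 = r_info >> 40;
        uint8_t type2 = r_info >> 48, type = r_info >> 56;
        uint64_t new_info = (r_info & 0xffffffff) << 32; /* r_sym up top */

        new_info |= (uint64_t)ssym << 24;
        new_info |= (uint64_t)type3 << 16;
        new_info |= (uint64_t)type2 << 8;
        new_info |= (uint64_t)type;
        return (new_info);
}

static uint64_t
r_info_tof(uint64_t r_info)     /* GElf layout -> raw little-endian value */
{
        uint8_t ssym = r_info >> 24, type3 = r_info >> 16;
        uint8_t type2 = r_info >> 8, type = r_info;
        uint64_t new_info = r_info >> 32;

        new_info |= (uint64_t)ssym << 32;
        new_info |= (uint64_t)type3 << 40;
        new_info |= (uint64_t)type2 << 48;
        new_info |= (uint64_t)type << 56;
        return (new_info);
}

int
main(void)
{
        uint64_t file = 0xddccbbaa12345678ULL;
        uint64_t mem = r_info_tom(file);

        assert(mem == 0x12345678aabbccddULL);   /* r_sym in bits 63:32 */
        assert(r_info_tof(mem) == file);        /* exact round-trip */
        printf("mem r_info: %#jx\n", (uintmax_t)mem);
        return (0);
}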
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include static char uc_stack[16 * 1024]; static void check_1(int arg1) { ATF_REQUIRE_EQ(arg1, 1); } ATF_TC_WITHOUT_HEAD(makecontext_arg1); ATF_TC_BODY(makecontext_arg1, tc) { ucontext_t ctx[2]; ATF_REQUIRE_EQ(getcontext(&ctx[1]), 0); ctx[1].uc_stack.ss_sp = uc_stack; ctx[1].uc_stack.ss_size = sizeof(uc_stack); ctx[1].uc_link = &ctx[0]; makecontext(&ctx[1], (void (*)(void))check_1, 1, 1); ATF_REQUIRE_EQ(swapcontext(&ctx[0], &ctx[1]), 0); } static void check_2(int arg1, int arg2) { ATF_REQUIRE_EQ(arg1, 1); ATF_REQUIRE_EQ(arg2, 2); } ATF_TC_WITHOUT_HEAD(makecontext_arg2); ATF_TC_BODY(makecontext_arg2, tc) { ucontext_t ctx[2]; ATF_REQUIRE_EQ(getcontext(&ctx[1]), 0); ctx[1].uc_stack.ss_sp = uc_stack; ctx[1].uc_stack.ss_size = sizeof(uc_stack); ctx[1].uc_link = &ctx[0]; makecontext(&ctx[1], (void (*)(void))check_2, 2, 1, 2); ATF_REQUIRE_EQ(swapcontext(&ctx[0], &ctx[1]), 0); } static void check_3(int arg1, int arg2, int arg3) { ATF_REQUIRE_EQ(arg1, 1); ATF_REQUIRE_EQ(arg2, 2); ATF_REQUIRE_EQ(arg3, 3); } ATF_TC_WITHOUT_HEAD(makecontext_arg3); ATF_TC_BODY(makecontext_arg3, tc) { ucontext_t ctx[2]; ATF_REQUIRE_EQ(getcontext(&ctx[1]), 0); ctx[1].uc_stack.ss_sp = uc_stack; ctx[1].uc_stack.ss_size = sizeof(uc_stack); ctx[1].uc_link = &ctx[0]; makecontext(&ctx[1], (void (*)(void))check_3, 3, 1, 2, 3); ATF_REQUIRE_EQ(swapcontext(&ctx[0], &ctx[1]), 0); } static void check_4(int arg1, int arg2, int arg3, int arg4) { ATF_REQUIRE_EQ(arg1, 1); ATF_REQUIRE_EQ(arg2, 2); ATF_REQUIRE_EQ(arg3, 3); ATF_REQUIRE_EQ(arg4, 4); } ATF_TC_WITHOUT_HEAD(makecontext_arg4); ATF_TC_BODY(makecontext_arg4, tc) { ucontext_t ctx[2]; ATF_REQUIRE_EQ(getcontext(&ctx[1]), 0); ctx[1].uc_stack.ss_sp = uc_stack; ctx[1].uc_stack.ss_size = sizeof(uc_stack); ctx[1].uc_link = &ctx[0]; makecontext(&ctx[1], (void (*)(void))check_4, 4, 1, 2, 3, 4); ATF_REQUIRE_EQ(swapcontext(&ctx[0], &ctx[1]), 0); } static void check_5(int arg1, int arg2, int arg3, int arg4, int arg5) { ATF_REQUIRE_EQ(arg1, 1); ATF_REQUIRE_EQ(arg2, 2); ATF_REQUIRE_EQ(arg3, 3); ATF_REQUIRE_EQ(arg4, 4); ATF_REQUIRE_EQ(arg5, 5); } ATF_TC_WITHOUT_HEAD(makecontext_arg5); ATF_TC_BODY(makecontext_arg5, tc) { ucontext_t ctx[2]; ATF_REQUIRE_EQ(getcontext(&ctx[1]), 0); ctx[1].uc_stack.ss_sp = uc_stack; ctx[1].uc_stack.ss_size = sizeof(uc_stack); ctx[1].uc_link = &ctx[0]; makecontext(&ctx[1], (void (*)(void))check_5, 5, 1, 2, 3, 4, 5); 
ATF_REQUIRE_EQ(swapcontext(&ctx[0], &ctx[1]), 0); } static void check_6(int arg1, int arg2, int arg3, int arg4, int arg5, int arg6) { ATF_REQUIRE_EQ(arg1, 1); ATF_REQUIRE_EQ(arg2, 2); ATF_REQUIRE_EQ(arg3, 3); ATF_REQUIRE_EQ(arg4, 4); ATF_REQUIRE_EQ(arg5, 5); ATF_REQUIRE_EQ(arg6, 6); } ATF_TC_WITHOUT_HEAD(makecontext_arg6); ATF_TC_BODY(makecontext_arg6, tc) { ucontext_t ctx[2]; ATF_REQUIRE_EQ(getcontext(&ctx[1]), 0); ctx[1].uc_stack.ss_sp = uc_stack; ctx[1].uc_stack.ss_size = sizeof(uc_stack); ctx[1].uc_link = &ctx[0]; makecontext(&ctx[1], (void (*)(void))check_6, 6, 1, 2, 3, 4, 5, 6); ATF_REQUIRE_EQ(swapcontext(&ctx[0], &ctx[1]), 0); } ATF_TP_ADD_TCS(tp) { ATF_TP_ADD_TC(tp, makecontext_arg1); ATF_TP_ADD_TC(tp, makecontext_arg2); ATF_TP_ADD_TC(tp, makecontext_arg3); ATF_TP_ADD_TC(tp, makecontext_arg4); ATF_TP_ADD_TC(tp, makecontext_arg5); ATF_TP_ADD_TC(tp, makecontext_arg6); return (atf_no_error()); } Index: head/lib/libdevctl/devctl.3 =================================================================== --- head/lib/libdevctl/devctl.3 (revision 344854) +++ head/lib/libdevctl/devctl.3 (revision 344855) @@ -1,394 +1,393 @@ .\" .\" Copyright (c) 2014 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
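[Illustrative aside, not part of this commit.] Every test above follows one pattern; reduced to a minimal standalone program it looks like this. The uc_link member is what makes swapcontext() return to main once the target function falls off the end, and the function-pointer cast matches the tests.

#include <stdio.h>
#include <stdlib.h>
#include <ucontext.h>

static char stack[16 * 1024];
static ucontext_t main_ctx, func_ctx;

static void
greet(int a, int b)
{
        printf("in context: a=%d b=%d\n", a, b);
        /* Returning resumes uc_link, i.e. main_ctx. */
}

int
main(void)
{
        if (getcontext(&func_ctx) != 0)
                abort();
        func_ctx.uc_stack.ss_sp = stack;
        func_ctx.uc_stack.ss_size = sizeof(stack);
        func_ctx.uc_link = &main_ctx;
        /* makecontext() passes int arguments after the count. */
        makecontext(&func_ctx, (void (*)(void))greet, 2, 1, 2);
        if (swapcontext(&main_ctx, &func_ctx) != 0)
                abort();
        printf("back in main\n");
        return (0);
}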
.\" .\" $FreeBSD$ .\" .Dd August 22, 2018 .Dt DEVCTL 3 .Os .Sh NAME .Nm devctl , .Nm devctl_attach , .Nm devctl_clear_driver , .Nm devctl_delete , .Nm devctl_detach , .Nm devctl_disable , .Nm devctl_enable , .Nm devctl_freeze , .Nm devctl_rescan , .Nm devctl_resume , .Nm devctl_set_driver , .Nm devctl_suspend , .Nm devctl_thaw .Nd device control library .Sh LIBRARY .Lb libdevctl .Sh SYNOPSIS .In devctl.h .Ft int .Fn devctl_attach "const char *device" .Ft int .Fn devctl_clear_driver "const char *device" "bool force" .Ft int .Fn devctl_delete "const char *device" "bool force" .Ft int .Fn devctl_detach "const char *device" "bool force" .Ft int .Fn devctl_disable "const char *device" "bool force_detach" .Ft int .Fn devctl_enable "const char *device" .Ft int .Fn devctl_freeze "void" .Ft int .Fn devctl_rescan "const char *device" .Ft int .Fn devctl_resume "const char *device" .Ft int .Fn devctl_set_driver "const char *device" "const char *driver" "bool force" .Ft int .Fn devctl_suspend "const char *device" .Ft int .Fn devctl_thaw "void" .Sh DESCRIPTION The .Nm library adjusts the state of devices in the kernel's internal device hierarchy. Each control operation accepts a .Fa device argument that identifies the device to adjust. The .Fa device may be specified as either the name of an existing device or as a bus-specific address. The following bus-specific address formats are currently supported: .Bl -tag -offset indent .It Sy pci Ns Fa domain Ns : Ns Fa bus Ns : Ns Fa slot Ns : Ns Fa function A PCI device with the specified .Fa domain , .Fa bus , .Fa slot , and .Fa function . .It Sy pci Ns Fa bus Ns : Ns Fa slot Ns : Ns Fa function A PCI device in domain zero with the specified .Fa bus , .Fa slot , and .Fa function . .It Fa handle A device with an ACPI handle of .Fa handle . The handle must be specified as an absolute path and must begin with a .Dq \e . .El .Pp The .Fn devctl_attach function probes a device and attaches a suitable device driver if one is found. .Pp The .Fn devctl_detach function detaches a device from its current device driver. The device is left detached until either a new driver for its parent bus is loaded or the device is explicitly probed via .Fn devctl_attach . If .Fa force is true, the current device driver will be detached even if the device is busy. .Pp The .Fn devctl_delete function deletes a device from the device tree. No If .Fa force is true, the device is deleted even if the device is physically present. .Pp The .Fn devctl_disable function disables a device. If the device is currently attached to a device driver, the device driver will be detached from the device, but the device will retain its current name. If .Fa force_detach is true, the current device driver will be detached even if the device is busy. The device will remain disabled and detached until it is explicitly enabled via .Fn devctl_enable . .Pp The .Fn devctl_enable function re-enables a disabled device. The device will probe and attach if a suitable device driver is found. .Pp The .Fn devctl_suspend function suspends a device. This may include placing the device in a reduced power state, but any device driver currently attached to the device will remain attached. .Pp The .Fn devctl_resume function resumes a suspended device to a fully working state. .Pp The .Fn devctl_set_driver function attaches a device driver named .Fa driver to a device. If the device is already attached and .Fa force is false, the request will fail. 
If the device is already attached and .Fa force is true, the device will be detached from its current device driver before it is attached to the new device driver. .Pp The .Fn devctl_clear_driver function resets a device so that it can be attached to any valid device driver rather than only drivers with a previously specified name. This function is used to undo a previous call to .Fn devctl_set_driver . If the device is already attached and .Fa force is false, the request will fail. If the device is already attached and .Fa force is true, the device will be detached from its current device driver. After the device's name is reset, it is reprobed and attached to a suitable device driver if one is found. .Pp The .Fn devctl_rescan function rescans a bus device checking for devices that have been added or removed. .Pp The .Fn devctl_freeze function freezes probe and attach processing initiated in response to drivers being loaded. .Pp The .Fn devctl_thaw function resumes (thaws the freeze) probe and attach processing initiated in response to drivers being loaded. .Sh RETURN VALUES .Rv -std devctl_attach devctl_clear_driver devctl_delete devctl_detach \ devctl_disable devctl_enable devctl_suspend devctl_rescan devctl_resume \ devctl_set_driver .Sh ERRORS In addition to specific errors noted below, all of the .Nm functions may fail for any of the errors described in .Xr open 2 as well as: .Bl -tag -width Er .It Bq Er EINVAL The device name is too long. .It Bq Er ENOENT No existing device matches the specified name or location. .It Bq Er EPERM The current process is not permitted to adjust the state of .Fa device . .El .Pp The .Fn devctl_attach function may fail if: .Bl -tag -width Er .It Bq Er EBUSY The device is already attached. .It Bq Er ENOMEM An internal memory allocation request failed. .It Bq Er ENXIO The device is disabled. .It Bq Er ENXIO No suitable driver for the device could be found, or the driver failed to attach. .El .Pp The .Fn devctl_detach function may fail if: .Bl -tag -width Er .It Bq Er EBUSY The current device driver for .Fa device is busy and cannot detach at this time. Note that some drivers may return this even if .Fa force is true. .It Bq Er ENXIO The device is not attached to a driver. .It Bq Er ENXIO The current device driver for .Fa device does not support detaching. .El .Pp The .Fn devctl_enable function may fail if: .Bl -tag -width Er .It Bq Er EBUSY The device is already enabled. .It Bq Er ENOMEM An internal memory allocation request failed. .It Bq Er ENXIO No suitable driver for the device could be found, or the driver failed to attach. .El .Pp The .Fn devctl_disable function may fail if: .Bl -tag -width Er .It Bq Er EBUSY The current device driver for .Fa device is busy and cannot detach at this time. Note that some drivers may return this even if .Fa force_detach is true. .It Bq Er ENXIO The device is already disabled. .It Bq Er ENXIO The current device driver for .Fa device does not support detaching. .El .Pp The .Fn devctl_suspend function may fail if: .Bl -tag -width Er .It Bq Er EBUSY The device is already suspended. .It Bq Er EINVAL The device to be suspended is the root bus device. .El .Pp The .Fn devctl_resume function may fail if: .Bl -tag -width Er .It Bq Er EINVAL The device is not suspended. .It Bq Er EINVAL The device to be resumed is the root bus device. .El .Pp The .Fn devctl_set_driver function may fail if: .Bl -tag -width Er .It Bq Er EBUSY The device is currently attached to a device driver and .Fa force is false. 
.It Bq Er EBUSY The current device driver for .Fa device is busy and cannot detach at this time. .It Bq Er EFAULT The .Fa driver argument points outside the process' allocated address space. .It Bq Er ENOENT No device driver with the requested name exists. .It Bq Er ENOMEM An internal memory allocation request failed. .It Bq Er ENXIO The device is disabled. .It Bq Er ENXIO The new device driver failed to attach. .El .Pp The .Fn devctl_clear_driver function may fail if: .Bl -tag -width Er .It Bq Er EBUSY The device is currently attached to a device driver and .Fa force is false. .It Bq Er EBUSY The current device driver for .Fa device is busy and cannot detach at this time. .It Bq Er EINVAL The device is not configured for a specific device driver name. .It Bq Er ENXIO The device driver chosen after reprobing failed to attach. .El .Pp The .Fn devctl_rescan function may fail if: .Bl -tag -width Er .It Bq Er ENXIO The device is not attached to a driver. .It Bq Er ENXIO The bus driver does not support rescanning. .El .Pp The .Fn devctl_delete function may fail if: .Bl -tag -width Er .It Bq Er EBUSY The device is physically present and .Fa force is false. .It Bq Er EINVAL .Fa dev is the root device of the device tree. .El .Sh SEE ALSO .Xr devinfo 3 , .Xr devstat 3 , .Xr devctl 8 .Sh HISTORY The .Nm library first appeared in .Fx 10.3 . .Sh BUGS If a device is suspended individually via .Fn devctl_suspend and the entire machine is subsequently suspended, the device will be resumed when the machine resumes. Index: head/lib/libdevctl/devctl.c =================================================================== --- head/lib/libdevctl/devctl.c (revision 344854) +++ head/lib/libdevctl/devctl.c (revision 344855) @@ -1,161 +1,160 @@ /*- * Copyright (c) 2014 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
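[Illustrative aside, not part of this commit.] A sketch of how the interface documented above might be driven from a program, following the ERRORS section for the EBUSY fallback; the PCI address "pci0:2:0:0" is a made-up example. Link with -ldevctl.

#include <err.h>
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#include <devctl.h>

int
main(void)
{
        const char *dev = "pci0:2:0:0";     /* hypothetical bus address */

        if (devctl_detach(dev, false) != 0) {
                if (errno != EBUSY)
                        err(1, "detach of %s", dev);
                /* Busy driver: force the detach, as the man page allows. */
                if (devctl_detach(dev, true) != 0)
                        err(1, "forced detach of %s", dev);
        }
        /* Reprobe: attaches a suitable driver if one is found. */
        if (devctl_attach(dev) != 0)
                err(1, "attach of %s", dev);
        printf("%s reprobed\n", dev);
        return (0);
}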
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include "devctl.h" static int devctl_request(u_long cmd, struct devreq *req) { static int devctl2_fd = -1; if (devctl2_fd == -1) { devctl2_fd = open("/dev/devctl2", O_RDONLY); if (devctl2_fd == -1) return (-1); } return (ioctl(devctl2_fd, cmd, req)); } static int devctl_simple_request(u_long cmd, const char *name, int flags) { struct devreq req; memset(&req, 0, sizeof(req)); if (strlcpy(req.dr_name, name, sizeof(req.dr_name)) >= sizeof(req.dr_name)) { errno = EINVAL; return (-1); } req.dr_flags = flags; return (devctl_request(cmd, &req)); } int devctl_attach(const char *device) { return (devctl_simple_request(DEV_ATTACH, device, 0)); } int devctl_detach(const char *device, bool force) { return (devctl_simple_request(DEV_DETACH, device, force ? DEVF_FORCE_DETACH : 0)); } int devctl_enable(const char *device) { return (devctl_simple_request(DEV_ENABLE, device, 0)); } int devctl_disable(const char *device, bool force_detach) { return (devctl_simple_request(DEV_DISABLE, device, force_detach ? DEVF_FORCE_DETACH : 0)); } int devctl_suspend(const char *device) { return (devctl_simple_request(DEV_SUSPEND, device, 0)); } int devctl_resume(const char *device) { return (devctl_simple_request(DEV_RESUME, device, 0)); } int devctl_set_driver(const char *device, const char *driver, bool force) { struct devreq req; memset(&req, 0, sizeof(req)); if (strlcpy(req.dr_name, device, sizeof(req.dr_name)) >= sizeof(req.dr_name)) { errno = EINVAL; return (-1); } req.dr_data = __DECONST(char *, driver); if (force) req.dr_flags |= DEVF_SET_DRIVER_DETACH; return (devctl_request(DEV_SET_DRIVER, &req)); } int devctl_clear_driver(const char *device, bool force) { return (devctl_simple_request(DEV_CLEAR_DRIVER, device, force ? DEVF_CLEAR_DRIVER_DETACH : 0)); } int devctl_rescan(const char *device) { return (devctl_simple_request(DEV_RESCAN, device, 0)); } int devctl_delete(const char *device, bool force) { return (devctl_simple_request(DEV_DELETE, device, force ? DEVF_FORCE_DELETE : 0)); } int devctl_freeze(void) { return (devctl_simple_request(DEV_FREEZE, "", 0)); } int devctl_thaw(void) { return (devctl_simple_request(DEV_THAW, "", 0)); } Index: head/lib/libdevctl/devctl.h =================================================================== --- head/lib/libdevctl/devctl.h (revision 344854) +++ head/lib/libdevctl/devctl.h (revision 344855) @@ -1,47 +1,46 @@ /*- * Copyright (c) 2014 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __DEVCTL_H__ #define __DEVCTL_H__ #include int devctl_attach(const char *device); int devctl_detach(const char *device, bool force); int devctl_enable(const char *device); int devctl_disable(const char *device, bool force_detach); int devctl_suspend(const char *device); int devctl_resume(const char *device); int devctl_set_driver(const char *device, const char *driver, bool force); int devctl_clear_driver(const char *device, bool force); int devctl_rescan(const char *device); int devctl_delete(const char *device, bool force); int devctl_freeze(void); int devctl_thaw(void); #endif /* !__DEVCTL_H__ */ Index: head/lib/libkvm/kvm_aarch64.h =================================================================== --- head/lib/libkvm/kvm_aarch64.h (revision 344854) +++ head/lib/libkvm/kvm_aarch64.h (revision 344855) @@ -1,67 +1,66 @@ /*- * Copyright (c) 2015 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
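[Illustrative aside, not part of this commit.] For reference, the wrappers in devctl.c above all reduce to an ioctl on /dev/devctl2; a minimal equivalent of devctl_enable(), assuming struct devreq and DEV_ENABLE from sys/bus.h as the library uses them:

#include <sys/types.h>
#include <sys/bus.h>            /* struct devreq, DEV_ENABLE */
#include <sys/ioctl.h>

#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

int
main(int argc, char **argv)
{
        struct devreq req;
        int fd;

        if (argc != 2)
                errx(1, "usage: enable device");
        fd = open("/dev/devctl2", O_RDONLY);
        if (fd == -1)
                err(1, "open(/dev/devctl2)");
        memset(&req, 0, sizeof(req));
        if (strlcpy(req.dr_name, argv[1], sizeof(req.dr_name)) >=
            sizeof(req.dr_name))
                errx(1, "device name too long");
        if (ioctl(fd, DEV_ENABLE, &req) == -1)
                err(1, "DEV_ENABLE");
        printf("%s enabled\n", argv[1]);
        return (0);
}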
* * $FreeBSD$ */ #ifndef __KVM_AARCH64_H__ #define __KVM_AARCH64_H__ #ifdef __aarch64__ #include #endif typedef uint64_t aarch64_physaddr_t; typedef uint64_t aarch64_pte_t; #define AARCH64_PAGE_SHIFT 12 #define AARCH64_PAGE_SIZE (1 << AARCH64_PAGE_SHIFT) #define AARCH64_PAGE_MASK (AARCH64_PAGE_SIZE - 1) /* Source: arm64/include/pte.h */ #define AARCH64_ATTR_MASK 0xfff0000000000fff #define AARCH64_ATTR_UXN (1ULL << 54) #define AARCH64_ATTR_PXN (1ULL << 53) #define AARCH64_ATTR_XN (AARCH64_ATTR_PXN | AARCH64_ATTR_UXN) #define AARCH64_ATTR_AP(x) ((x) << 6) #define AARCH64_ATTR_AP_RO (1 << 1) #define AARCH64_ATTR_DESCR_MASK 3 #define AARCH64_L3_SHIFT 12 #define AARCH64_L3_PAGE 0x3 #ifdef __aarch64__ _Static_assert(PAGE_SHIFT == AARCH64_PAGE_SHIFT, "PAGE_SHIFT mismatch"); _Static_assert(PAGE_SIZE == AARCH64_PAGE_SIZE, "PAGE_SIZE mismatch"); _Static_assert(PAGE_MASK == AARCH64_PAGE_MASK, "PAGE_MASK mismatch"); _Static_assert(ATTR_MASK == AARCH64_ATTR_MASK, "ATTR_MASK mismatch"); _Static_assert(ATTR_DESCR_MASK == AARCH64_ATTR_DESCR_MASK, "ATTR_DESCR_MASK mismatch"); _Static_assert(L3_SHIFT == AARCH64_L3_SHIFT, "L3_SHIFT mismatch"); _Static_assert(L3_PAGE == AARCH64_L3_PAGE, "L3_PAGE mismatch"); #endif #endif /* !__KVM_AARCH64_H__ */ Index: head/lib/libkvm/kvm_amd64.h =================================================================== --- head/lib/libkvm/kvm_amd64.h (revision 344854) +++ head/lib/libkvm/kvm_amd64.h (revision 344855) @@ -1,90 +1,89 @@ /*- * Copyright (c) 2015 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
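[Illustrative aside, not part of this commit.] A worked example of the aarch64 constants above, with the AARCH64_-prefixed values inlined and a hypothetical level-3 entry: clearing the attribute bits leaves the output (physical) address.

#include <stdint.h>
#include <stdio.h>

#define ATTR_MASK   0xfff0000000000fffULL       /* AARCH64_ATTR_MASK */
#define DESCR_MASK  3                           /* AARCH64_ATTR_DESCR_MASK */
#define L3_PAGE     0x3                         /* AARCH64_L3_PAGE */
#define XN          ((1ULL << 53) | (1ULL << 54)) /* AARCH64_ATTR_XN */
#define AP_RO       ((uint64_t)(1 << 1) << 6)   /* AARCH64_ATTR_AP(AP_RO) */

int
main(void)
{
        uint64_t pte = 0x0040000089abc7c3ULL;   /* hypothetical L3 entry */

        if ((pte & DESCR_MASK) != L3_PAGE) {
                printf("not a mapped 4K page\n");
                return (1);
        }
        printf("pa %#jx\n", (uintmax_t)(pte & ~ATTR_MASK)); /* 0x89abc000 */
        printf("ro %d\n", (pte & AP_RO) != 0);
        printf("xn %d\n", (pte & XN) != 0);
        return (0);
}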
* * $FreeBSD$ */ #ifndef __KVM_AMD64_H__ #define __KVM_AMD64_H__ #ifdef __amd64__ #include #include #endif typedef uint64_t amd64_physaddr_t; typedef uint64_t amd64_pte_t; typedef uint64_t amd64_pde_t; typedef uint64_t amd64_pdpe_t; typedef uint64_t amd64_pml4e_t; #define AMD64_NPTEPG (AMD64_PAGE_SIZE / sizeof(amd64_pte_t)) #define AMD64_PAGE_SHIFT 12 #define AMD64_PAGE_SIZE (1 << AMD64_PAGE_SHIFT) #define AMD64_PAGE_MASK (AMD64_PAGE_SIZE - 1) #define AMD64_NPDEPG (AMD64_PAGE_SIZE / sizeof(amd64_pde_t)) #define AMD64_PDRSHIFT 21 #define AMD64_NBPDR (1 << AMD64_PDRSHIFT) #define AMD64_PDRMASK (AMD64_NBPDR - 1) #define AMD64_NPDPEPG (AMD64_PAGE_SIZE / sizeof(amd64_pdpe_t)) #define AMD64_PDPSHIFT 30 #define AMD64_NBPDP (1 << AMD64_PDPSHIFT) #define AMD64_PDPMASK (AMD64_NBPDP - 1) #define AMD64_NPML4EPG (AMD64_PAGE_SIZE / sizeof(amd64_pml4e_t)) #define AMD64_PML4SHIFT 39 #define AMD64_PG_NX (1ULL << 63) #define AMD64_PG_V 0x001 #define AMD64_PG_RW 0x002 #define AMD64_PG_PS 0x080 #define AMD64_PG_FRAME (0x000ffffffffff000) #define AMD64_PG_PS_FRAME (0x000fffffffe00000) #define AMD64_PG_1GB_FRAME (0x000fffffc0000000) #ifdef __amd64__ _Static_assert(NPTEPG == AMD64_NPTEPG, "NPTEPG mismatch"); _Static_assert(PAGE_SHIFT == AMD64_PAGE_SHIFT, "PAGE_SHIFT mismatch"); _Static_assert(PAGE_SIZE == AMD64_PAGE_SIZE, "PAGE_SIZE mismatch"); _Static_assert(PAGE_MASK == AMD64_PAGE_MASK, "PAGE_MASK mismatch"); _Static_assert(NPDEPG == AMD64_NPDEPG, "NPDEPG mismatch"); _Static_assert(PDRSHIFT == AMD64_PDRSHIFT, "PDRSHIFT mismatch"); _Static_assert(NBPDR == AMD64_NBPDR, "NBPDR mismatch"); _Static_assert(PDRMASK == AMD64_PDRMASK, "PDRMASK mismatch"); _Static_assert(NPDPEPG == AMD64_NPDPEPG, "NPDPEPG mismatch"); _Static_assert(PDPSHIFT == AMD64_PDPSHIFT, "PDPSHIFT mismatch"); _Static_assert(NBPDP == AMD64_NBPDP, "NBPDP mismatch"); _Static_assert(PDPMASK == AMD64_PDPMASK, "PDPMASK mismatch"); _Static_assert(NPML4EPG == AMD64_NPML4EPG, "NPML4EPG mismatch"); _Static_assert(PML4SHIFT == AMD64_PML4SHIFT, "PML4SHIFT mismatch"); _Static_assert(PG_V == AMD64_PG_V, "PG_V mismatch"); _Static_assert(PG_PS == AMD64_PG_PS, "PG_PS mismatch"); _Static_assert(PG_FRAME == AMD64_PG_FRAME, "PG_FRAME mismatch"); _Static_assert(PG_PS_FRAME == AMD64_PG_PS_FRAME, "PG_PS_FRAME mismatch"); #endif int _amd64_native(kvm_t *); #endif /* !__KVM_AMD64_H__ */ Index: head/lib/libkvm/kvm_arm.h =================================================================== --- head/lib/libkvm/kvm_arm.h (revision 344854) +++ head/lib/libkvm/kvm_arm.h (revision 344855) @@ -1,126 +1,125 @@ /*- * Copyright (c) 2015 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
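[Illustrative aside, not part of this commit.] The amd64 shifts above encode the classic four-level layout; as plain arithmetic, a virtual address decomposes into four 9-bit table indexes (512 eight-byte entries per 4K page) plus a 12-bit page offset.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        uint64_t va = 0xffffffff80201234ULL;    /* arbitrary sample address */

        printf("pml4 index %ju\n", (uintmax_t)((va >> 39) & 0x1ff));
        printf("pdp  index %ju\n", (uintmax_t)((va >> 30) & 0x1ff));
        printf("pd   index %ju\n", (uintmax_t)((va >> 21) & 0x1ff));
        printf("pt   index %ju\n", (uintmax_t)((va >> 12) & 0x1ff));
        printf("page offset %#jx\n", (uintmax_t)(va & 0xfff));
        return (0);
}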
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __KVM_ARM_H__ #define __KVM_ARM_H__ typedef uint32_t arm_physaddr_t; typedef uint32_t arm_pd_entry_t; typedef uint32_t arm_pt_entry_t; #define ARM_PAGE_SHIFT 12 #define ARM_PAGE_SIZE (1 << ARM_PAGE_SHIFT) /* Page size */ #define ARM_PAGE_MASK (ARM_PAGE_SIZE - 1) #define ARM_L1_TABLE_SIZE 0x4000 /* 16K */ #define ARM_L1_S_SIZE 0x00100000 /* 1M */ #define ARM_L1_S_OFFSET (ARM_L1_S_SIZE - 1) #define ARM_L1_S_FRAME (~ARM_L1_S_OFFSET) #define ARM_L1_S_SHIFT 20 #define ARM_L2_L_SIZE 0x00010000 /* 64K */ #define ARM_L2_L_OFFSET (ARM_L2_L_SIZE - 1) #define ARM_L2_L_FRAME (~ARM_L2_L_OFFSET) #define ARM_L2_L_SHIFT 16 #define ARM_L2_S_SIZE 0x00001000 /* 4K */ #define ARM_L2_S_OFFSET (ARM_L2_S_SIZE - 1) #define ARM_L2_S_FRAME (~ARM_L2_S_OFFSET) #define ARM_L2_S_SHIFT 12 #define ARM_L2_TEX1 0x00000080 #define ARM_PTE2_RO ARM_L2_TEX1 #define ARM_L2_NX 0x00000001 #define ARM_PTE2_NX ARM_L2_NX /* * Note: L2_S_PROT_W differs depending on whether the system is generic or * xscale. This isn't easily accessible in this context, so use an * approximation of 'xscale' which is a subset of 'generic'. */ #define ARM_L2_AP0(x) ((x) << 4) #define ARM_AP_W 0x01 #define ARM_L2_S_PROT_W (ARM_L2_AP0(ARM_AP_W)) #define ARM_L1_TYPE_INV 0x00 /* Invalid (fault) */ #define ARM_L1_TYPE_C 0x01 /* Coarse L2 */ #define ARM_L1_TYPE_S 0x02 /* Section */ #define ARM_L1_TYPE_MASK 0x03 /* Mask of type bits */ #define ARM_L1_S_ADDR_MASK 0xfff00000 /* phys address of section */ #define ARM_L1_C_ADDR_MASK 0xfffffc00 /* phys address of L2 Table */ #define ARM_L2_TYPE_INV 0x00 /* Invalid (fault) */ #define ARM_L2_TYPE_L 0x01 /* Large Page - 64k */ #define ARM_L2_TYPE_S 0x02 /* Small Page - 4k */ #define ARM_L2_TYPE_T 0x03 /* Tiny Page - 1k - not used */ #define ARM_L2_TYPE_MASK 0x03 #ifdef __arm__ #include #if __ARM_ARCH >= 6 #include #else #include #endif _Static_assert(PAGE_SHIFT == ARM_PAGE_SHIFT, "PAGE_SHIFT mismatch"); _Static_assert(PAGE_SIZE == ARM_PAGE_SIZE, "PAGE_SIZE mismatch"); _Static_assert(PAGE_MASK == ARM_PAGE_MASK, "PAGE_MASK mismatch"); _Static_assert(L1_TABLE_SIZE == ARM_L1_TABLE_SIZE, "L1_TABLE_SIZE mismatch"); _Static_assert(L1_S_SIZE == ARM_L1_S_SIZE, "L1_S_SIZE mismatch"); _Static_assert(L1_S_OFFSET == ARM_L1_S_OFFSET, "L1_S_OFFSET mismatch"); _Static_assert(L1_S_FRAME == ARM_L1_S_FRAME, "L1_S_FRAME mismatch"); _Static_assert(L1_S_SHIFT == ARM_L1_S_SHIFT, "L1_S_SHIFT mismatch"); _Static_assert(L2_L_SIZE == ARM_L2_L_SIZE, "L2_L_SIZE mismatch"); _Static_assert(L2_L_OFFSET == ARM_L2_L_OFFSET, "L2_L_OFFSET mismatch"); _Static_assert(L2_L_FRAME == ARM_L2_L_FRAME, "L2_L_FRAME mismatch"); _Static_assert(L2_L_SHIFT == ARM_L2_L_SHIFT, "L2_L_SHIFT mismatch"); _Static_assert(L2_S_SIZE == ARM_L2_S_SIZE, "L2_S_SIZE mismatch"); _Static_assert(L2_S_OFFSET == ARM_L2_S_OFFSET, "L2_S_OFFSET mismatch"); _Static_assert(L2_S_FRAME == ARM_L2_S_FRAME, "L2_S_FRAME mismatch"); _Static_assert(L2_S_SHIFT == ARM_L2_S_SHIFT, "L2_S_SHIFT mismatch"); _Static_assert(L1_TYPE_INV == ARM_L1_TYPE_INV, "L1_TYPE_INV 
mismatch"); _Static_assert(L1_TYPE_C == ARM_L1_TYPE_C, "L1_TYPE_C mismatch"); _Static_assert(L1_TYPE_S == ARM_L1_TYPE_S, "L1_TYPE_S mismatch"); _Static_assert(L1_TYPE_MASK == ARM_L1_TYPE_MASK, "L1_TYPE_MASK mismatch"); _Static_assert(L1_S_ADDR_MASK == ARM_L1_S_ADDR_MASK, "L1_S_ADDR_MASK mismatch"); _Static_assert(L1_C_ADDR_MASK == ARM_L1_C_ADDR_MASK, "L1_C_ADDR_MASK mismatch"); _Static_assert(L2_TYPE_INV == ARM_L2_TYPE_INV, "L2_TYPE_INV mismatch"); _Static_assert(L2_TYPE_L == ARM_L2_TYPE_L, "L2_TYPE_L mismatch"); _Static_assert(L2_TYPE_S == ARM_L2_TYPE_S, "L2_TYPE_S mismatch"); #if __ARM_ARCH < 6 _Static_assert(L2_TYPE_T == ARM_L2_TYPE_T, "L2_TYPE_T mismatch"); #endif _Static_assert(L2_TYPE_MASK == ARM_L2_TYPE_MASK, "L2_TYPE_MASK mismatch"); #endif int _arm_native(kvm_t *); #endif /* !__KVM_ARM_H__ */ Index: head/lib/libkvm/kvm_i386.h =================================================================== --- head/lib/libkvm/kvm_i386.h (revision 344854) +++ head/lib/libkvm/kvm_i386.h (revision 344855) @@ -1,84 +1,83 @@ /*- * Copyright (c) 2015 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef __KVM_I386_H__ #define __KVM_I386_H__ #ifdef __i386__ #include #include #endif typedef uint32_t i386_physaddr_t; typedef uint32_t i386_pte_t; typedef uint32_t i386_pde_t; typedef uint64_t i386_physaddr_pae_t; typedef uint64_t i386_pte_pae_t; typedef uint64_t i386_pde_pae_t; #define I386_PAGE_SHIFT 12 #define I386_PAGE_SIZE (1 << I386_PAGE_SHIFT) #define I386_PAGE_MASK (I386_PAGE_SIZE - 1) #define I386_NPTEPG (I386_PAGE_SIZE / sizeof(i386_pte_t)) #define I386_PDRSHIFT 22 #define I386_NBPDR (1 << I386_PDRSHIFT) #define I386_PAGE_PS_MASK (I386_NBPDR - 1) #define I386_NPTEPG_PAE (I386_PAGE_SIZE / sizeof(i386_pte_pae_t)) #define I386_PDRSHIFT_PAE 21 #define I386_NBPDR_PAE (1 << I386_PDRSHIFT_PAE) #define I386_PAGE_PS_MASK_PAE (I386_NBPDR_PAE - 1) /* Source: i386/include/pmap.h */ #define I386_PG_V 0x001 #define I386_PG_RW 0x002 #define I386_PG_PS 0x080 #define I386_PG_NX (1ULL << 63) #define I386_PG_FRAME_PAE (0x000ffffffffff000ull) #define I386_PG_PS_FRAME_PAE (0x000fffffffe00000ull) #define I386_PG_FRAME (0xfffff000) #define I386_PG_PS_FRAME (0xffc00000) #ifdef __i386__ _Static_assert(PAGE_SHIFT == I386_PAGE_SHIFT, "PAGE_SHIFT mismatch"); _Static_assert(PAGE_SIZE == I386_PAGE_SIZE, "PAGE_SIZE mismatch"); _Static_assert(PAGE_MASK == I386_PAGE_MASK, "PAGE_MASK mismatch"); #if 0 _Static_assert(NPTEPG == I386_NPTEPG, "NPTEPG mismatch"); _Static_assert(NBPDR == I386_NBPDR, "NBPDR mismatch"); #endif _Static_assert(PDRSHIFT_NOPAE == I386_PDRSHIFT, "PDRSHIFT mismatch"); _Static_assert(PG_V == I386_PG_V, "PG_V mismatch"); _Static_assert(PG_PS == I386_PG_PS, "PG_PS mismatch"); _Static_assert((u_int)PG_FRAME_NOPAE == I386_PG_FRAME, "PG_FRAME mismatch"); _Static_assert(PG_PS_FRAME_NOPAE == I386_PG_PS_FRAME, "PG_PS_FRAME mismatch"); #endif int _i386_native(kvm_t *); #endif /* !__KVM_I386_H__ */ Index: head/lib/libkvm/kvm_mips.h =================================================================== --- head/lib/libkvm/kvm_mips.h (revision 344854) +++ head/lib/libkvm/kvm_mips.h (revision 344855) @@ -1,118 +1,117 @@ /*- * Copyright (c) 2015 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
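[Illustrative aside, not part of this commit.] A quick consequence of the i386 constants above: PAE doubles the PTE size, which halves the entries per page table page and shrinks superpages from 4M (PDRSHIFT 22) to 2M (PDRSHIFT_PAE 21).

#include <stdio.h>

int
main(void)
{
        /* Derived exactly as in the header above. */
        printf("non-PAE: %d PTEs/page, %dM superpages\n",
            4096 / 4, (1 << 22) >> 20);
        printf("PAE:     %d PTEs/page, %dM superpages\n",
            4096 / 8, (1 << 21) >> 20);
        return (0);
}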
* * $FreeBSD$ */ #ifndef __KVM_MIPS_H__ #define __KVM_MIPS_H__ #ifdef __mips__ #include #endif typedef uint64_t mips_physaddr_t; typedef uint32_t mips32_pte_t; typedef uint64_t mips64_pte_t; #define MIPS_PAGE_SHIFT 12 #define MIPS_PAGE_SIZE (1 << MIPS_PAGE_SHIFT) #define MIPS_PAGE_MASK (MIPS_PAGE_SIZE - 1) #define MIPS32_KSEG0_START 0x80000000 #define MIPS32_KSEG0_END 0x9fffffff #define MIPS32_KSEG1_START 0xa0000000 #define MIPS32_KSEG1_END 0xbfffffff #define MIPS64_KSEG0_START 0xffffffff80000000 #define MIPS64_KSEG0_END 0xffffffff9fffffff #define MIPS64_KSEG1_START 0xffffffffa0000000 #define MIPS64_KSEG1_END 0xffffffffbfffffff #define MIPS32_PFN_MASK (0x1FFFFFC0) #define MIPS64_PFN_MASK 0x3FFFFFFC0 #define MIPS_PFN_SHIFT (6) #define MIPS_PFN_TO_PA(pfn) (((pfn) >> MIPS_PFN_SHIFT) << MIPS_PAGE_SHIFT) #define MIPS32_PTE_TO_PFN(pte) ((pte) & MIPS32_PFN_MASK) #define MIPS32_PTE_TO_PA(pte) (MIPS_PFN_TO_PA(MIPS32_PTE_TO_PFN((pte)))) #define MIPS64_PTE_TO_PFN(pte) ((pte) & MIPS64_PFN_MASK) #define MIPS64_PTE_TO_PA(pte) (MIPS_PFN_TO_PA(MIPS64_PTE_TO_PFN((pte)))) #define MIPS32_SWBITS_SHIFT 29 #define MIPS64_SWBITS_SHIFT 55 #define MIPS_PTE_V 0x02 #define MIPS32_PTE_RO ((mips32_pte_t)0x01 << MIPS32_SWBITS_SHIFT) #define MIPS64_PTE_RO ((mips64_pte_t)0x01 << MIPS64_SWBITS_SHIFT) static inline mips32_pte_t _mips32_pte_get(kvm_t *kd, u_long pteindex) { mips32_pte_t *pte = _kvm_pmap_get(kd, pteindex, sizeof(*pte)); return _kvm32toh(kd, *pte); } static inline mips64_pte_t _mips64_pte_get(kvm_t *kd, u_long pteindex) { mips64_pte_t *pte = _kvm_pmap_get(kd, pteindex, sizeof(*pte)); return _kvm64toh(kd, *pte); } #ifdef __mips__ _Static_assert(PAGE_SHIFT == MIPS_PAGE_SHIFT, "PAGE_SHIFT mismatch"); _Static_assert(PAGE_SIZE == MIPS_PAGE_SIZE, "PAGE_SIZE mismatch"); _Static_assert(PAGE_MASK == MIPS_PAGE_MASK, "PAGE_MASK mismatch"); #ifdef __mips_n64 _Static_assert((uint64_t)MIPS_KSEG0_START == MIPS64_KSEG0_START, "MIPS_KSEG0_START mismatch"); _Static_assert((uint64_t)MIPS_KSEG0_END == MIPS64_KSEG0_END, "MIPS_KSEG0_END mismatch"); _Static_assert((uint64_t)MIPS_KSEG1_START == MIPS64_KSEG1_START, "MIPS_KSEG1_START mismatch"); _Static_assert((uint64_t)MIPS_KSEG1_END == MIPS64_KSEG1_END, "MIPS_KSEG1_END mismatch"); #else _Static_assert((uint32_t)MIPS_KSEG0_START == MIPS32_KSEG0_START, "MIPS_KSEG0_START mismatch"); _Static_assert((uint32_t)MIPS_KSEG0_END == MIPS32_KSEG0_END, "MIPS_KSEG0_END mismatch"); _Static_assert((uint32_t)MIPS_KSEG1_START == MIPS32_KSEG1_START, "MIPS_KSEG1_START mismatch"); _Static_assert((uint32_t)MIPS_KSEG1_END == MIPS32_KSEG1_END, "MIPS_KSEG1_END mismatch"); #endif #if defined(__mips_n64) || defined(__mips_n32) _Static_assert(TLBLO_PFN_MASK == MIPS64_PFN_MASK, "TLBLO_PFN_MASK mismatch"); #else _Static_assert(TLBLO_PFN_MASK == MIPS32_PFN_MASK, "TLBLO_PFN_MASK mismatch"); #endif _Static_assert(TLBLO_PFN_SHIFT == MIPS_PFN_SHIFT, "TLBLO_PFN_SHIFT mismatch"); _Static_assert(TLB_PAGE_SHIFT == MIPS_PAGE_SHIFT, "TLB_PAGE_SHIFT mismatch"); #endif #endif /* !__KVM_MIPS_H__ */ Index: head/lib/libkvm/kvm_native.3 =================================================================== --- head/lib/libkvm/kvm_native.3 (revision 344854) +++ head/lib/libkvm/kvm_native.3 (revision 344855) @@ -1,62 +1,61 @@ .\" .\" Copyright (c) 2015 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. 
Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd March 15, 2017 .Dt KVM_NATIVE 3 .Os .Sh NAME .Nm kvm_native .Nd is a kvm descriptor opened on a native kernel image .Sh LIBRARY .Lb libkvm .Sh SYNOPSIS .In kvm.h .Ft int .Fn kvm_native "kvm_t *kd" .Sh DESCRIPTION The .Nm kvm library provides an interface for accessing kernel virtual memory images for both native kernel images .Pq where the ABI of the kernel executable matches the host system and non-native kernel images. The .Fn kvm_native function returns a non-zero value if the kvm descriptor .Fa kd is attached to a native kernel image; otherwise it returns zero. .Sh RETURN VALUES The .Fn kvm_native function returns a non-zero value if the kvm descriptor .Fa kd is attached to a native kernel image; otherwise it returns zero. .Sh SEE ALSO .Xr kvm 3 , .Xr kvm_open2 3 Index: head/lib/libkvm/kvm_riscv.h =================================================================== --- head/lib/libkvm/kvm_riscv.h (revision 344854) +++ head/lib/libkvm/kvm_riscv.h (revision 344855) @@ -1,90 +1,89 @@ /*- * Copyright (c) 2015 John H. Baldwin - * All rights reserved. * Copyright (c) 2019 Mitchell Horne * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
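[Illustrative aside, not part of this commit.] A minimal caller of the function documented above might look like this; it assumes kvm_open2() and kvm_close() from kvm(3) and links with -lkvm.

#include <fcntl.h>
#include <kvm.h>
#include <limits.h>
#include <stdio.h>

int
main(int argc, char **argv)
{
        char errbuf[_POSIX2_LINE_MAX];
        kvm_t *kd;

        if (argc != 3) {
                fprintf(stderr, "usage: kvmnative kernel core\n");
                return (1);
        }
        kd = kvm_open2(argv[1], argv[2], O_RDONLY, errbuf, NULL);
        if (kd == NULL) {
                fprintf(stderr, "kvm_open2: %s\n", errbuf);
                return (1);
        }
        printf("%s is %snative\n", argv[2], kvm_native(kd) ? "" : "non-");
        kvm_close(kd);
        return (0);
}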
* * $FreeBSD$ */ #ifndef __KVM_RISCV_H__ #define __KVM_RISCV_H__ #ifdef __riscv #include #endif typedef uint64_t riscv_physaddr_t; typedef uint64_t riscv_pt_entry_t; #define RISCV_PAGE_SHIFT 12 #define RISCV_PAGE_SIZE (1 << RISCV_PAGE_SHIFT) #define RISCV_PAGE_MASK (RISCV_PAGE_SIZE - 1) /* Source: sys/riscv/include/pte.h */ #define RISCV_L3_SHIFT 12 #define RISCV_L3_SIZE (1 << RISCV_L3_SHIFT) #define RISCV_L3_OFFSET (RISCV_L3_SIZE - 1) #define RISCV_PTE_SW_MANAGED (1 << 9) #define RISCV_PTE_SW_WIRED (1 << 8) #define RISCV_PTE_D (1 << 7) /* Dirty */ #define RISCV_PTE_A (1 << 6) /* Accessed */ #define RISCV_PTE_G (1 << 5) /* Global */ #define RISCV_PTE_U (1 << 4) /* User */ #define RISCV_PTE_X (1 << 3) /* Execute */ #define RISCV_PTE_W (1 << 2) /* Write */ #define RISCV_PTE_R (1 << 1) /* Read */ #define RISCV_PTE_V (1 << 0) /* Valid */ #define RISCV_PTE_RWX (RISCV_PTE_R | RISCV_PTE_W | RISCV_PTE_X) #define RISCV_PTE_PPN0_S 10 #ifdef __riscv _Static_assert(sizeof(pt_entry_t) == sizeof(riscv_pt_entry_t), "pt_entry_t size mismatch"); _Static_assert(PAGE_SHIFT == RISCV_PAGE_SHIFT, "PAGE_SHIFT mismatch"); _Static_assert(PAGE_SIZE == RISCV_PAGE_SIZE, "PAGE_SIZE mismatch"); _Static_assert(PAGE_MASK == RISCV_PAGE_MASK, "PAGE_MASK mismatch"); _Static_assert(L3_SHIFT == RISCV_L3_SHIFT, "L3_SHIFT mismatch"); _Static_assert(L3_SIZE == RISCV_L3_SIZE, "L3_SIZE mismatch"); _Static_assert(L3_OFFSET == RISCV_L3_OFFSET, "L3_OFFSET mismatch"); _Static_assert(PTE_PPN0_S == RISCV_PTE_PPN0_S, "PTE_PPN0_S mismatch"); _Static_assert(PTE_SW_MANAGED == RISCV_PTE_SW_MANAGED, "PTE_SW_MANAGED mismatch"); _Static_assert(PTE_SW_WIRED == RISCV_PTE_SW_WIRED, "PTE_SW_WIRED mismatch"); _Static_assert(PTE_D == RISCV_PTE_D, "PTE_D mismatch"); _Static_assert(PTE_A == RISCV_PTE_A, "PTE_A mismatch"); _Static_assert(PTE_G == RISCV_PTE_G, "PTE_G mismatch"); _Static_assert(PTE_U == RISCV_PTE_U, "PTE_U mismatch"); _Static_assert(PTE_X == RISCV_PTE_X, "PTE_X mismatch"); _Static_assert(PTE_W == RISCV_PTE_W, "PTE_W mismatch"); _Static_assert(PTE_R == RISCV_PTE_R, "PTE_R mismatch"); _Static_assert(PTE_V == RISCV_PTE_V, "PTE_V mismatch"); _Static_assert(PTE_RWX == RISCV_PTE_RWX, "PTE_RWX mismatch"); #endif #endif /* !__KVM_RISCV_H__ */ Index: head/lib/libkvm/kvm_sparc64.h =================================================================== --- head/lib/libkvm/kvm_sparc64.h (revision 344854) +++ head/lib/libkvm/kvm_sparc64.h (revision 344855) @@ -1,118 +1,117 @@ /*- * Copyright (c) 2015 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
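[Illustrative aside, not part of this commit.] Arithmetic with the RISC-V constants above: in a leaf PTE the physical page number sits above the flag bits at RISCV_PTE_PPN0_S, so shifting it back up by the page shift yields a byte address (hypothetical PTE value).

#include <stdint.h>
#include <stdio.h>

#define PTE_PPN0_S  10          /* RISCV_PTE_PPN0_S */
#define PTE_V       (1 << 0)
#define PTE_R       (1 << 1)
#define PAGE_SHIFT  12          /* RISCV_PAGE_SHIFT */

int
main(void)
{
        uint64_t pte = 0x200000cfULL;   /* hypothetical leaf PTE */

        if ((pte & PTE_V) == 0 || (pte & PTE_R) == 0) {
                printf("not a readable leaf\n");
                return (1);
        }
        /* PPN 0x80000 -> pa 0x80000000. */
        printf("pa %#jx\n", (uintmax_t)((pte >> PTE_PPN0_S) << PAGE_SHIFT));
        return (0);
}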
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __KVM_SPARC64_H__ #define __KVM_SPARC64_H__ #ifdef __sparc64__ #include #include #include #include #include #endif #define SPARC64_PAGE_SHIFT 13 #define SPARC64_PAGE_SIZE (1 << SPARC64_PAGE_SHIFT) #define SPARC64_PAGE_MASK (SPARC64_PAGE_SIZE - 1) #define SPARC64_MIN_DIRECT_ADDRESS (0xfffff80000000000) #define SPARC64_DIRECT_ADDRESS_BITS (43) #define SPARC64_DIRECT_ADDRESS_MASK \ (((uint64_t)1 << SPARC64_DIRECT_ADDRESS_BITS) - 1) #define SPARC64_DIRECT_TO_PHYS(va) ((va) & SPARC64_DIRECT_ADDRESS_MASK) #define SPARC64_TTE_SHIFT (5) #define SPARC64_TD_SIZE_SHIFT (61) #define SPARC64_TD_PA_SHIFT (13) #define SPARC64_TD_SIZE_BITS (2) #define SPARC64_TD_PA_CH_BITS (30) /* US-III{,i,+}, US-IV{,+}, SPARC64 V */ #define SPARC64_TD_PA_BITS SPARC64_TD_PA_CH_BITS #define SPARC64_TD_SIZE_MASK (((uint64_t)1 << SPARC64_TD_SIZE_BITS) - 1) #define SPARC64_TD_PA_MASK (((uint64_t)1 << SPARC64_TD_PA_BITS) - 1) #define SPARC64_TD_V ((uint64_t)1 << 63) #define SPARC64_TV_SIZE_BITS (SPARC64_TD_SIZE_BITS) #define SPARC64_TV_VPN(va, sz) \ ((((va) >> SPARC64_TTE_PAGE_SHIFT(sz)) << SPARC64_TV_SIZE_BITS) | sz) #define SPARC64_TTE_SIZE_SPREAD (3) #define SPARC64_TTE_PAGE_SHIFT(sz) \ (SPARC64_PAGE_SHIFT + ((sz) * SPARC64_TTE_SIZE_SPREAD)) #define SPARC64_TTE_GET_SIZE(tp) \ (((tp)->tte_data >> SPARC64_TD_SIZE_SHIFT) & SPARC64_TD_SIZE_MASK) #define SPARC64_TTE_GET_PA(tp) \ ((tp)->tte_data & (SPARC64_TD_PA_MASK << SPARC64_TD_PA_SHIFT)) struct sparc64_tte { uint64_t tte_vpn; uint64_t tte_data; }; static __inline int sparc64_tte_match(struct sparc64_tte *tp, kvaddr_t va) { return (((tp->tte_data & SPARC64_TD_V) != 0) && (tp->tte_vpn == SPARC64_TV_VPN(va, SPARC64_TTE_GET_SIZE(tp)))); } #ifdef __sparc64__ _Static_assert(PAGE_SHIFT == SPARC64_PAGE_SHIFT, "PAGE_SHIFT mismatch"); _Static_assert(PAGE_SIZE == SPARC64_PAGE_SIZE, "PAGE_SIZE mismatch"); _Static_assert(PAGE_MASK == SPARC64_PAGE_MASK, "PAGE_MASK mismatch"); _Static_assert(VM_MIN_DIRECT_ADDRESS == SPARC64_MIN_DIRECT_ADDRESS, "VM_MIN_DIRECT_ADDRESS mismatch"); _Static_assert(TLB_DIRECT_ADDRESS_BITS == SPARC64_DIRECT_ADDRESS_BITS, "TLB_DIRECT_ADDRESS_BITS mismatch"); _Static_assert(TLB_DIRECT_ADDRESS_MASK == SPARC64_DIRECT_ADDRESS_MASK, "TLB_DIRECT_ADDRESS_MASK mismatch"); _Static_assert(TTE_SHIFT == SPARC64_TTE_SHIFT, "TTE_SHIFT mismatch"); _Static_assert(TD_SIZE_SHIFT == SPARC64_TD_SIZE_SHIFT, "TD_SIZE_SHIFT mismatch"); _Static_assert(TD_PA_SHIFT == SPARC64_TD_PA_SHIFT, "TD_PA_SHIFT mismatch"); _Static_assert(TD_SIZE_BITS == SPARC64_TD_SIZE_BITS, "TD_SIZE_BITS mismatch"); _Static_assert(TD_PA_BITS == SPARC64_TD_PA_BITS, "TD_PA_BITS mismatch"); _Static_assert(TD_SIZE_MASK == SPARC64_TD_SIZE_MASK, "TD_SIZE_MASK mismatch"); _Static_assert(TD_PA_MASK == SPARC64_TD_PA_MASK, "TD_PA_MASK mismatch"); _Static_assert(TD_V == SPARC64_TD_V, "TD_V mismatch"); _Static_assert(TV_SIZE_BITS == SPARC64_TV_SIZE_BITS, "TV_SIZE_BITS mismatch"); _Static_assert(TTE_SIZE_SPREAD == SPARC64_TTE_SIZE_SPREAD, "TTE_SIZE_SPREAD mismatch"); #endif #endif /* 
!__KVM_SPARC64_H__ */ Index: head/lib/libsysdecode/errno.c =================================================================== --- head/lib/libsysdecode/errno.c (revision 344854) +++ head/lib/libsysdecode/errno.c (revision 344855) @@ -1,191 +1,190 @@ /*- * Copyright (c) 2015 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #if defined(__aarch64__) || defined(__amd64__) || defined(__i386__) static #include #endif #include static const int cloudabi_errno_table[] = { [CLOUDABI_E2BIG] = E2BIG, [CLOUDABI_EACCES] = EACCES, [CLOUDABI_EADDRINUSE] = EADDRINUSE, [CLOUDABI_EADDRNOTAVAIL] = EADDRNOTAVAIL, [CLOUDABI_EAFNOSUPPORT] = EAFNOSUPPORT, [CLOUDABI_EAGAIN] = EAGAIN, [CLOUDABI_EALREADY] = EALREADY, [CLOUDABI_EBADF] = EBADF, [CLOUDABI_EBADMSG] = EBADMSG, [CLOUDABI_EBUSY] = EBUSY, [CLOUDABI_ECANCELED] = ECANCELED, [CLOUDABI_ECHILD] = ECHILD, [CLOUDABI_ECONNABORTED] = ECONNABORTED, [CLOUDABI_ECONNREFUSED] = ECONNREFUSED, [CLOUDABI_ECONNRESET] = ECONNRESET, [CLOUDABI_EDEADLK] = EDEADLK, [CLOUDABI_EDESTADDRREQ] = EDESTADDRREQ, [CLOUDABI_EDOM] = EDOM, [CLOUDABI_EDQUOT] = EDQUOT, [CLOUDABI_EEXIST] = EEXIST, [CLOUDABI_EFAULT] = EFAULT, [CLOUDABI_EFBIG] = EFBIG, [CLOUDABI_EHOSTUNREACH] = EHOSTUNREACH, [CLOUDABI_EIDRM] = EIDRM, [CLOUDABI_EILSEQ] = EILSEQ, [CLOUDABI_EINPROGRESS] = EINPROGRESS, [CLOUDABI_EINTR] = EINTR, [CLOUDABI_EINVAL] = EINVAL, [CLOUDABI_EIO] = EIO, [CLOUDABI_EISCONN] = EISCONN, [CLOUDABI_EISDIR] = EISDIR, [CLOUDABI_ELOOP] = ELOOP, [CLOUDABI_EMFILE] = EMFILE, [CLOUDABI_EMLINK] = EMLINK, [CLOUDABI_EMSGSIZE] = EMSGSIZE, [CLOUDABI_EMULTIHOP] = EMULTIHOP, [CLOUDABI_ENAMETOOLONG] = ENAMETOOLONG, [CLOUDABI_ENETDOWN] = ENETDOWN, [CLOUDABI_ENETRESET] = ENETRESET, [CLOUDABI_ENETUNREACH] = ENETUNREACH, [CLOUDABI_ENFILE] = ENFILE, [CLOUDABI_ENOBUFS] = ENOBUFS, [CLOUDABI_ENODEV] = ENODEV, [CLOUDABI_ENOENT] = ENOENT, [CLOUDABI_ENOEXEC] = ENOEXEC, [CLOUDABI_ENOLCK] = ENOLCK, [CLOUDABI_ENOLINK] = ENOLINK, [CLOUDABI_ENOMEM] = ENOMEM, [CLOUDABI_ENOMSG] = ENOMSG, [CLOUDABI_ENOPROTOOPT] = ENOPROTOOPT, [CLOUDABI_ENOSPC] = ENOSPC, [CLOUDABI_ENOSYS] = ENOSYS, [CLOUDABI_ENOTCONN] = ENOTCONN, [CLOUDABI_ENOTDIR] = ENOTDIR, [CLOUDABI_ENOTEMPTY] = ENOTEMPTY, [CLOUDABI_ENOTRECOVERABLE] = 
ENOTRECOVERABLE, [CLOUDABI_ENOTSOCK] = ENOTSOCK, [CLOUDABI_ENOTSUP] = ENOTSUP, [CLOUDABI_ENOTTY] = ENOTTY, [CLOUDABI_ENXIO] = ENXIO, [CLOUDABI_EOVERFLOW] = EOVERFLOW, [CLOUDABI_EOWNERDEAD] = EOWNERDEAD, [CLOUDABI_EPERM] = EPERM, [CLOUDABI_EPIPE] = EPIPE, [CLOUDABI_EPROTO] = EPROTO, [CLOUDABI_EPROTONOSUPPORT] = EPROTONOSUPPORT, [CLOUDABI_EPROTOTYPE] = EPROTOTYPE, [CLOUDABI_ERANGE] = ERANGE, [CLOUDABI_EROFS] = EROFS, [CLOUDABI_ESPIPE] = ESPIPE, [CLOUDABI_ESRCH] = ESRCH, [CLOUDABI_ESTALE] = ESTALE, [CLOUDABI_ETIMEDOUT] = ETIMEDOUT, [CLOUDABI_ETXTBSY] = ETXTBSY, [CLOUDABI_EXDEV] = EXDEV, [CLOUDABI_ENOTCAPABLE] = ENOTCAPABLE, }; int sysdecode_abi_to_freebsd_errno(enum sysdecode_abi abi, int error) { switch (abi) { case SYSDECODE_ABI_FREEBSD: case SYSDECODE_ABI_FREEBSD32: return (error); #if defined(__aarch64__) || defined(__amd64__) || defined(__i386__) case SYSDECODE_ABI_LINUX: case SYSDECODE_ABI_LINUX32: { unsigned int i; /* * This is imprecise since it returns the first * matching errno. */ for (i = 0; i < nitems(linux_errtbl); i++) { if (error == linux_errtbl[i]) return (i); } break; } #endif case SYSDECODE_ABI_CLOUDABI32: case SYSDECODE_ABI_CLOUDABI64: if (error >= 0 && (unsigned int)error < nitems(cloudabi_errno_table)) return (cloudabi_errno_table[error]); break; default: break; } return (INT_MAX); } int sysdecode_freebsd_to_abi_errno(enum sysdecode_abi abi, int error) { switch (abi) { case SYSDECODE_ABI_FREEBSD: case SYSDECODE_ABI_FREEBSD32: return (error); #if defined(__aarch64__) || defined(__amd64__) || defined(__i386__) case SYSDECODE_ABI_LINUX: case SYSDECODE_ABI_LINUX32: if (error >= 0 && error <= ELAST) return (linux_errtbl[error]); break; #endif case SYSDECODE_ABI_CLOUDABI32: case SYSDECODE_ABI_CLOUDABI64: { unsigned int i; for (i = 0; i < nitems(cloudabi_errno_table); i++) { if (error == cloudabi_errno_table[i]) return (i); } break; } default: break; } return (INT_MAX); } Index: head/lib/libsysdecode/signal.c =================================================================== --- head/lib/libsysdecode/signal.c (revision 344854) +++ head/lib/libsysdecode/signal.c (revision 344855) @@ -1,143 +1,142 @@ /*- * Copyright (c) 2016 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include static const char *signames[] = { [SIGHUP] = "SIGHUP", [SIGINT] = "SIGINT", [SIGQUIT] = "SIGQUIT", [SIGILL] = "SIGILL", [SIGTRAP] = "SIGTRAP", [SIGABRT] = "SIGABRT", [SIGEMT] = "SIGEMT", [SIGFPE] = "SIGFPE", [SIGKILL] = "SIGKILL", [SIGBUS] = "SIGBUS", [SIGSEGV] = "SIGSEGV", [SIGSYS] = "SIGSYS", [SIGPIPE] = "SIGPIPE", [SIGALRM] = "SIGALRM", [SIGTERM] = "SIGTERM", [SIGURG] = "SIGURG", [SIGSTOP] = "SIGSTOP", [SIGTSTP] = "SIGTSTP", [SIGCONT] = "SIGCONT", [SIGCHLD] = "SIGCHLD", [SIGTTIN] = "SIGTTIN", [SIGTTOU] = "SIGTTOU", [SIGIO] = "SIGIO", [SIGXCPU] = "SIGXCPU", [SIGXFSZ] = "SIGXFSZ", [SIGVTALRM] = "SIGVTALRM", [SIGPROF] = "SIGPROF", [SIGWINCH] = "SIGWINCH", [SIGINFO] = "SIGINFO", [SIGUSR1] = "SIGUSR1", [SIGUSR2] = "SIGUSR2", [SIGTHR] = "SIGTHR", [SIGLIBRT] = "SIGLIBRT", /* XXX: Solaris uses SIGRTMIN, SIGRTMIN+...SIGRTMAX-, SIGRTMAX */ [SIGRTMIN] = "SIGRT0", [SIGRTMIN + 1] = "SIGRT1", [SIGRTMIN + 2] = "SIGRT2", [SIGRTMIN + 3] = "SIGRT3", [SIGRTMIN + 4] = "SIGRT4", [SIGRTMIN + 5] = "SIGRT5", [SIGRTMIN + 6] = "SIGRT6", [SIGRTMIN + 7] = "SIGRT7", [SIGRTMIN + 8] = "SIGRT8", [SIGRTMIN + 9] = "SIGRT9", [SIGRTMIN + 10] = "SIGRT10", [SIGRTMIN + 11] = "SIGRT11", [SIGRTMIN + 12] = "SIGRT12", [SIGRTMIN + 13] = "SIGRT13", [SIGRTMIN + 14] = "SIGRT14", [SIGRTMIN + 15] = "SIGRT15", [SIGRTMIN + 16] = "SIGRT16", [SIGRTMIN + 17] = "SIGRT17", [SIGRTMIN + 18] = "SIGRT18", [SIGRTMIN + 19] = "SIGRT19", [SIGRTMIN + 20] = "SIGRT20", [SIGRTMIN + 21] = "SIGRT21", [SIGRTMIN + 22] = "SIGRT22", [SIGRTMIN + 23] = "SIGRT23", [SIGRTMIN + 24] = "SIGRT24", [SIGRTMIN + 25] = "SIGRT25", [SIGRTMIN + 26] = "SIGRT26", [SIGRTMIN + 27] = "SIGRT27", [SIGRTMIN + 28] = "SIGRT28", [SIGRTMIN + 29] = "SIGRT29", [SIGRTMIN + 30] = "SIGRT30", [SIGRTMIN + 31] = "SIGRT31", [SIGRTMIN + 32] = "SIGRT32", [SIGRTMIN + 33] = "SIGRT33", [SIGRTMIN + 34] = "SIGRT34", [SIGRTMIN + 35] = "SIGRT35", [SIGRTMIN + 36] = "SIGRT36", [SIGRTMIN + 37] = "SIGRT37", [SIGRTMIN + 38] = "SIGRT38", [SIGRTMIN + 39] = "SIGRT39", [SIGRTMIN + 40] = "SIGRT40", [SIGRTMIN + 41] = "SIGRT41", [SIGRTMIN + 42] = "SIGRT42", [SIGRTMIN + 43] = "SIGRT43", [SIGRTMIN + 44] = "SIGRT44", [SIGRTMIN + 45] = "SIGRT45", [SIGRTMIN + 46] = "SIGRT46", [SIGRTMIN + 47] = "SIGRT47", [SIGRTMIN + 48] = "SIGRT48", [SIGRTMIN + 49] = "SIGRT49", [SIGRTMIN + 50] = "SIGRT50", [SIGRTMIN + 51] = "SIGRT51", [SIGRTMIN + 52] = "SIGRT52", [SIGRTMIN + 53] = "SIGRT53", [SIGRTMIN + 54] = "SIGRT54", [SIGRTMIN + 55] = "SIGRT55", [SIGRTMIN + 56] = "SIGRT56", [SIGRTMIN + 57] = "SIGRT57", [SIGRTMIN + 58] = "SIGRT58", [SIGRTMIN + 59] = "SIGRT59", [SIGRTMIN + 60] = "SIGRT60", [SIGRTMIN + 61] = "SIGRT61", }; const char * sysdecode_signal(int sig) { if ((unsigned)sig < nitems(signames)) return (signames[sig]); return (NULL); } Index: head/lib/libsysdecode/syscallnames.c =================================================================== --- head/lib/libsysdecode/syscallnames.c (revision 344854) +++ head/lib/libsysdecode/syscallnames.c (revision 344855) @@ -1,112 +1,111 @@ /*- * Copyright (c) 2015 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Map system call codes to names for the supported ABIs on each * platform. Rather than regenerating system call name tables locally * during the build, use the generated tables in the kernel source * tree. */ #include #include #include #include #include #include static #include #if defined(__amd64__) || defined(__powerpc64__) static #include #endif #if defined(__aarch64__) || defined(__amd64__) || defined(__i386__) static #ifdef __aarch64__ #include #elif __amd64__ #include #else #include #endif #endif #ifdef __amd64__ static #include #endif static #include static #include const char * sysdecode_syscallname(enum sysdecode_abi abi, unsigned int code) { switch (abi) { case SYSDECODE_ABI_FREEBSD: if (code < nitems(syscallnames)) return (syscallnames[code]); break; #if defined(__amd64__) || defined(__powerpc64__) case SYSDECODE_ABI_FREEBSD32: if (code < nitems(freebsd32_syscallnames)) return (freebsd32_syscallnames[code]); break; #endif #if defined(__aarch64__) || defined(__amd64__) || defined(__i386__) case SYSDECODE_ABI_LINUX: if (code < nitems(linux_syscallnames)) return (linux_syscallnames[code]); break; #endif #ifdef __amd64__ case SYSDECODE_ABI_LINUX32: if (code < nitems(linux32_syscallnames)) return (linux32_syscallnames[code]); break; #endif case SYSDECODE_ABI_CLOUDABI32: if (code < nitems(cloudabi32_syscallnames)) return (cloudabi32_syscallnames[code]); break; case SYSDECODE_ABI_CLOUDABI64: if (code < nitems(cloudabi64_syscallnames)) return (cloudabi64_syscallnames[code]); break; default: break; } return (NULL); } Index: head/lib/libsysdecode/sysdecode.3 =================================================================== --- head/lib/libsysdecode/sysdecode.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode.3 (revision 344855) @@ -1,93 +1,92 @@ .\" .\" Copyright (c) 2015 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution.
.\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd April 26, 2018 .Dt SYSDECODE 3 .Os .Sh NAME .Nm sysdecode .Nd system argument decoding library .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In sysdecode.h .Sh DESCRIPTION The .Nm library includes several functions that provide descriptive names of values associated with system calls. .Ss Supported ABIs Some functions in this library provide ABI-specific descriptions. The supported ABIs are named by the .Vt enum sysdecode_abi enumeration. .Pp .Bl -tag -width "Li SYSDECODE_ABI_CLOUDABI64" -compact .It Li SYSDECODE_ABI_FREEBSD Native FreeBSD binaries. Supported on all platforms. .It Li SYSDECODE_ABI_FREEBSD32 32-bit FreeBSD binaries. Supported on amd64 and powerpc64. .It Li SYSDECODE_ABI_LINUX Linux binaries of the same platform. Supported on amd64, i386, and arm64. .It Li SYSDECODE_ABI_LINUX32 32-bit Linux binaries. Supported on amd64. .It Li SYSDECODE_ABI_CLOUDABI32 32-bit CloudABI binaries. Supported on all platforms. .It Li SYSDECODE_ABI_CLOUDABI64 64-bit CloudABI binaries. Supported on all platforms. .It Li SYSDECODE_ABI_UNKNOWN A placeholder for use when the ABI is not known. .El .Sh SEE ALSO .Xr sysdecode_abi_to_freebsd_errno 3 , .Xr sysdecode_cap_rights 3 , .Xr sysdecode_cmsg_type 3 , .Xr sysdecode_enum 3 , .Xr sysdecode_fcntl_arg 3 , .Xr sysdecode_ioctlname 3 , .Xr sysdecode_kevent 3 , .Xr sysdecode_mask 3 , .Xr sysdecode_quotactl_cmd 3 , .Xr sysdecode_sctp_sinfo_flags 3 , .Xr sysdecode_sigcode 3 , .Xr sysdecode_socket_protocol 3 , .Xr sysdecode_sockopt_name 3 , .Xr sysdecode_syscallnames 3 , .Xr sysdecode_utrace 3 .Sh HISTORY The .Nm library first appeared in .Fx 11.0 . Index: head/lib/libsysdecode/sysdecode.h =================================================================== --- head/lib/libsysdecode/sysdecode.h (revision 344854) +++ head/lib/libsysdecode/sysdecode.h (revision 344855) @@ -1,133 +1,132 @@ /*- * Copyright (c) 2015 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __SYSDECODE_H__ #define __SYSDECODE_H__ enum sysdecode_abi { SYSDECODE_ABI_UNKNOWN = 0, SYSDECODE_ABI_FREEBSD, SYSDECODE_ABI_FREEBSD32, SYSDECODE_ABI_LINUX, SYSDECODE_ABI_LINUX32, SYSDECODE_ABI_CLOUDABI64, SYSDECODE_ABI_CLOUDABI32 }; int sysdecode_abi_to_freebsd_errno(enum sysdecode_abi _abi, int _error); bool sysdecode_access_mode(FILE *_fp, int _mode, int *_rem); const char *sysdecode_acltype(int _type); const char *sysdecode_atfd(int _fd); bool sysdecode_atflags(FILE *_fp, int _flags, int *_rem); bool sysdecode_cap_fcntlrights(FILE *_fp, uint32_t _rights, uint32_t *_rem); void sysdecode_cap_rights(FILE *_fp, cap_rights_t *_rightsp); const char *sysdecode_cmsg_type(int _cmsg_level, int _cmsg_type); const char *sysdecode_extattrnamespace(int _namespace); const char *sysdecode_fadvice(int _advice); void sysdecode_fcntl_arg(FILE *_fp, int _cmd, uintptr_t _arg, int _base); bool sysdecode_fcntl_arg_p(int _cmd); const char *sysdecode_fcntl_cmd(int _cmd); bool sysdecode_fcntl_fileflags(FILE *_fp, int _flags, int *_rem); bool sysdecode_fileflags(FILE *_fp, fflags_t _flags, fflags_t *_rem); bool sysdecode_filemode(FILE *_fp, int _mode, int *_rem); bool sysdecode_flock_operation(FILE *_fp, int _operation, int *_rem); int sysdecode_freebsd_to_abi_errno(enum sysdecode_abi _abi, int _error); const char *sysdecode_getfsstat_mode(int _mode); const char *sysdecode_getrusage_who(int _who); const char *sysdecode_idtype(int _idtype); const char *sysdecode_ioctlname(unsigned long _val); const char *sysdecode_ipproto(int _protocol); void sysdecode_kevent_fflags(FILE *_fp, short _filter, int _fflags, int _base); const char *sysdecode_kevent_filter(int _filter); bool sysdecode_kevent_flags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_kldsym_cmd(int _cmd); const char *sysdecode_kldunload_flags(int _flags); const char *sysdecode_lio_listio_mode(int _mode); const char *sysdecode_madvice(int _advice); const char *sysdecode_minherit_inherit(int _inherit); const char *sysdecode_msgctl_cmd(int _cmd); bool sysdecode_mlockall_flags(FILE *_fp, int _flags, int *_rem); bool sysdecode_mmap_flags(FILE *_fp, int _flags, int *_rem); bool sysdecode_mmap_prot(FILE *_fp, int _prot, int *_rem); bool sysdecode_mount_flags(FILE *_fp, int _flags, int *_rem); bool sysdecode_msg_flags(FILE *_fp, int _flags, int *_rem); bool sysdecode_msync_flags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_nfssvc_flags(int _flags); bool sysdecode_open_flags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_pathconf_name(int _name); bool sysdecode_pipe2_flags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_prio_which(int _which); const char *sysdecode_procctl_cmd(int _cmd); const char *sysdecode_ptrace_request(int _request); bool sysdecode_quotactl_cmd(FILE *_fp, int _cmd); bool sysdecode_reboot_howto(FILE *_fp, int _howto, int *_rem); bool sysdecode_rfork_flags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_rlimit(int _resource); const char 
*sysdecode_rtprio_function(int _function); const char *sysdecode_scheduler_policy(int _policy); bool sysdecode_sctp_nxt_flags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_sctp_pr_policy(int _policy); bool sysdecode_sctp_rcv_flags(FILE *_fp, int _flags, int *_rem); void sysdecode_sctp_sinfo_flags(FILE *_fp, int _sinfo_flags); bool sysdecode_sctp_snd_flags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_semctl_cmd(int _cmd); bool sysdecode_semget_flags(FILE *_fp, int _flag, int *_rem); bool sysdecode_sendfile_flags(FILE *_fp, int _flags, int *_rem); bool sysdecode_shmat_flags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_shmctl_cmd(int _cmd); const char *sysdecode_shutdown_how(int _how); const char *sysdecode_sigbus_code(int _si_code); const char *sysdecode_sigchld_code(int _si_code); const char *sysdecode_sigcode(int _sig, int _si_code); const char *sysdecode_sigfpe_code(int _si_code); const char *sysdecode_sigill_code(int _si_code); const char *sysdecode_signal(int _sig); const char *sysdecode_sigprocmask_how(int _how); const char *sysdecode_sigsegv_code(int _si_code); const char *sysdecode_sigtrap_code(int _si_code); const char *sysdecode_sockaddr_family(int _sa_family); const char *sysdecode_socketdomain(int _domain); const char *sysdecode_socket_protocol(int _domain, int _protocol); bool sysdecode_socket_type(FILE *_fp, int _type, int *_rem); const char *sysdecode_sockopt_level(int _level); const char *sysdecode_sockopt_name(int _level, int _optname); const char *sysdecode_syscallname(enum sysdecode_abi _abi, unsigned int _code); const char *sysdecode_sysarch_number(int _number); bool sysdecode_thr_create_flags(FILE *_fp, int _flags, int *_rem); bool sysdecode_umtx_cvwait_flags(FILE *_fp, u_long _flags, u_long *_rem); const char *sysdecode_umtx_op(int _op); bool sysdecode_umtx_rwlock_flags(FILE *_fp, u_long _flags, u_long *_rem); int sysdecode_utrace(FILE *_fp, void *_buf, size_t _len); bool sysdecode_vmprot(FILE *_fp, int _type, int *_rem); const char *sysdecode_vmresult(int _result); bool sysdecode_wait4_options(FILE *_fp, int _options, int *_rem); bool sysdecode_wait6_options(FILE *_fp, int _options, int *_rem); const char *sysdecode_whence(int _whence); #endif /* !__SYSDECODE_H__ */ Index: head/lib/libsysdecode/sysdecode_abi_to_freebsd_errno.3 =================================================================== --- head/lib/libsysdecode/sysdecode_abi_to_freebsd_errno.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode_abi_to_freebsd_errno.3 (revision 344855) @@ -1,97 +1,96 @@ .\" .\" Copyright (c) 2016 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd October 17, 2016 .Dt sysdecode_abi_to_freebsd_errno 3 .Os .Sh NAME .Nm sysdecode_abi_to_freebsd_errno , .Nm sysdecode_freebsd_to_abi_errno .Nd translate error numbers between process ABIs .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In sysdecode.h .Ft int .Fn sysdecode_abi_to_freebsd_errno "enum sysdecode_abi abi" "int error" .Ft int .Fn sysdecode_freebsd_to_abi_errno "enum sysdecode_abi abi" "int error" .Sh DESCRIPTION The .Fn sysdecode_abi_to_freebsd_errno function returns the native .Xr errno 2 value that corresponds to the error indicated by .Fa error for the process ABI .Fa abi . If .Fa error does not identify a valid error for .Fa abi , .Dv INT_MAX is returned. .Pp The .Fn sysdecode_freebsd_to_abi_errno function returns the error value for the process ABI .Fa abi that corresponds to the native .Xr errno 2 value .Fa error . If .Fa error does not identify a valid .Xr errno 2 error, .Dv INT_MAX is returned. .Pp Note that the mappings between native .Xr errno 2 values and errors for other ABIs are not exhaustive. If a mapping does not exist, these functions return .Dv INT_MAX . In addition, multiple error values in one ABI may map to a single error in another ABI. .Sh RETURN VALUES These functions return an error value on success or .Dv INT_MAX if .Fa error is not valid. .Pp For the list of supported ABIs, see .Xr sysdecode 3 . .Sh SEE ALSO .Xr sysdecode 3 , .Xr sysdecode_syscallnames 3 Index: head/lib/libsysdecode/sysdecode_cap_rights.3 =================================================================== --- head/lib/libsysdecode/sysdecode_cap_rights.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode_cap_rights.3 (revision 344855) @@ -1,51 +1,50 @@ .\" .\" Copyright (c) 2016 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd November 24, 2017 .Dt sysdecode_cap_rights 3 .Os .Sh NAME .Nm sysdecode_cap_rights .Nd output list of capability rights .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In stdio.h .In sysdecode.h .Ft void .Fn sysdecode_cap_rights "FILE *fp" "cap_rights_t *rightsp" .Sh DESCRIPTION The .Fn sysdecode_cap_rights function outputs a comma-separated list of capability rights at .Fa rightsp to the stream .Fa fp . .Sh SEE ALSO .Xr sysdecode 3 Index: head/lib/libsysdecode/sysdecode_enum.3 =================================================================== --- head/lib/libsysdecode/sysdecode_enum.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode_enum.3 (revision 344855) @@ -1,257 +1,256 @@ .\" .\" Copyright (c) 2016 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" .\" $FreeBSD$ .\" .Dd January 14, 2018 .Dt sysdecode_enum 3 .Os .Sh NAME .Nm sysdecode_enum , .Nm sysdecode_acltype , .Nm sysdecode_atfd , .Nm sysdecode_extattrnamespace , .Nm sysdecode_fadvice , .Nm sysdecode_fcntl_cmd , .Nm sysdecode_getfsstat_mode , .Nm sysdecode_getrusage_who , .Nm sysdecode_idtype , .Nm sysdecode_ipproto , .Nm sysdecode_kldsym_cmd , .Nm sysdecode_kldunload_flags , .Nm sysdecode_lio_listio_mode , .Nm sysdecode_madvice , .Nm sysdecode_minherit_flags , .Nm sysdecode_msgctl_cmd , .Nm sysdecode_nfssvc_flags , .Nm sysdecode_pathconf_name , .Nm sysdecode_prio_which , .Nm sysdecode_procctl_cmd , .Nm sysdecode_ptrace_request , .Nm sysdecode_rlimit , .Nm sysdecode_rtprio_function , .Nm sysdecode_scheduler_policy , .Nm sysdecode_sctp_pr_policy , .Nm sysdecode_sctp_sinfo_flags , .Nm sysdecode_semctl_cmd , .Nm sysdecode_shmctl_cmd , .Nm sysdecode_shutdown_how , .Nm sysdecode_sigbus_code , .Nm sysdecode_sigchld_code , .Nm sysdecode_sigfpe_code , .Nm sysdecode_sigill_code , .Nm sysdecode_signal , .Nm sysdecode_sigprocmask_how , .Nm sysdecode_sigsegv_code , .Nm sysdecode_sigtrap_code , .Nm sysdecode_sockaddr_family , .Nm sysdecode_socketdomain , .Nm sysdecode_sockettype , .Nm sysdecode_sockopt_level , .Nm sysdecode_sysarch_number , .Nm sysdecode_umtx_op , .Nm sysdecode_vmresult , .Nm sysdecode_whence .Nd lookup name of various enumerated values .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In sysdecode.h .Ft const char * .Fn sysdecode_acltype "int type" .Ft const char * .Fn sysdecode_atfd "int fd" .Ft const char * .Fn sysdecode_extattrnamespace "int namespace" .Ft const char * .Fn sysdecode_fadvice "int advice" .Ft const char * .Fn sysdecode_fcntl_cmd "int cmd" .Ft const char * .Fn sysdecode_getfsstat_mode "int mode" .Ft const char * .Fn sysdecode_getrusage_who "int who" .Ft const char * .Fn sysdecode_idtype "int idtype" .Ft const char * .Fn sysdecode_ipproto "int protocol" .Ft const char * .Fn sysdecode_kldsym_cmd "int cmd" .Ft const char * .Fn sysdecode_kldunload_flags "int flags" .Ft const char * .Fn sysdecode_lio_listio_mode "int mode" .Ft const char * .Fn sysdecode_madvice "int advice" .Ft const char * .Fn sysdecode_minherit_flags "int inherit" .Ft const char * .Fn sysdecode_msgctl_cmd "int cmd" .Ft const char * .Fn sysdecode_nfssvc_flags "int flags" .Ft const char * .Fn sysdecode_pathconf_name "int name" .Ft const char * .Fn sysdecode_prio_which "int which" .Ft const char * .Fn sysdecode_procctl_cmd "int cmd" .Ft const char * .Fn sysdecode_ptrace_request "int request" .Ft const char * .Fn sysdecode_rlimit "int resource" .Ft const char * .Fn sysdecode_rtprio_function "int function" .Ft const char * .Fn sysdecode_scheduler_policy "int policy" .Ft const char * .Fn sysdecode_sctp_pr_policy "int policy" .Ft const char * .Fn sysdecode_semctl_cmd "int cmd" .Ft const char * .Fn sysdecode_shmctl_cmd "int cmd" .Ft const char * .Fn sysdecode_shutdown_how "int how" .Ft const char * .Fn sysdecode_sigbus_code "int si_code" .Ft const char * .Fn sysdecode_sigchld_code "int si_code" .Ft const char * .Fn sysdecode_sigfpe_code "int si_code" .Ft const char * .Fn sysdecode_sigill_code "int si_code" .Ft const char * .Fn sysdecode_signal "int sig" .Ft const char * .Fn sysdecode_sigprocmask_how "int how" .Ft const char * .Fn sysdecode_sigsegv_code "int si_code" .Ft const char * .Fn sysdecode_sigtrap_code "int si_code" .Ft const char * .Fn sysdecode_sockaddr_family "int sa_family" .Ft const char * .Fn sysdecode_socketdomain "int domain" .Ft const char * .Fn 
sysdecode_sockettype "int type" .Ft const char * .Fn sysdecode_sockopt_level "int level" .Ft const char * .Fn sysdecode_sysarch_number "int number" .Ft const char * .Fn sysdecode_umtx_op "int op" .Ft const char * .Fn sysdecode_vmresult "int result" .Ft const char * .Fn sysdecode_whence "int whence" .Sh DESCRIPTION The .Nm functions return a text description of an integer value. The text description matches the name of a C macro with the same value as the sole function argument. .Dv NULL is returned if there is no matching C macro name. .Pp Most of these functions decode an argument passed to a system call: .Bl -column "Fn sysdecode_extattrnamespace" "Xr sched_setscheduler 2" .It Sy Function Ta Sy System Call Ta Sy Argument .It Fn sysdecode_acltype Ta Xr acl_get_file 3 Ta Fa type .It Fn sysdecode_atfd Ta Xr openat 2 Ta Fa fd .It Fn sysdecode_extattrnamespace Ta Xr extattr_get_fd 2 Ta Fa attrnamespace .It Fn sysdecode_fadvice Ta Xr posix_fadvise 2 Ta Fa advice .It Fn sysdecode_fcntl_cmd Ta Xr fcntl 2 Ta Fa cmd .It Fn sysdecode_getfsstat_mode Ta Xr getfsstat 2 Ta Fa mode .It Fn sysdecode_idtype Ta .Xr procctl 2 , .Xr waitid 2 .Ta Fa idtype .It Fn sysdecode_kldsym_cmd Ta Xr kldsym 2 Ta Fa cmd .It Fn sysdecode_kldunload_flags Ta Xr kldunloadf 2 Ta Fa flags .It Fn sysdecode_lio_listio_mode Ta Xr lio_listio 2 Ta Fa mode .It Fn sysdecode_madvice Ta Xr madvise 2 Ta Fa advice .It Fn sysdecode_minherit_inherit Ta Xr minherit 2 Ta Fa inherit .It Fn sysdecode_msgctl_cmd Ta Xr msgctl 2 Ta Fa cmd .It Fn sysdecode_nfssvc_flags Ta Xr nfssvc 2 Ta Fa flags .It Fn sysdecode_pathconf_name Ta Xr pathconf 2 Ta Fa name .It Fn sysdecode_prio_which Ta Xr getpriority 2 Ta Fa which .It Fn sysdecode_procctl_cmd Ta Xr procctl 2 Ta Fa cmd .It Fn sysdecode_ptrace_request Ta Xr ptrace 2 Ta Fa request .It Fn sysdecode_rlimit Ta Xr getrlimit 2 Ta Fa resource .It Fn sysdecode_rtprio_function Ta Xr rtprio 2 Ta Fa function .It Fn sysdecode_getrusage_who Ta Xr getrusage 2 Ta Fa who .It Fn sysdecode_scheduler_policy Ta Xr sched_setscheduler 2 Ta Fa policy .It Fn sysdecode_semctl_cmd Ta Xr semctl 2 Ta Fa cmd .It Fn sysdecode_shmctl_cmd Ta Xr shmctl 2 Ta Fa cmd .It Fn sysdecode_shutdown_how Ta Xr shutdown 2 Ta Fa how .It Fn sysdecode_sigprocmask_how Ta Xr sigprocmask 2 Ta Fa how .It Fn sysdecode_sockopt_level Ta Xr getsockopt 2 Ta Fa level .It Fn sysdecode_sysarch_number Ta Xr sysarch 2 Ta Fa number .It Fn sysdecode_umtx_op Ta Xr _umtx_op 2 Ta Fa op .It Fn sysdecode_whence Ta Xr lseek 2 Ta Fa whence .El .Pp These functions decode signal-specific signal codes stored in the .Fa si_code field of the .Vt siginfo_t object associated with an instance of signal: .Bl -column "Fn sysdecode_sigchld_code" .It Sy Function Ta Sy Signal .It Fn sysdecode_sigbus_code Ta Dv SIGBUS .It Fn sysdecode_sigchld_code Ta Dv SIGCHLD .It Fn sysdecode_sigfpe_code Ta Dv SIGFPE .It Fn sysdecode_sigill_code Ta Dv SIGILL .It Fn sysdecode_sigsegv_code Ta Dv SIGSEGV .It Fn sysdecode_sigtrap_code Ta Dv SIGBTRAP .El .Pp Other functions decode the values described below: .Bl -tag -width "Fn sysdecode_sockaddr_family" .It Fn sysdecode_ipproto An IP protocol. .It Fn sysdecode_sctp_pr_policy A PR-SCTP policy. .It Fn sysdecode_signal A process signal. .It Fn sysdecode_sockaddr_family A socket address family. .It Fn sysdecode_socketdomain A socket domain. .It Fn sysdecode_vmresult The return value of a function in the virtual memory subsystem of the kernel indicating the status of the associated request. 
.El .Sh RETURN VALUES The .Nm functions return the name of a matching C macro or .Dv NULL if no matching C macro was found. .Sh SEE ALSO .Xr sysdecode 3 , .Xr sysdecode_mask 3 , .Xr sysdecode_sigcode 3 Index: head/lib/libsysdecode/sysdecode_fcntl_arg.3 =================================================================== --- head/lib/libsysdecode/sysdecode_fcntl_arg.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode_fcntl_arg.3 (revision 344855) @@ -1,122 +1,121 @@ .\" .\" Copyright (c) 2016 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd November 24, 2017 .Dt sysdecode_fcntl_arg 3 .Os .Sh NAME .Nm sysdecode_fcntl_arg , .Nm sysdecode_fcntl_arg_p .Nd output description of fcntl argument .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In stdio.h .In sysdecode.h .Ft void .Fn sysdecode_fcntl_arg "FILE *fp" "int cmd" "uintptr_t arg" "int base" .Ft bool .Fn sysdecode_fcntl_arg_p "int cmd" .Sh DESCRIPTION The .Fn sysdecode_fcntl_arg function outputs a text description of the optional .Fa arg argument to .Xr fcntl 2 to the stream .Fa fp . The type and format of .Fa arg are determined by .Fa cmd : .Bl -column ".Dv F_SETLKW" "Vt struct flock *" .It Sy Command Ta Fa arg Sy Type Ta Sy Output Format .It .It Dv F_SETFD Ta Vt int Ta .Dq FD_CLOEXEC or the value of .Fa arg in the indicated .Fa base .Pq one of 8, 10, or 16 . .It .It Dv F_SETFL Ta Vt int Ta File flags as output by .Xr sysdecode_fcntl_fileflags 3 with any unknown or remaining bits output in hexadecimal. .It .It Dv F_GETLK Ta Vt struct flock * Ta .It Dv F_SETLK Ta Vt struct flock * Ta .It Dv F_SETLKW Ta Vt struct flock * Ta The value of .Fa arg using the .Dq %p conversion specification. .It .It Others Ta Vt int Ta The value of .Fa arg in the indicated .Fa base .Pq one of 8, 10, or 16 . .El .Pp The .Fn sysdecode_fcntl_arg_p function can be used to determine if a .Xr fcntl 2 command uses the optional third argument to .Xr fcntl 2 . The function returns .Dv true if .Fa cmd accepts a third argument to .Xr fcntl 2 and .Dv false if it does not. .Sh RETURN VALUES The .Nm sysdecode_fcntl_arg_p function returns .Dv true if .Fa cmd accepts a third argument to .Xr fcntl 2 and .Dv false if it does not. 
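.Sh EXAMPLES Based on the output formats described above, a statement such as .Pp .Dl sysdecode_fcntl_arg(stdout, F_SETFD, FD_CLOEXEC, 16); .Pp should output the text .Dq FD_CLOEXEC to standard output; this sketch assumes the standard .Dv FD_CLOEXEC definition from .In fcntl.h . For commands without a dedicated format, the value of .Fa arg is printed as a plain integer in the requested base.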
.Sh SEE ALSO .Xr sysdecode 3 , .Xr sysdecode_fcntl_cmd 3 , .Xr sysdecode_fcntl_fileflags 3 Index: head/lib/libsysdecode/sysdecode_ioctlname.3 =================================================================== --- head/lib/libsysdecode/sysdecode_ioctlname.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode_ioctlname.3 (revision 344855) @@ -1,60 +1,59 @@ .\" .\" Copyright (c) 2015 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd October 17, 2016 .Dt sysdecode_ioctlname 3 .Os .Sh NAME .Nm sysdecode_ioctlname .Nd lookup name of device control command .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In sysdecode.h .Ft const char * .Fn sysdecode_ioctlname "unsigned long request" .Sh DESCRIPTION The .Fn sysdecode_ioctlname function returns the name of a device control request identified by .Fa request . A table of names is generated during the build of the .Nm sysdecode library from system headers that maps device control request values to the name of the corresponding C macro. .Sh RETURN VALUES The .Fn sysdecode_ioctlname function returns the name of a device control request if .Fa request is a known value; otherwise .Dv NULL . .Sh SEE ALSO .Xr sysdecode 3 Index: head/lib/libsysdecode/sysdecode_kevent.3 =================================================================== --- head/lib/libsysdecode/sysdecode_kevent.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode_kevent.3 (revision 344855) @@ -1,126 +1,125 @@ .\" .\" Copyright (c) 2017 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. 
.\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd November 24, 2017 .Dt sysdecode_kevent 3 .Os .Sh NAME .Nm sysdecode_kevent , .Nm sysdecode_kevent_fflags , .Nm sysdecode_kevent_filter , .Nm sysdecode_kevent_flags .Nd output description of kevent structure fields .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In stdio.h .In sysdecode.h .Ft void .Fn sysdecode_kevent_fflags "FILE *fp" "short filter" "int fflags" "int base" .Ft bool .Fn sysdecode_kevent_flags "FILE *fp" "int flags" "int *rem" .Ft const char * .Fn sysdecode_kevent_filter "int filter" .Sh DESCRIPTION These functions provide text descriptions of .Vt struct kevent fields. .Pp The .Fn sysdecode_kevent_fflags function outputs a text description of the .Fa fflags member of a .Vt struct kevent to the stream .Fa fp . For the .Dv EVFILT_READ , .Dv EVFILT_WRITE , .Dv EVFILT_VNODE , .Dv EVFILT_PROC , .Dv EVFILT_PROCDESC , .Dv EVFILT_TIMER , and .Dv EVFILT_USER filters, .Fn sysdecode_kevent_fflags outputs a bitmask of filter-specific .Dv NOTE_* flags as documented in .Xr kevent 2 . For other values of .Fa filter , the value of .Fa fflags is output in the indicated .Fa base .Pq one of 8, 10, or 16 . .Pp The .Fn sysdecode_kevent_filter function returns a text description of the .Fa filter member of a .Vt struct kevent . .Dv NULL is returned if the .Fa filter value is unknown. .Pp The .Fn sysdecode_kevent_flags function outputs a text description of the .Fa flags member of a .Vt struct kevent to the stream .Fa fp . This function uses the same calling convention and formatting as the other functions described in .Xr sysdecode_mask 3 . .Sh RETURN VALUES The .Nm sysdecode_kevent_filter function returns the name of a filter or .Dv NULL if the filter value is unknown. .Pp The .Nm sysdecode_kevent_flags function returns .Dv true if any flags in the .Fa flags field were decoded and .Dv false if no flags were decoded. .Sh SEE ALSO .Xr sysdecode 3 , .Xr sysdecode_enum 3 , .Xr sysdecode_mask 3 Index: head/lib/libsysdecode/sysdecode_mask.3 =================================================================== --- head/lib/libsysdecode/sysdecode_mask.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode_mask.3 (revision 344855) @@ -1,243 +1,242 @@ .\" .\" Copyright (c) 2016 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. 
Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd January 16, 2018 .Dt sysdecode_mask 3 .Os .Sh NAME .Nm sysdecode_mask , .Nm sysdecode_accessmode , .Nm sysdecode_atflags , .Nm sysdecode_capfcntlrights , .Nm sysdecode_fcntl_fileflags , .Nm sysdecode_fileflags , .Nm sysdecode_filemode , .Nm sysdecode_flock_operation , .Nm sysdecode_mlockall_flags , .Nm sysdecode_mmap_flags , .Nm sysdecode_mmap_prot , .Nm sysdecode_mount_flags , .Nm sysdecode_msg_flags , .Nm sysdecode_msync_flags , .Nm sysdecode_open_flags , .Nm sysdecode_pipe2_flags , .Nm sysdecode_reboot_howto , .Nm sysdecode_rfork_flags , .Nm sysdecode_semget_flags , .Nm sysdecode_sendfile_flags , .Nm sysdecode_shmat_flags , .Nm sysdecode_sctp_nxt_flags , .Nm sysdecode_sctp_rcv_flags , .Nm sysdecode_sctp_snd_flags , .Nm sysdecode_socket_type , .Nm sysdecode_thr_create_flags , .Nm sysdecode_umtx_cvwait_flags , .Nm sysdecode_umtx_rwlock_flags , .Nm sysdecode_vmprot , .Nm sysdecode_wait4_options , .Nm sysdecode_wait6_options .Nd print name of various bitmask values .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In stdio.h .In sysdecode.h .Ft bool .Fn sysdecode_access_mode "FILE *fp" "int mode" "int *rem" .Ft bool .Fn sysdecode_atflags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_cap_fcntlrights "FILE *fp" "uint32_t rights" "uint32_t *rem" .Ft bool .Fn sysdecode_fcntl_fileflags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_fileflags "FILE *fp" "fflags_t flags" "fflags_t *rem" .Ft bool .Fn sysdecode_filemode "FILE *fp" "int mode" "int *rem" .Ft bool .Fn sysdecode_flock_operation "FILE *fp" "int operation" "int *rem" .Ft bool .Fn sysdecode_mlockall_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_mmap_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_mmap_prot "FILE *fp" "int prot" "int *rem" .Ft bool .Fn sysdecode_mount_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_msg_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_msync_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_open_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_pipe2_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_reboot_howto "FILE *fp" "int howto" "int *rem" .Ft bool .Fn sysdecode_rfork_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_sctp_nxt_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_sctp_rcv_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_sctp_snd_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_semget_flags "FILE 
*fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_sendfile_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_shmat_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_socket_type "FILE *fp" "int type" "int *rem" .Ft bool .Fn sysdecode_thr_create_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_umtx_cvwait_flags "FILE *fp" "u_long flags" "u_long *rem" .Ft bool .Fn sysdecode_umtx_rwlock_flags "FILE *fp" "u_long flags" "u_long *rem" .Ft bool .Fn sysdecode_vmprot "FILE *fp" "int type" "int *rem" .Ft bool .Fn sysdecode_wait4_options "FILE *fp" "int options" "int *rem" .Ft bool .Fn sysdecode_wait6_options "FILE *fp" "int options" "int *rem" .Sh DESCRIPTION The .Nm functions are used to generate a text description of an integer value built from a mask of bitfields. The text description lists the C macros for field values joined by pipe .Sq | characters matching the format used in C source code. Most of the values decoded by these functions are passed as arguments to system calls, though some of these values are used internally in the kernel. .Pp Each function writes the text description to .Fa fp . The second argument should contain the integer value to be decoded. The .Fa rem argument is set to the value of any bits that were not decoded .Pq bit fields that do not have a corresponding C macro . .Fa rem may be set to .Dv NULL if the caller does not need this value. Each function returns .Dv true if any bit fields in the value were decoded and .Dv false if no bit fields were decoded. .Pp Most of these functions decode an argument passed to a system call: .Bl -column "Fn sysdecode_flock_operation" "Xr cap_fcntls_limit 2" .It Sy Function Ta Sy System Call Ta Sy Argument .It Fn sysdecode_access_mode Ta Xr access 2 Ta Fa mode .It Fn sysdecode_atflags Ta Xr chflagsat 2 , Xr fstatat 2 Ta Fa atflag , Fa flag .It Fn sysdecode_cap_fcntlrights Ta Xr cap_fcntls_limit 2 Ta Fa fcntlrights .It Fn sysdecode_fileflags Ta Xr chflags 2 Ta Fa flags .It Fn sysdecode_filemode Ta Xr chmod 2 , Xr open 2 Ta mode .It Fn sysdecode_flock_operation Ta Xr flock 2 Ta Fa operation .It Fn sysdecode_mlockall_flags Ta Xr mlockall 2 Ta Fa flags .It Fn sysdecode_mmap_flags Ta Xr mmap 2 Ta Fa flags .It Fn sysdecode_mmap_prot Ta Xr mmap 2 Ta Fa prot .It Fn sysdecode_mount_flags Ta Xr mount 2 Ta Fa flags .It Fn sysdecode_msg_flags Ta Xr recv 2 , Xr send 2 Ta Fa flags .It Fn sysdecode_msync_flags Ta Xr msync 2 Ta Fa flags .It Fn sysdecode_open_flags Ta Xr open 2 Ta Fa flags .It Fn sysdecode_pipe2_flags Ta Xr pipe2 Ta Fa flags .It Fn sysdecode_reboot_howto Ta Xr reboot 2 Ta Fa howto .It Fn sysdecode_rfork_flags Ta Xr rfork 2 Ta Fa flags .It Fn sysdecode_semget_flags Ta Xr semget 2 Ta Fa flags .It Fn sysdecode_sendfile_flags Ta Xr sendfile 2 Ta Fa flags .It Fn sysdecode_shmat_flags Ta Xr shmat 2 Ta Fa flags .It Fn sysdecode_socket_type Ta Xr socket 2 Ta Fa type .It Fn sysdecode_thr_create_flags Ta Xr thr_create 2 Ta Fa flags .It Fn sysdecode_wait4_options Ta Xr wait4 2 Ta Fa options .It Fn sysdecode_wait6_options Ta Xr wait6 2 Ta Fa options .El .Pp Other functions decode the values described below: .Bl -tag -width ".Fn sysdecode_umtx_cvwait_flags" .It Fn sysdecode_fcntl_fileflags The file flags used with the .Dv F_GETFL and .Dv F_SETFL .Xr fcntl 2 commands. .It Fn sysdecode_sctp_nxt_flags The .Fa nxt_flags member of a .Vt struct sctp_nxtinfo . .It Fn sysdecode_sctp_rcv_flags The .Fa rcv_flags member of a .Vt struct sctp_rcvinfo . 
.It Fn sysdecode_sctp_snd_flags The .Fa snd_flags member of a .Vt struct sctp_sndinfo . .It Fn sysdecode_umtx_cvwait_flags The .Fa val argument to .Xr _umtx_op 2 for .Dv UMTX_OP_CV_WAIT operations. .It Fn sysdecode_umtx_rwlock_flags The .Fa val argument to .Xr _umtx_op 2 for .Dv UMTX_OP_RW_RDLOCK operations. .It Fn sysdecode_vmprot The memory protection flags stored in .Vt vm_prot_t variables. .El .Sh RETURN VALUES The .Nm functions return .Dv true if any bit fields in the value were decoded and .Dv false if no bit fields were decoded. .Sh SEE ALSO .Xr sysdecode 3 , .Xr sysdecode_enum 3 Index: head/lib/libsysdecode/sysdecode_quotactl_cmd.3 =================================================================== --- head/lib/libsysdecode/sysdecode_quotactl_cmd.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode_quotactl_cmd.3 (revision 344855) @@ -1,94 +1,93 @@ .\" .\" Copyright (c) 2016 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd November 24, 2017 .Dt sysdecode_quotactl_cmd 3 .Os .Sh NAME .Nm sysdecode_quotactl_cmd .Nd output name of quotactl command .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In stdio.h .In sysdecode.h .Ft bool .Fn sysdecode_quotactl_cmd "FILE *fp" "int cmd" .Sh DESCRIPTION The .Fn sysdecode_quotactl_cmd function outputs a text description of the .Fa cmd argument to .Xr quotactl 2 to the stream .Fa fp . The description is formatted as an invocation of the .Dv QCMD macro defined in the .In ufs/ufs/quota.h header. .Pp The function first computes the primary and secondary values used by .Dv QCMD to construct .Fa cmd . If the primary command value does not represent a known constant, .Fn sysdecode_quotactl_cmd does not generate any output and returns .Dv false . Otherwise, .Fn sysdecode_quotactl_cmd outputs text depicting an invocation of .Dv QCMD with the associated constants for the primary and secondary command values and returns .Dv true . If the secondary command value does not represent a known constant, its value is output as a hexadecimal integer. .Sh RETURN VALUES The .Nm sysdecode_quotactl_cmd function returns .Dv true if it outputs a description of .Fa cmd and .Dv false if it does not.
.Sh EXAMPLES The statement .Pp .Dl sysdecode_quotactl_cmd(stdout, QCMD(Q_GETQUOTA, USRQUOTA)); .Pp outputs the text .Dq QCMD(Q_GETQUOTA, USRQUOTA) to standard output. .Sh SEE ALSO .Xr sysdecode 3 Index: head/lib/libsysdecode/sysdecode_sigcode.3 =================================================================== --- head/lib/libsysdecode/sysdecode_sigcode.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode_sigcode.3 (revision 344855) @@ -1,83 +1,82 @@ .\" .\" Copyright (c) 2016 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd October 17, 2016 .Dt sysdecode_sigcode 3 .Os .Sh NAME .Nm sysdecode_sigcode .Nd lookup name of signal code .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In sysdecode.h .Ft const char * .Fn sysdecode_sigcode "int sig" "int si_code" .Sh DESCRIPTION The .Fn sysdecode_sigcode function returns a text description of the .Fa si_code field of the .Vt siginfo_t object associated with an instance of signal .Fa sig . The text description contains the name of the C macro whose value matches .Fa si_code . General purpose signal codes such as .Dv SI_USER are handled as well as signal-specific codes for .Dv SIGBUS , .Dv SIGCHLD , .Dv SIGFPE , .Dv SIGILL , .Dv SIGSEGV and .Dv SIGTRAP . If .Fa si_code does not represent a known signal code, .Fn sysdecode_sigcode returns .Dv NULL . .Sh RETURN VALUES The .Fn sysdecode_sigcode function returns a pointer to a signal code description or .Dv NULL if .Fa si_code is not a known signal code. .Sh SEE ALSO .Xr sysdecode_sigbus_code 3 , .Xr sysdecode_sigchld_code 3 , .Xr sysdecode_sigfpe_code 3 , .Xr sysdecode_sigill_code 3 , .Xr sysdecode_sigsegv_code 3 , .Xr sysdecode_sigtrap_code 3 Index: head/lib/libsysdecode/sysdecode_socket_protocol.3 =================================================================== --- head/lib/libsysdecode/sysdecode_socket_protocol.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode_socket_protocol.3 (revision 344855) @@ -1,54 +1,53 @@ .\" .\" Copyright (c) 2016 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1.
Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd May 25, 2017 .Dt sysdecode_socket_protocol 3 .Os .Sh NAME .Nm sysdecode_socket_protocol .Nd lookup name of socket protocol .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In sysdecode.h .Ft const char * .Fn sysdecode_socket_protocol "int domain" "int protocol" .Sh DESCRIPTION The .Fn sysdecode_socket_protocol function returns a text description of the protocol passed in the .Fa protocol argument to .Xr socket 2 . .Fn sysdecode_socket_protocol takes the .Fa protocol as well as the .Fa domain to uniquely identify the protocol. Index: head/lib/libsysdecode/sysdecode_sockopt_name.3 =================================================================== --- head/lib/libsysdecode/sysdecode_sockopt_name.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode_sockopt_name.3 (revision 344855) @@ -1,61 +1,60 @@ .\" .\" Copyright (c) 2016 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" .\" $FreeBSD$ .\" .Dd October 17, 2016 .Dt sysdecode_sockopt_name 3 .Os .Sh NAME .Nm sysdecode_sockopt_name .Nd lookup name of socket option .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In sysdecode.h .Ft const char * .Fn sysdecode_sockopt_name "int level" "int optname" .Sh DESCRIPTION The .Fn sysdecode_sockopt_name function returns a text description of the socket option name passed in the .Fa optname argument to .Xr getsockopt 2 . .Fn sysdecode_sockopt_name takes the socket option .Fa level as well as the option name to uniquely identify the option. .Sh SEE ALSO .Xr sysdecode_sockopt_level 3 .Sh BUGS Socket option levels and names are protocol-specific. Both .Fn sysdecode_sockopt_level and .Fn sysdecode_sockopt_name should possibly accept the protocol family as an additional argument. Index: head/lib/libsysdecode/sysdecode_syscallnames.3 =================================================================== --- head/lib/libsysdecode/sysdecode_syscallnames.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode_syscallnames.3 (revision 344855) @@ -1,71 +1,70 @@ .\" .\" Copyright (c) 2016 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd October 17, 2016 .Dt sysdecode_syscallnames 3 .Os .Sh NAME .Nm sysdecode_syscallnames .Nd lookup name of system calls .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In sysdecode.h .Ft const char * .Fn sysdecode_syscallnames "enum sysdecode_abi abi" "unsigned int code" .Sh DESCRIPTION This function returns a pointer to the name of a system call identified by .Fa code for the process ABI .Fa abi . If .Fa code specifies an unknown system call or .Fa abi is an unsupported ABI, .Nm returns .Dv NULL . .Pp For the list of supported ABIs, see .Xr sysdecode 3 . .Sh RETURN VALUES The .Nm function returns a pointer to a string on success or .Dv NULL if either .Fa code or .Fa ABI is invalid . 
.Sh SEE ALSO .Xr sysdecode 3 , .Xr sysdecode_abi_to_freebsd_errno 3 Index: head/lib/libsysdecode/sysdecode_utrace.3 =================================================================== --- head/lib/libsysdecode/sysdecode_utrace.3 (revision 344854) +++ head/lib/libsysdecode/sysdecode_utrace.3 (revision 344855) @@ -1,77 +1,76 @@ .\" .\" Copyright (c) 2015 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd November 24, 2017 .Dt sysdecode_utrace 3 .Os .Sh NAME .Nm sysdecode_utrace .Nd produce text description of a utrace record .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In stdio.h .In sysdecode.h .Ft int .Fn sysdecode_utrace "FILE *fp" "void *buf" "size_t len" "int decimal" .Sh DESCRIPTION The .Fn sysdecode_utrace function outputs a textual representation of a .Xr utrace 2 record identified by .Fa buf and .Fa len to the output stream .Fa fp . .Pp The function only outputs a representation for certain types of records. If a record is recognized, the function outputs the description and returns a non-zero value. If the record is not recognized, the function does not output anything and returns zero. The .Fn sysdecode_utrace function currently recognizes .Xr utrace 2 records generated by .Xr malloc 3 and .Xr rtld 1 . .Sh RETURN VALUES The .Fn sysdecode_utrace function returns a non-zero value if it recognizes a .Xr utrace 2 record; otherwise it returns zero. .Sh SEE ALSO .Xr utrace 2 , .Xr sysdecode 3 Index: head/lib/libutil/kinfo_getvmobject.3 =================================================================== --- head/lib/libutil/kinfo_getvmobject.3 (revision 344854) +++ head/lib/libutil/kinfo_getvmobject.3 (revision 344855) @@ -1,74 +1,73 @@ .\" .\" Copyright (c) 2015 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. 
Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd May 27, 2015 .Dt KINFO_GETVMOBJECT 3 .Os .Sh NAME .Nm kinfo_getvmobject .Nd function for getting system-wide memory information .Sh LIBRARY .Lb libutil .Sh SYNOPSIS .In sys/types.h .In sys/user.h .In libutil.h .Ft struct kinfo_vmobject * .Fn kinfo_getvmobject "int *cntp" .Sh DESCRIPTION This function is used to obtain information about the objects using memory in the system. .Pp The .Ar cntp argument allows the caller to know how many records are returned. .Pp This function is a wrapper around the .Dq vm.objects .Xr sysctl 3 MIB. While the kernel returns a packed structure, this function expands the data into a fixed record format. .Sh RETURN VALUES On success the .Fn kinfo_getvmobject function returns a pointer to an array of .Vt struct kinfo_vmobject structures as defined by .In sys/user.h . The array is allocated by an internal call to .Xr malloc 3 and must be freed by the caller with a call to .Xr free 3 . On failure the .Fn kinfo_getvmobject function returns .Dv NULL . .Sh SEE ALSO .Xr free 3 , .Xr kinfo_getvmmap 3 , .Xr malloc 3 Index: head/sbin/hastd/refcnt.h =================================================================== --- head/sbin/hastd/refcnt.h (revision 344854) +++ head/sbin/hastd/refcnt.h (revision 344855) @@ -1,65 +1,64 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __REFCNT_H__ #define __REFCNT_H__ #include <machine/atomic.h> #include "pjdlog.h" typedef unsigned int refcnt_t; static __inline void refcnt_init(refcnt_t *count, unsigned int v) { *count = v; } static __inline void refcnt_acquire(refcnt_t *count) { atomic_add_acq_int(count, 1); } static __inline unsigned int refcnt_release(refcnt_t *count) { unsigned int old; /* XXX: Should this have a rel membar? */ old = atomic_fetchadd_int(count, -1); PJDLOG_ASSERT(old > 0); return (old - 1); } #endif /* ! __REFCNT_H__ */ Index: head/share/man/man3/sigevent.3 =================================================================== --- head/share/man/man3/sigevent.3 (revision 344854) +++ head/share/man/man3/sigevent.3 (revision 344855) @@ -1,127 +1,126 @@ .\" -*- nroff -*- .\" .\" Copyright (c) 2016 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd July 15, 2016 .Dt SIGEVENT 3 .Os .Sh NAME .Nm sigevent .Nd "asynchronous event notification" .Sh SYNOPSIS .In signal.h .Sh DESCRIPTION Some operations permit threads to request asynchronous notification of events via a .Vt struct sigevent structure.
This structure contains several fields that describe the requested notification: .Bl -column ".Vt void (*)(union sigval)" ".Va sigev_notify_kevent_flags" .It Sy Type Ta Sy Member Ta Sy Description .It Vt int Ta sigev_notify Ta notification method .It Vt int Ta sigev_signo Ta signal number .It Vt union sigval Ta sigev_value Ta signal value .It Vt int Ta sigev_notify_kqueue Ta .Xr kqueue 2 file descriptor .It Vt unsigned short Ta sigev_notify_kevent_flags Ta kevent flags .It Vt lwpid_t Ta sigev_notify_thread_id Ta LWP ID .It Vt void (*)(union sigval) Ta sigev_notify_function Ta callback function pointer .It Vt pthread_attr_t * Ta sigev_notify_attributes Ta callback thread attributes .El .Pp The .Va sigev_notify field specifies the notification method used when the event triggers: .Bl -tag -width ".Dv SIGEV_THREAD_ID" .It Dv SIGEV_NONE No notification is sent. .It Dv SIGEV_SIGNAL The signal .Va sigev_signo is queued as a real-time signal to the calling process. The value stored in .Va sigev_value will be present in the .Va si_value of the .Vt siginfo_t structure of the queued signal. .It Dv SIGEV_THREAD The notification function in .Va sigev_notify_function is called in a separate thread context. The thread is created with the attributes specified in .Va *sigev_notify_attributes . The value stored in .Va sigev_value is passed as the sole argument to .Va sigev_notify_function . If .Va sigev_notify_attributes is .Dv NULL , the thread is created with default attributes. .It Dv SIGEV_KEVENT A new kevent is posted to the kqueue .Va sigev_notify_kqueue . The .Va udata member of the kevent structure contains the value stored in .Va sigev_value . The meaning of other fields in the kevent are specific to the type of triggered event. .It Dv SIGEV_THREAD_ID The signal .Va sigev_signo is queued to the thread whose LWP ID is .Va sigev_notify_thread_id . The value stored in .Va sigev_value will be present in the .Va si_value of the .Vt siginfo_t structure of the queued signal. .El .Sh NOTES Note that programs wishing to use .Dv SIGEV_THREAD notifications must link against the .Lb librt . .Sh SEE ALSO .Xr aio_read 2 , .Xr mq_notify 2 , .Xr timer_create 2 , .Xr siginfo 3 .Sh STANDARDS The .Vt struct sigevent type conforms to .St -p1003.1-2004 . Index: head/share/man/man4/ktr.4 =================================================================== --- head/share/man/man4/ktr.4 (revision 344854) +++ head/share/man/man4/ktr.4 (revision 344855) @@ -1,211 +1,210 @@ .\" Copyright (c) 2001 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd October 20, 2012 .Dt KTR 4 .Os .Sh NAME .Nm ktr .Nd kernel tracing facility .Sh SYNOPSIS .Cd options KTR .Cd options ALQ .Cd options KTR_ALQ .Cd options KTR_COMPILE=(KTR_LOCK|KTR_INTR|KTR_PROC) .Cd options KTR_CPUMASK=0x3 .Cd options KTR_ENTRIES=8192 .Cd options KTR_MASK=(KTR_INTR|KTR_PROC) .Cd options KTR_VERBOSE .Sh DESCRIPTION The .Nm facility allows kernel events to be logged while the kernel executes so that they can be examined later when debugging. The only mandatory option to enable .Nm is .Dq Li options KTR . .Pp The .Dv KTR_ENTRIES option sets the size of the buffer of events. The size of the buffer in the currently running kernel can be found via the sysctl .Va debug.ktr.entries . By default the buffer contains 1024 entries. .Ss Event Masking Event levels can be enabled or disabled to trim excessive and overly verbose logging. First, a mask of events is specified at compile time via the .Dv KTR_COMPILE option to limit which events are actually compiled into the kernel. The default value for this option is for all events to be enabled. .Pp Second, the actual events logged while the kernel runs can be further masked via the run time event mask. The .Dv KTR_MASK option sets the default value of the run time event mask. The runtime event mask can also be set by the .Xr loader 8 via the .Va debug.ktr.mask environment variable. It can also be examined and set after booting via the .Va debug.ktr.mask sysctl. By default the run time mask is set to block any tracing. The definitions of the event mask bits can be found in .In sys/ktr.h . .Pp Furthermore, there is a CPU event mask whose default value can be changed via the .Dv KTR_CPUMASK option. When two or more parameters to .Dv KTR_CPUMASK are used, it is important that they are not separated by whitespace. A CPU must have the bit corresponding to its logical id set in this bitmask for events that occur on it to be logged. This mask can be set by the .Xr loader 8 via the .Va debug.ktr.cpumask environment variable. It can also be examined and set after booting via the .Va debug.ktr.cpumask sysctl. By default, only CPUs specified in .Dv KTR_CPUMASK will log events. See .Pa sys/conf/NOTES for more information. .Ss Verbose Mode By default, events are only logged to the internal buffer for examination later, but if the verbose flag is set then they are dumped to the kernel console as well. This flag can also be set from the loader via the .Va debug.ktr.verbose environment variable, or it can be examined and set after booting via the .Va debug.ktr.verbose sysctl. If the flag is set to zero, which is the default, then verbose output is disabled. If the flag is set to one, then the contents of the log message and the CPU number are printed to the kernel console. If the flag is greater than one, then the filename and line number of the event are output to the console in addition to the log message and the CPU number. The .Dv KTR_VERBOSE option sets the flag to one.
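For orientation, events enter the trace buffer through the CTR* macros documented in ktr(9); a minimal kernel-code sketch, with an illustrative event class and message:

#include <sys/param.h>
#include <sys/ktr.h>

/*
 * The record reaches the trace buffer only if KTR_PROC is present in
 * both KTR_COMPILE and the run time mask (debug.ktr.mask), and the
 * current CPU is enabled in debug.ktr.cpumask.
 */
static void
trace_priority_change(int oldpri, int newpri)
{

	CTR2(KTR_PROC, "priority change: old %d new %d", oldpri, newpri);
}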
.Ss Examining the Events The KTR buffer can be examined from within .Xr ddb 4 via the .Ic show ktr Op Cm /vV command. This command displays the contents of the trace buffer one page at a time. At the .Dq Li --more-- prompt, the Enter key displays one more entry and prompts again. The spacebar displays another page of entries. Any other key quits. By default the timestamp, filename, and line number are not displayed with each log entry. If the .Cm /v modifier is specified, then they are displayed in addition to the normal output. If the .Cm /V modifier is specified, then just the timestamp is displayed in addition to the normal output. Note that the events are displayed in reverse chronological order. That is, the most recent events are displayed first. .Ss Logging ktr to Disk The .Dv KTR_ALQ option can be used to log .Nm entries to disk for post analysis using the .Xr ktrdump 8 utility. This option depends on the .Dv ALQ option. Due to the potentially high volume of trace messages the trace mask should be selected carefully. This feature is configured through a group of sysctls. .Bl -tag -width ".Va debug.ktr.alq_enable" .It Va debug.ktr.alq_file displays or sets the file that .Nm will log to. By default its value is .Pa /tmp/ktr.out . If the file name is changed while .Nm is enabled it will not take effect until the next invocation. .It Va debug.ktr.alq_enable enables logging of .Nm entries to disk if it is set to one. Setting this to 0 will terminate logging to disk and revert to logging to the normal ktr ring buffer. Data is not sent to the ring buffer while logging to disk. .It Va debug.ktr.alq_max is the maximum number of entries that will be recorded to disk, or 0 for infinite. This is helpful for limiting the number of particularly high frequency entries that are recorded. .It Va debug.ktr.alq_depth determines the number of entries in the write buffer. This is the buffer that holds entries before they are written to disk and defaults to the value of the .Dv KTR_ENTRIES option. .It Va debug.ktr.alq_failed records the number of times we failed to write an entry due to overflowing the write buffer. This may happen if the frequency of the logged .Nm messages outpaces the depth of the queue. .It Va debug.ktr.alq_cnt records the number of entries that have currently been written to disk. .El .Sh SEE ALSO .Xr ktrdump 8 , .Xr alq 9 , .Xr ktr 9 .Sh HISTORY The KTR kernel tracing facility first appeared in .Bsx 3.0 and was imported into .Fx 5.0 . Index: head/share/man/man4/witness.4 =================================================================== --- head/share/man/man4/witness.4 (revision 344854) +++ head/share/man/man4/witness.4 (revision 344855) @@ -1,188 +1,187 @@ .\" Copyright (c) 2001 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd November 18, 2015 .Dt WITNESS 4 .Os .Sh NAME .Nm witness .Nd lock validation facility .Sh SYNOPSIS .Cd options WITNESS .Cd options WITNESS_COUNT .Cd options WITNESS_KDB .Cd options WITNESS_NO_VNODE .Cd options WITNESS_SKIPSPIN .Sh DESCRIPTION The .Nm module keeps track of the locks acquired and released by each thread. It also keeps track of the order in which locks are acquired with respect to each other. Each time a lock is acquired, .Nm uses these two lists to verify that a lock is not being acquired in the wrong order. If a lock order violation is detected, then a message is output to the kernel console or log detailing the locks involved and the locations in question. Witness can also be configured to drop into the kernel debugger when an order violation occurs. .Pp The .Nm code also checks various other conditions such as verifying that one does not recurse on a non-recursive lock, or attempt an upgrade on a shared lock held by another thread. If any of these checks fail, then the kernel will panic. .Pp The .Dv WITNESS_COUNT kernel option controls the maximum number of .Nm entries that are tracked in the kernel. The maximum number of entries can be queried via the .Va debug.witness.count sysctl. It can also be set from the .Xr loader 8 via the .Va debug.witness.count environment variable. .Pp The .Dv WITNESS_NO_VNODE kernel option tells .Nm to ignore locking issues between .Xr vnode 9 objects. .Pp The flag that controls whether or not the kernel debugger is entered when a lock order violation is detected can be set in a variety of ways. By default, the flag is off, but if the .Dv WITNESS_KDB kernel option is specified, then the flag will default to on. It can also be set from the .Xr loader 8 via the .Va debug.witness.kdb environment variable or after the kernel has booted via the .Va debug.witness.kdb sysctl. If the flag is set to zero, then the debugger will not be entered. If the flag is non-zero, then the debugger will be entered. .Pp The .Nm code can also be configured to skip all checks on spin mutexes. This flag defaults to off, but it can be turned on by specifying the .Dv WITNESS_SKIPSPIN kernel option. The flag can also be set via the .Xr loader 8 environment variable .Va debug.witness.skipspin . If the variable is set to a non-zero value, then spin mutexes are skipped. Once the kernel has booted, the status of this flag can be examined but not set via the read-only sysctl .Va debug.witness.skipspin . .Pp The sysctl .Va debug.witness.watch specifies the level of witness involvement in the system. A value of 1 specifies that witness is enabled. A value of 0 specifies that witness is disabled but can be enabled again. This setting maintains a small amount of overhead in the system. A value of -1 specifies that witness is disabled permanently and cannot be enabled again. The sysctl .Va debug.witness.watch can be set via .Xr loader 8 .
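To make the checks concrete, a hedged sketch of the kind of bug witness reports: two code paths acquiring the same pair of mutexes in opposite orders. The mtx functions come from mutex(9); the lock names and functions here are illustrative only:

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>

static struct mtx a_lock, b_lock;

static void
example_init(void)
{

	mtx_init(&a_lock, "a_lock", NULL, MTX_DEF);
	mtx_init(&b_lock, "b_lock", NULL, MTX_DEF);
}

static void
path_one(void)
{

	mtx_lock(&a_lock);
	mtx_lock(&b_lock);	/* witness records the order: a before b */
	mtx_unlock(&b_lock);
	mtx_unlock(&a_lock);
}

static void
path_two(void)
{

	mtx_lock(&b_lock);
	mtx_lock(&a_lock);	/* reversal: witness logs a lock order violation */
	mtx_unlock(&a_lock);
	mtx_unlock(&b_lock);
}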
.Pp The sysctl .Va debug.witness.output_channel specifies the output channel used to display warnings emitted by .Nm . The possible values are .Ql console , indicating that warnings are to be printed to the system console, .Ql log , indicating that warnings are to be logged via .Xr log 9 , and .Ql none . This sysctl can be set via .Xr loader 8 . .Pp The .Nm code also provides three extra .Xr ddb 4 commands if both .Nm and .Xr ddb 4 are compiled into the kernel: .Bl -ohang .It Ic show locks Op thread Outputs the list of locks held by a thread to the kernel console along with the filename and line number at which each lock was last acquired by the thread. The optional .Ar thread argument may be either a TID, PID, or pointer to a thread structure. If .Ar thread is not specified, then the locks held by the current thread are displayed. .It Ic show all locks Outputs the list of locks held by all threads in the system to the kernel console. .It Ic show witness Dump the current order list to the kernel console. The code first displays the lock order tree for all of the sleep locks. Then it displays the lock order tree for all of the spin locks. Finally, it displays a list of locks that have not yet been acquired. .El .Sh SEE ALSO .Xr ddb 4 , .Xr loader 8 , .Xr sysctl 8 , .Xr mutex 9 .Sh HISTORY The .Nm code first appeared in .Bsx 5.0 and was imported from there into .Fx 5.0 . Index: head/share/man/man9/BUS_GET_CPUS.9 =================================================================== --- head/share/man/man9/BUS_GET_CPUS.9 (revision 344854) +++ head/share/man/man9/BUS_GET_CPUS.9 (revision 344855) @@ -1,101 +1,100 @@ .\" -*- nroff -*- .\" .\" Copyright (c) 2016 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" .\" $FreeBSD$ .\" .Dd March 1, 2016 .Dt BUS_GET_CPUS 9 .Os .Sh NAME .Nm BUS_GET_CPUS , .Nm bus_get_cpus .Nd "request a set of device-specific CPUs" .Sh SYNOPSIS .In sys/param.h .In sys/bus.h .In sys/cpuset.h .Ft int .Fo BUS_GET_CPUS .Fa "device_t dev" "device_t child" "enum cpu_sets op" "size_t setsize" .Fa "cpuset_t *cpuset" .Fc .Ft int .Fo bus_get_cpus .Fa "device_t dev" "enum cpu_sets op" "size_t setsize" "cpuset_t *cpuset" .Fc .Sh DESCRIPTION The .Fn BUS_GET_CPUS method queries the parent bus device for a set of device-specific CPUs. The .Fa op argument specifies which set of CPUs to retrieve. If successful, the requested set of CPUs are returned in .Fa cpuset . The .Fa setsize argument specifies the size in bytes of the set passed in .Fa cpuset . .Pp .Fn BUS_GET_CPUS supports querying different types of CPU sets via the .Fa op argument. Not all set types are supported for every device. If a set type is not supported, .Fn BUS_GET_CPUS fails with .Er EINVAL . These set types are supported: .Bl -tag -width ".Dv LOCAL_CPUS" .It Dv LOCAL_CPUS The set of CPUs that are local to the device. If a device is closer to a specific memory domain in a non-uniform memory architecture system .Pq NUMA , this will return the set of CPUs in that memory domain. .It Dv INTR_CPUS The preferred set of CPUs that this device should use for device interrupts. This set type must be supported by all bus drivers. .El .Pp The .Fn bus_get_cpus function is a simple wrapper around .Fn BUS_GET_CPUS . .Sh RETURN VALUES Zero is returned on success, otherwise an appropriate error is returned. .Sh SEE ALSO .Xr cpuset 2 , .Xr BUS_BIND_INTR 9 , .Xr device 9 .Sh HISTORY The .Fn BUS_GET_CPUS method and .Fn bus_get_cpus function first appeared in .Fx 11.0 . Index: head/share/man/man9/BUS_RESCAN.9 =================================================================== --- head/share/man/man9/BUS_RESCAN.9 (revision 344854) +++ head/share/man/man9/BUS_RESCAN.9 (revision 344855) @@ -1,51 +1,50 @@ .\" -*- nroff -*- .\" .\" Copyright (c) 2016 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" .\" $FreeBSD$ .\" .Dd April 27, 2016 .Dt BUS_RESCAN 9 .Os .Sh NAME .Nm BUS_RESCAN .Nd "rescan a bus checking for devices that have been added or removed" .Sh SYNOPSIS .In sys/param.h .In sys/bus.h .Ft void .Fn BUS_RESCAN "device_t dev" .Sh DESCRIPTION The .Fn BUS_RESCAN method is called to request a rescan of the child devices on a bus device. The method should add any devices that have been added since the previous scan and remove devices that have been removed. This method is not required to re-examine existing devices to determine if their properties have changed. This method is also not required to propagate the rescan request to child devices. .Sh SEE ALSO .Xr device 9 Index: head/share/man/man9/atomic.9 =================================================================== --- head/share/man/man9/atomic.9 (revision 344854) +++ head/share/man/man9/atomic.9 (revision 344855) @@ -1,602 +1,601 @@ .\" Copyright (c) 2000-2001 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE DEVELOPERS ``AS IS'' AND ANY EXPRESS OR .\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES .\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. .\" IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT, .\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT .\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, .\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY .\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
.\" .\" $FreeBSD$ .\" .Dd December 22, 2017 .Dt ATOMIC 9 .Os .Sh NAME .Nm atomic_add , .Nm atomic_clear , .Nm atomic_cmpset , .Nm atomic_fcmpset , .Nm atomic_fetchadd , .Nm atomic_load , .Nm atomic_readandclear , .Nm atomic_set , .Nm atomic_subtract , .Nm atomic_store , .Nm atomic_thread_fence .Nd atomic operations .Sh SYNOPSIS .In sys/types.h .In machine/atomic.h .Ft void .Fn atomic_add_[acq_|rel_] "volatile *p" " v" .Ft void .Fn atomic_clear_[acq_|rel_] "volatile *p" " v" .Ft int .Fo atomic_cmpset_[acq_|rel_] .Fa "volatile *dst" .Fa " old" .Fa " new" .Fc .Ft int .Fo atomic_fcmpset_[acq_|rel_] .Fa "volatile *dst" .Fa " *old" .Fa " new" .Fc .Ft .Fn atomic_fetchadd_ "volatile *p" " v" .Ft .Fn atomic_load_[acq_] "volatile *p" .Ft .Fn atomic_readandclear_ "volatile *p" .Ft void .Fn atomic_set_[acq_|rel_] "volatile *p" " v" .Ft void .Fn atomic_subtract_[acq_|rel_] "volatile *p" " v" .Ft void .Fn atomic_store_[rel_] "volatile *p" " v" .Ft .Fn atomic_swap_ "volatile *p" " v" .Ft int .Fn atomic_testandclear_ "volatile *p" "u_int v" .Ft int .Fn atomic_testandset_ "volatile *p" "u_int v" .Ft void .Fn atomic_thread_fence_[acq|acq_rel|rel|seq_cst] "void" .Sh DESCRIPTION Atomic operations are commonly used to implement reference counts and as building blocks for synchronization primitives, such as mutexes. .Pp All of these operations are performed .Em atomically across multiple threads and in the presence of interrupts, meaning that they are performed in an indivisible manner from the perspective of concurrently running threads and interrupt handlers. .Pp On all architectures supported by .Fx , ordinary loads and stores of integers in cache-coherent memory are inherently atomic if the integer is naturally aligned and its size does not exceed the processor's word size. However, such loads and stores may be elided from the program by the compiler, whereas atomic operations are always performed. .Pp When atomic operations are performed on cache-coherent memory, all operations on the same location are totally ordered. .Pp When an atomic load is performed on a location in cache-coherent memory, it reads the entire value that was defined by the last atomic store to each byte of the location. An atomic load will never return a value out of thin air. When an atomic store is performed on a location, no other thread or interrupt handler will observe a .Em torn write , or partial modification of the location. .Pp Except as noted below, the semantics of these operations are almost identical to the semantics of similarly named C11 atomic operations. .Ss Types Most atomic operations act upon a specific .Fa type . That type is indicated in the function name. In contrast to C11 atomic operations, .Fx Ns 's atomic operations are performed on ordinary integer types. The available types are: .Pp .Bl -tag -offset indent -width short -compact .It Li int unsigned integer .It Li long unsigned long integer .It Li ptr unsigned integer the size of a pointer .It Li 32 unsigned 32-bit integer .It Li 64 unsigned 64-bit integer .El .Pp For example, the function to atomically add two integers is called .Fn atomic_add_int . .Pp Certain architectures also provide operations for types smaller than .Dq Li int . .Pp .Bl -tag -offset indent -width short -compact .It Li char unsigned character .It Li short unsigned short integer .It Li 8 unsigned 8-bit integer .It Li 16 unsigned 16-bit integer .El .Pp These types must not be used in machine-independent code. 
.Ss Acquire and Release Operations By default, a thread's accesses to different memory locations might not be performed in .Em program order , that is, the order in which the accesses appear in the source code. To optimize the program's execution, both the compiler and processor might reorder the thread's accesses. However, both ensure that their reordering of the accesses is not visible to the thread. Otherwise, the traditional memory model that is expected by single-threaded programs would be violated. Nonetheless, other threads in a multithreaded program, such as the .Fx kernel, might observe the reordering. Moreover, in some cases, such as the implementation of synchronization between threads, arbitrary reordering might result in the incorrect execution of the program. To constrain the reordering that both the compiler and processor might perform on a thread's accesses, a programmer can use atomic operations with .Em acquire and .Em release semantics. .Pp Atomic operations on memory have up to three variants. The first, or .Em relaxed variant, performs the operation without imposing any ordering constraints on accesses to other memory locations. This variant is the default. The second variant has acquire semantics, and the third variant has release semantics. .Pp When an atomic operation has acquire semantics, the operation must have completed before any subsequent load or store (by program order) is performed. Conversely, acquire semantics do not require that prior loads or stores have completed before the atomic operation is performed. An atomic operation can only have acquire semantics if it performs a load from memory. To denote acquire semantics, the suffix .Dq Li _acq is inserted into the function name immediately prior to the .Dq Li _ Ns Aq Fa type suffix. For example, to subtract two integers ensuring that the subtraction is completed before any subsequent loads and stores are performed, use .Fn atomic_subtract_acq_int . .Pp When an atomic operation has release semantics, all prior loads or stores (by program order) must have completed before the operation is performed. Conversely, release semantics do not require that the atomic operation must have completed before any subsequent load or store is performed. An atomic operation can only have release semantics if it performs a store to memory. To denote release semantics, the suffix .Dq Li _rel is inserted into the function name immediately prior to the .Dq Li _ Ns Aq Fa type suffix. For example, to add two long integers ensuring that all prior loads and stores are completed before the addition is performed, use .Fn atomic_add_rel_long . .Pp When a release operation by one thread .Em synchronizes with an acquire operation by another thread, usually meaning that the acquire operation reads the value written by the release operation, then the effects of all prior stores by the releasing thread must become visible to subsequent loads by the acquiring thread. Moreover, the effects of all stores (by other threads) that were visible to the releasing thread must also become visible to the acquiring thread. These rules only apply to the synchronizing threads. Other threads might observe these stores in a different order. .Pp In effect, atomic operations with acquire and release semantics establish one-way barriers to reordering that enable the implementations of synchronization primitives to express their ordering requirements without also imposing unnecessary ordering. 
For example, for a critical section guarded by a mutex, an acquire operation when the mutex is locked and a release operation when the mutex is unlocked will prevent any loads or stores from moving outside of the critical section. However, they will not prevent the compiler or processor from moving loads or stores into the critical section, which does not violate the semantics of a mutex. .Ss Thread Fence Operations Alternatively, a programmer can use atomic thread fence operations to constrain the reordering of accesses. In contrast to other atomic operations, fences do not, themselves, access memory. .Pp When a fence has acquire semantics, all prior loads (by program order) must have completed before any subsequent load or store is performed. Thus, an acquire fence is a two-way barrier for load operations. To denote acquire semantics, the suffix .Dq Li _acq is appended to the function name, for example, .Fn atomic_thread_fence_acq . .Pp When a fence has release semantics, all prior loads or stores (by program order) must have completed before any subsequent store operation is performed. Thus, a release fence is a two-way barrier for store operations. To denote release semantics, the suffix .Dq Li _rel is appended to the function name, for example, .Fn atomic_thread_fence_rel . .Pp Although .Fn atomic_thread_fence_acq_rel implements both acquire and release semantics, it is not a full barrier. For example, a store prior to the fence (in program order) may be completed after a load subsequent to the fence. In contrast, .Fn atomic_thread_fence_seq_cst implements a full barrier. Neither loads nor stores may cross this barrier in either direction. .Pp In C11, a release fence by one thread synchronizes with an acquire fence by another thread when an atomic load that is prior to the acquire fence (by program order) reads the value written by an atomic store that is subsequent to the release fence. In contrast, in FreeBSD, because of the atomicity of ordinary, naturally aligned loads and stores, fences can also be synchronized by ordinary loads and stores. This simplifies the implementation and use of some synchronization primitives in .Fx . .Pp Since neither a compiler nor a processor can foresee which (atomic) load will read the value written by an (atomic) store, the ordering constraints imposed by fences must be more restrictive than acquire loads and release stores. Essentially, this is why fences are two-way barriers. .Pp Although fences impose more restrictive ordering than acquire loads and release stores, by separating access from ordering, they can sometimes facilitate more efficient implementations of synchronization primitives. For example, they can be used to avoid executing a memory barrier until a memory access shows that some condition is satisfied. .Ss Multiple Processors In multiprocessor systems, the atomicity of the atomic operations on memory depends on support for cache coherence in the underlying architecture. In general, cache coherence on the default memory type, .Dv VM_MEMATTR_DEFAULT , is guaranteed by all architectures that are supported by .Fx . For example, cache coherence is guaranteed on write-back memory by the .Tn amd64 and .Tn i386 architectures. However, on some architectures, cache coherence might not be enabled on all memory types. To determine if cache coherence is enabled for a non-default memory type, consult the architecture's documentation. .Ss Semantics This section describes the semantics of each operation using a C-like notation.
.Bl -hang .It Fn atomic_add p v .Bd -literal -compact *p += v; .Ed .It Fn atomic_clear p v .Bd -literal -compact *p &= ~v; .Ed .It Fn atomic_cmpset dst old new .Bd -literal -compact if (*dst == old) { *dst = new; return (1); } else return (0); .Ed .El .Pp Some architectures do not implement the .Fn atomic_cmpset functions for the types .Dq Li char , .Dq Li short , .Dq Li 8 , and .Dq Li 16 . .Bl -hang .It Fn atomic_fcmpset dst *old new .El .Pp On architectures implementing .Em Compare And Swap operation in hardware, the functionality can be described as .Bd -literal -offset indent -compact if (*dst == *old) { *dst = new; return (1); } else { *old = *dst; return (0); } .Ed On architectures which provide the .Em Load Linked/Store Conditional primitive, the write to .Dv *dst might also fail for several reasons, the most important of which is a parallel write to the .Dv *dst cache line by another CPU. In this case, the .Fn atomic_fcmpset function also returns .Dv false , despite .Dl *old == *dst . .Pp Some architectures do not implement the .Fn atomic_fcmpset functions for the types .Dq Li char , .Dq Li short , .Dq Li 8 , and .Dq Li 16 . .Bl -hang .It Fn atomic_fetchadd p v .Bd -literal -compact tmp = *p; *p += v; return (tmp); .Ed .El .Pp The .Fn atomic_fetchadd functions are only implemented for the types .Dq Li int , .Dq Li long and .Dq Li 32 and do not have any variants with memory barriers at this time. .Bl -hang .It Fn atomic_load p .Bd -literal -compact return (*p); .Ed .It Fn atomic_readandclear p .Bd -literal -compact tmp = *p; *p = 0; return (tmp); .Ed .El .Pp The .Fn atomic_readandclear functions are not implemented for the types .Dq Li char , .Dq Li short , .Dq Li ptr , .Dq Li 8 , and .Dq Li 16 and do not have any variants with memory barriers at this time. .Bl -hang .It Fn atomic_set p v .Bd -literal -compact *p |= v; .Ed .It Fn atomic_subtract p v .Bd -literal -compact *p -= v; .Ed .It Fn atomic_store p v .Bd -literal -compact *p = v; .Ed .It Fn atomic_swap p v .Bd -literal -compact tmp = *p; *p = v; return (tmp); .Ed .El .Pp The .Fn atomic_swap functions are not implemented for the types .Dq Li char , .Dq Li short , .Dq Li ptr , .Dq Li 8 , and .Dq Li 16 and do not have any variants with memory barriers at this time. .Bl -hang .It Fn atomic_testandclear p v .Bd -literal -compact bit = 1 << (v % (sizeof(*p) * NBBY)); tmp = (*p & bit) != 0; *p &= ~bit; return (tmp); .Ed .El .Bl -hang .It Fn atomic_testandset p v .Bd -literal -compact bit = 1 << (v % (sizeof(*p) * NBBY)); tmp = (*p & bit) != 0; *p |= bit; return (tmp); .Ed .El .Pp The .Fn atomic_testandset and .Fn atomic_testandclear functions are only implemented for the types .Dq Li int , .Dq Li long and .Dq Li 32 and do not have any variants with memory barriers at this time. .Pp The type .Dq Li 64 is currently not implemented for any of the atomic operations on the .Tn arm , .Tn i386 , and .Tn powerpc architectures. .Sh RETURN VALUES The .Fn atomic_cmpset function returns the result of the compare operation. The .Fn atomic_fcmpset function returns .Dv true if the operation succeeded. Otherwise it returns .Dv false and sets .Va *old to the found value. The .Fn atomic_fetchadd , .Fn atomic_load , .Fn atomic_readandclear , and .Fn atomic_swap functions return the value at the specified address. The .Fn atomic_testandset and .Fn atomic_testandclear functions return the result of the test operation. .Sh EXAMPLES This example uses the .Fn atomic_cmpset_acq_ptr and .Fn atomic_set_ptr functions to obtain a sleep mutex and handle recursion.
Since the .Va mtx_lock member of a .Vt "struct mtx" is a pointer, the .Dq Li ptr type is used. .Bd -literal /* Try to obtain mtx_lock once. */ #define _obtain_lock(mp, tid) \\ atomic_cmpset_acq_ptr(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) /* Get a sleep lock, deal with recursion inline. */ #define _get_sleep_lock(mp, tid, opts, file, line) do { \\ uintptr_t _tid = (uintptr_t)(tid); \\ \\ if (!_obtain_lock(mp, tid)) { \\ if (((mp)->mtx_lock & MTX_FLAGMASK) != _tid) \\ _mtx_lock_sleep((mp), _tid, (opts), (file), (line));\\ else { \\ atomic_set_ptr(&(mp)->mtx_lock, MTX_RECURSE); \\ (mp)->mtx_recurse++; \\ } \\ } \\ } while (0) .Ed .Sh HISTORY The .Fn atomic_add , .Fn atomic_clear , .Fn atomic_set , and .Fn atomic_subtract operations were introduced in .Fx 3.0 . Initially, these operations were defined on the types .Dq Li char , .Dq Li short , .Dq Li int , and .Dq Li long . .Pp The .Fn atomic_cmpset , .Fn atomic_load_acq , .Fn atomic_readandclear , and .Fn atomic_store_rel operations were added in .Fx 5.0 . Simultaneously, the acquire and release variants were introduced, and support was added for operation on the types .Dq Li 8 , .Dq Li 16 , .Dq Li 32 , .Dq Li 64 , and .Dq Li ptr . .Pp The .Fn atomic_fetchadd operation was added in .Fx 6.0 . .Pp The .Fn atomic_swap and .Fn atomic_testandset operations were added in .Fx 10.0 . .Pp The .Fn atomic_testandclear and .Fn atomic_thread_fence operations were added in .Fx 11.0 . .Pp The relaxed variants of .Fn atomic_load and .Fn atomic_store were added in .Fx 12.0 . Index: head/share/man/man9/bus_map_resource.9 =================================================================== --- head/share/man/man9/bus_map_resource.9 (revision 344854) +++ head/share/man/man9/bus_map_resource.9 (revision 344855) @@ -1,166 +1,165 @@ .\" -*- nroff -*- .\" .\" Copyright (c) 2016 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" .\" $FreeBSD$ .\" .Dd February 5, 2018 .Dt BUS_MAP_RESOURCE 9 .Os .Sh NAME .Nm bus_map_resource , bus_unmap_resource , resource_init_map_request .Nd map or unmap an active resource .Sh SYNOPSIS .In sys/param.h .In sys/bus.h .Pp .In machine/bus.h .In sys/rman.h .In machine/resource.h .Ft int .Fo bus_map_resource .Fa "device_t dev" "int type" "struct resource *r" .Fa "struct resource_map_request *args" "struct resource_map *map" .Fc .Ft int .Fo bus_unmap_resource .Fa "device_t dev" "int type" "struct resource *r" "struct resource_map *map" .Fc .Ft void .Fn resource_init_map_request "struct resource_map_request *args" .Sh DESCRIPTION These functions create or destroy a mapping of a previously activated resource. Mappings permit CPU access to the resource via the .Xr bus_space 9 API. .Pp The arguments are as follows: .Bl -tag -width indent .It Fa dev The device that owns the resource. .It Fa type The type of resource to map. It is one of: .Pp .Bl -tag -width ".Dv SYS_RES_MEMORY" -compact .It Dv SYS_RES_IOPORT for I/O ports .It Dv SYS_RES_MEMORY for I/O memory .El .It Fa r A pointer to the .Vt "struct resource" returned by .Xr bus_alloc_resource 9 . .It Fa args A set of optional properties to apply when creating a mapping. This argument can be set to .Dv NULL to request a mapping of the entire resource with the default properties. .It Fa map The resource mapping to create or destroy. .El .Ss Resource Mappings Resource mappings are described by a .Vt "struct resource_map" object. This structure contains a .Xr bus_space 9 tag and handle in the .Va r_bustag and .Va r_bushandle members that can be used for CPU access to the mapping. The structure also contains a .Va r_vaddr member which contains the virtual address of the mapping if one exists. .Pp The wrapper API for .Vt "struct resource" objects described in .Xr bus_activate_resource 9 can also be used with .Vt "struct resource_map" . For example, a pointer to a mapping object can be passed as the first argument to .Fn bus_read_4 . This wrapper API is preferred over using the .Va r_bustag and .Va r_bushandle members directly. .Ss Optional Mapping Properties The .Vt "struct resource_map_request" object passed in .Fa args can be used to specify optional properties of a mapping. The structure must be initialized by invoking .Fn resource_init_map_request . Properties are then specified by setting one or more of these members: .Bl -tag -width indent .It Va offset , length These two members specify a region of the resource to map. By default a mapping is created for the entire resource. The .Va offset is relative to the start of the resource. .It Va memattr Specifies a memory attribute to use when mapping the resource. By default memory mappings use the .Dv VM_MEMATTR_UNCACHEABLE attribute. .El .Sh RETURN VALUES Zero is returned on success, otherwise an error is returned. .Sh EXAMPLES This maps a PCI memory BAR with the write-combining memory attribute and reads the first 32-bit word: .Bd -literal struct resource *r; struct resource_map map; struct resource_map_request req; uint32_t val; int rid; rid = PCIR_BAR(0); r = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE | RF_UNMAPPED); resource_init_map_request(&req); req.memattr = VM_MEMATTR_WRITE_COMBINING; bus_map_resource(dev, SYS_RES_MEMORY, r, &req, &map); val = bus_read_4(&map, 0); .Ed .Sh SEE ALSO .Xr bus_activate_resource 9 , .Xr bus_alloc_resource 9 , .Xr bus_space 9 , .Xr device 9 , .Xr driver 9 .Sh AUTHORS This manual page was written by .An John Baldwin Aq Mt jhb@FreeBSD.org . 
Index: head/share/man/man9/critical_enter.9 =================================================================== --- head/share/man/man9/critical_enter.9 (revision 344854) +++ head/share/man/man9/critical_enter.9 (revision 344855) @@ -1,74 +1,73 @@ .\" Copyright (c) 2001,2002 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd October 5, 2005 .Dt CRITICAL_ENTER 9 .Os .Sh NAME .Nm critical_enter , .Nm critical_exit .Nd enter and exit a critical region .Sh SYNOPSIS .In sys/param.h .In sys/systm.h .Ft void .Fn critical_enter "void" .Ft void .Fn critical_exit "void" .Sh DESCRIPTION These functions are used to prevent preemption in a critical region of code. All that is guaranteed is that the thread currently executing on a CPU will not be preempted. Specifically, a thread in a critical region will not migrate to another CPU while it is in a critical region. The current CPU may still trigger faults and exceptions during a critical section; however, these faults are usually fatal. .Pp The .Fn critical_enter and .Fn critical_exit functions manage a per-thread counter to handle nested critical sections. If a thread is made runnable that would normally preempt the current thread while the current thread is in a critical section, then the preemption will be deferred until the current thread exits the outermost critical section. .Pp Note that these functions are not required to provide any inter-CPU synchronization, data protection, or memory ordering guarantees and thus should .Em not be used to protect shared data structures. .Pp These functions should be used with care as an infinite loop within a critical region will deadlock the CPU. Also, they should not be interlocked with operations on mutexes, sx locks, semaphores, or other synchronization primitives. One exception to this is that spin mutexes include a critical section, so in certain cases critical sections may be interlocked with spin mutexes. .Sh HISTORY These functions were introduced in .Fx 5.0 . 
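.Pp
A minimal usage sketch follows; the per-CPU counter array is hypothetical, chosen only to illustrate the no-preemption, no-migration guarantee, and it provides no synchronization against other CPUs:
.Bd -literal
static u_long pinned_events[MAXCPU];

critical_enter();
/* Safe: this thread cannot be preempted or migrate off this CPU. */
pinned_events[PCPU_GET(cpuid)]++;
critical_exit();
.Ed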
Index: head/share/man/man9/ithread.9 =================================================================== --- head/share/man/man9/ithread.9 (revision 344854) +++ head/share/man/man9/ithread.9 (revision 344855) @@ -1,351 +1,350 @@ .\" Copyright (c) 2001 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd August 25, 2006 .Dt ITHREAD 9 .Os .Sh NAME .Nm ithread_add_handler , .Nm ithread_create , .Nm ithread_destroy , .Nm ithread_priority , .Nm ithread_remove_handler , .Nm ithread_schedule .Nd kernel interrupt threads .Sh SYNOPSIS .In sys/param.h .In sys/bus.h .In sys/interrupt.h .Ft int .Fo ithread_add_handler .Fa "struct ithd *ithread" .Fa "const char *name" .Fa "driver_intr_t handler" .Fa "void *arg" .Fa "u_char pri" .Fa "enum intr_type flags" .Fa "void **cookiep" .Fc .Ft int .Fo ithread_create .Fa "struct ithd **ithread" .Fa "int vector" .Fa "int flags" .Fa "void (*disable)(int)" .Fa "void (*enable)(int)" .Fa "const char *fmt" .Fa "..." .Fc .Ft int .Fn ithread_destroy "struct ithd *ithread" .Ft u_char .Fn ithread_priority "enum intr_type flags" .Ft int .Fn ithread_remove_handler "void *cookie" .Ft int .Fn ithread_schedule "struct ithd *ithread" "int do_switch" .Sh DESCRIPTION Interrupt threads are kernel threads that run a list of handlers when triggered by either a hardware or software interrupt. Each interrupt handler has a name, handler function, handler argument, priority, and various flags. Each interrupt thread maintains a list of handlers sorted by priority. This results in higher priority handlers being executed prior to lower priority handlers. Each thread assumes the priority of its highest priority handler for its process priority, or .Dv PRIO_MAX if it has no handlers. Interrupt threads are also associated with a single interrupt source, represented as a vector number. .Pp The .Fn ithread_create function creates a new interrupt thread. The .Fa ithread argument points to a .Vt struct ithd pointer that will point to the newly created thread upon success. The .Fa vector argument specifies the interrupt source to associate this thread with. The .Fa flags argument is a mask of properties of this thread.
The only flag currently valid for .Fn ithread_create is .Dv IT_SOFT , which specifies that this interrupt thread is a software interrupt. The .Fa enable and .Fa disable arguments specify optional functions used to enable and disable this interrupt thread's interrupt source. The functions receive the vector corresponding to the thread's interrupt source as their only argument. The remaining arguments form a .Xr printf 9 argument list that is used to build the base name of the new ithread. The full name of an interrupt thread is formed by concatenating the base name of an interrupt thread with the names of all of its interrupt handlers. .Pp The .Fn ithread_destroy function destroys a previously created interrupt thread by releasing its resources and arranging for the backing kernel thread to terminate. An interrupt thread can only be destroyed if it has no handlers remaining. .Pp The .Fn ithread_add_handler function adds a new handler to an existing interrupt thread specified by .Fa ithread . The .Fa name argument specifies a name for this handler. The .Fa handler and .Fa arg arguments provide the function to execute for this handler and an argument to pass to it. The .Fa pri argument specifies the priority of this handler and is used both to sort it in relation to the other handlers for this thread and to specify the priority of the backing kernel thread. The .Fa flags argument can be used to specify properties of this handler as defined in .In sys/bus.h . If .Fa cookiep is not .Dv NULL , then it will be assigned a cookie that can be used later to remove this handler. .Pp The .Fn ithread_remove_handler function removes a handler from an interrupt thread. The .Fa cookie argument specifies the handler to remove from its thread. .Pp The .Fn ithread_schedule function schedules an interrupt thread to run. If the .Fa do_switch argument is non-zero and the interrupt thread is idle, then a context switch will be forced after putting the interrupt thread on the run queue. .Pp The .Fn ithread_priority function translates the .Dv INTR_TYPE_* interrupt flags into interrupt handler priorities. .Pp The interrupt flags not related to the type of a particular interrupt .Pq Dv INTR_TYPE_* can be used to specify additional properties of both hardware and software interrupt handlers. The .Dv INTR_EXCL flag specifies that this handler cannot share an interrupt thread with another handler. The .Dv INTR_MPSAFE flag specifies that this handler is MP safe in that it does not need the Giant mutex to be held while it is executed. The .Dv INTR_ENTROPY flag specifies that the interrupt source this handler is tied to is a good source of entropy, and thus that entropy should be gathered when an interrupt from the handler's source triggers. Presently, the .Dv INTR_ENTROPY flag is not valid for software interrupt handlers. .Pp It is not permitted to sleep in an interrupt thread; hence, any memory or zone allocations in an interrupt thread should be specified with the .Dv M_NOWAIT flag set. Any allocation errors must then be handled. .Sh RETURN VALUES The .Fn ithread_add_handler , .Fn ithread_create , .Fn ithread_destroy , .Fn ithread_remove_handler , and .Fn ithread_schedule functions return zero on success and non-zero on failure. The .Fn ithread_priority function returns a process priority corresponding to the passed-in interrupt flags. .Sh EXAMPLES The .Fn swi_add function demonstrates the use of .Fn ithread_create and .Fn ithread_add_handler .
.Bd -literal -offset indent int swi_add(struct ithd **ithdp, const char *name, driver_intr_t handler, void *arg, int pri, enum intr_type flags, void **cookiep) { struct proc *p; struct ithd *ithd; int error; if (flags & INTR_ENTROPY) return (EINVAL); ithd = (ithdp != NULL) ? *ithdp : NULL; if (ithd != NULL) { if ((ithd->it_flags & IT_SOFT) == 0) return(EINVAL); } else { error = ithread_create(&ithd, pri, IT_SOFT, NULL, NULL, "swi%d:", pri); if (error) return (error); if (ithdp != NULL) *ithdp = ithd; } return (ithread_add_handler(ithd, name, handler, arg, pri + PI_SOFT, flags, cookiep)); } .Ed .Sh ERRORS The .Fn ithread_add_handler function will fail if: .Bl -tag -width Er .It Bq Er EINVAL Any of the .Fa ithread , .Fa handler , or .Fa name arguments are .Dv NULL . .It Bq Er EINVAL The .Dv INTR_EXCL flag is specified and the interrupt thread .Fa ithread already has at least one handler, or the interrupt thread .Fa ithread already has an exclusive handler. .It Bq Er ENOMEM Could not allocate needed memory for this handler. .El .Pp The .Fn ithread_create function will fail if: .Bl -tag -width Er .It Bq Er EAGAIN The system-imposed limit on the total number of processes under execution would be exceeded. The limit is given by the .Xr sysctl 3 MIB variable .Dv KERN_MAXPROC . .It Bq Er EINVAL A flag other than .Dv IT_SOFT was specified in the .Fa flags parameter. .It Bq Er ENOMEM Could not allocate needed memory for this interrupt thread. .El .Pp The .Fn ithread_destroy function will fail if: .Bl -tag -width Er .It Bq Er EINVAL The .Fa ithread argument is .Dv NULL . .It Bq Er EINVAL The interrupt thread pointed to by .Fa ithread has at least one handler. .El .Pp The .Fn ithread_remove_handler function will fail if: .Bl -tag -width Er .It Bq Er EINVAL The .Fa cookie argument is .Dv NULL . .El .Pp The .Fn ithread_schedule function will fail if: .Bl -tag -width Er .It Bq Er EINVAL The .Fa ithread argument is .Dv NULL . .It Bq Er EINVAL The interrupt thread pointed to by .Fa ithread has no interrupt handlers. .El .Sh SEE ALSO .Xr kthread 9 , .Xr malloc 9 , .Xr swi 9 , .Xr uma 9 .Sh HISTORY Interrupt threads and their corresponding API first appeared in .Fx 5.0 . .Sh BUGS Currently .Vt struct ithd represents both an interrupt source and an interrupt thread. There should be a separate .Vt struct isrc that contains a vector number, enable and disable functions, etc.\& that an ithread holds a reference to. Index: head/share/man/man9/ktr.9 =================================================================== --- head/share/man/man9/ktr.9 (revision 344854) +++ head/share/man/man9/ktr.9 (revision 344855) @@ -1,162 +1,161 @@ .\" Copyright (c) 2001 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd November 30, 2008 .Dt KTR 9 .Os .Sh NAME .Nm CTR0 , CTR1 , CTR2 , CTR3 , CTR4 , CTR5 , CTR6 .Nd kernel tracing facility .Sh SYNOPSIS .In sys/param.h .In sys/ktr.h .Vt "extern int ktr_cpumask" ; .Vt "extern int ktr_entries" ; .Vt "extern int ktr_extend" ; .Vt "extern int ktr_mask" ; .Vt "extern int ktr_verbose" ; .Vt "extern struct ktr_entry ktr_buf[]" ; .Ft void .Fn CTR0 "u_int mask" "char *format" .Ft void .Fn CTR1 "u_int mask" "char *format" "arg1" .Ft void .Fn CTR2 "u_int mask" "char *format" "arg1" "arg2" .Ft void .Fn CTR3 "u_int mask" "char *format" "arg1" "arg2" "arg3" .Ft void .Fn CTR4 "u_int mask" "char *format" "arg1" "arg2" "arg3" "arg4" .Ft void .Fn CTR5 "u_int mask" "char *format" "arg1" "arg2" "arg3" "arg4" "arg5" .Ft void .Fn CTR6 "u_int mask" "char *format" "arg1" "arg2" "arg3" "arg4" "arg5" "arg6" .Sh DESCRIPTION KTR provides a circular buffer of events that can be logged in a .Xr printf 9 style fashion. These events can then be dumped with .Xr ddb 4 , .Xr gdb 1 , or .Xr ktrdump 8 . .Pp Events are created and logged in the kernel via the .Dv CTR Ns Ar x macros. The first parameter is a mask of event types .Pq Dv KTR_* defined in .In sys/ktr.h . The event will be logged only if any of the event types specified in .Fa mask are enabled in the global event mask stored in .Va ktr_mask . The .Fa format argument is a .Xr printf 9 style format string used to build the text of the event log message. Following the .Fa format string are zero to six arguments referenced by .Fa format . Each event is logged with the file name and source line number of the originating CTR call and a timestamp, in addition to the log message. .Pp The event is stored in the circular buffer with the supplied arguments as-is, and formatting is done at dump time. Do not use pointers to objects with limited lifetimes, such as strings, because the pointer may no longer be valid when the buffer is printed. .Pp Note that the different macros differ only in the number of arguments each one takes, as indicated by its name. .Pp The .Va ktr_entries variable contains the number of entries in the .Va ktr_buf array. These variables are mostly useful for post-mortem crash dump tools to locate the base of the circular trace buffer and its length. .Pp The .Va ktr_mask variable contains the run time mask of events to log. .Pp The CPU event mask is stored in the .Va ktr_cpumask variable. .Pp The .Va ktr_verbose variable stores the verbose flag that controls whether events are logged to the console in addition to the event buffer. .Sh EXAMPLES This example demonstrates the use of tracepoints at the .Dv KTR_PROC logging level. .Bd -literal void mi_switch() { ... /* * Pick a new current process and record its start time. */ ... CTR2(KTR_PROC, "mi_switch: old proc %p (pid %d)", p, p->p_pid); ... cpu_switch(); ... CTR2(KTR_PROC, "mi_switch: new proc %p (pid %d)", p, p->p_pid); ...
} .Ed .Sh SEE ALSO .Xr ktr 4 , .Xr ktrdump 8 .Sh HISTORY The KTR kernel tracing facility first appeared in .Bsx 3.0 and was imported into .Fx 5.0 . .Sh BUGS Currently, there is one global buffer shared among all CPUs. It might be profitable at some point in time to use per-CPU buffers instead so that if one CPU halts or starts spinning, then the log messages it emitted just prior to halting or spinning will not be drowned out by events from the other CPUs. .Pp The arguments given to the .Fn CTRx macros are stored as .Vt u_long , so do not pass arguments larger than the size of a .Vt u_long . For example, passing 64-bit arguments on 32-bit architectures will give incorrect results. Index: head/share/man/man9/runqueue.9 =================================================================== --- head/share/man/man9/runqueue.9 (revision 344854) +++ head/share/man/man9/runqueue.9 (revision 344855) @@ -1,137 +1,136 @@ .\" Copyright (c) 2000-2001 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE DEVELOPERS ``AS IS'' AND ANY EXPRESS OR .\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES .\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. .\" IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT, .\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT .\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, .\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY .\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd August 15, 2010 .Dt RUNQUEUE 9 .Os .Sh NAME .Nm choosethread , .Nm procrunnable , .Nm remrunqueue , .Nm setrunqueue .Nd manage the queue of runnable processes .Sh SYNOPSIS .In sys/param.h .In sys/proc.h .Vt "extern struct rq itqueues[]" ; .Vt "extern struct rq rtqueues[]" ; .Vt "extern struct rq queues[]" ; .Vt "extern struct rq idqueues[]" ; .Ft struct thread * .Fn choosethread "void" .Ft int .Fn procrunnable "void" .Ft void .Fn remrunqueue "struct thread *td" .Ft void .Fn setrunqueue "struct thread *td" .Sh DESCRIPTION The run queue consists of four priority queues: .Va itqueues for interrupt threads, .Va rtqueues for realtime priority processes, .Va queues for time sharing processes, and .Va idqueues for idle priority processes. Each priority queue consists of an array of .Dv NQS queue header structures. Each queue header identifies a list of runnable processes of equal priority. Each queue also has a single word that contains a bit mask identifying non-empty queues to assist in selecting a process quickly. These are named .Va itqueuebits , .Va rtqueuebits , .Va queuebits , and .Va idqueuebits . The run queues are protected by the .Va sched_lock mutex. .Pp .Fn procrunnable returns zero if there are no runnable processes other than the idle process.
If there is at least one runnable process other than the idle process, it will return a non-zero value. Note that the .Va sched_lock mutex does .Em not need to be held when this function is called. There is a small race window where one CPU may place a process on the run queue when there are currently no other runnable processes while another CPU is calling this function. In that case the second CPU will simply travel through the idle loop one additional time before noticing that there is a runnable process. This works because idle CPUs are not halted in SMP systems. If idle CPUs are halted in SMP systems, then this race condition might have more serious repercussions in the losing case, and .Fn procrunnable may have to require that the .Va sched_lock mutex be acquired. .Pp .Fn choosethread returns the highest priority runnable thread. If there are no runnable threads, then the idle thread is returned. This function is called by .Fn cpu_switch and .Fn cpu_throw to determine which thread to switch to. .Fn choosethread must be called with the .Va sched_lock mutex held. .Pp .Fn setrunqueue adds the thread .Fa td to the tail of the appropriate queue in the proper priority queue. The thread must be runnable, i.e.\& .Va p_stat must be set to .Dv SRUN . This function must be called with the .Va sched_lock mutex held. .Pp .Fn remrunqueue removes thread .Fa td from its run queue. If .Fa td is not on a run queue, then the kernel will .Xr panic 9 . This function must be called with the .Va sched_lock mutex held. .Sh SEE ALSO .Xr cpu_switch 9 , .Xr scheduler 9 , .Xr sleepqueue 9 Index: head/share/man/man9/scheduler.9 =================================================================== --- head/share/man/man9/scheduler.9 (revision 344854) +++ head/share/man/man9/scheduler.9 (revision 344855) @@ -1,276 +1,275 @@ .\" Copyright (c) 2000-2001 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE DEVELOPERS ``AS IS'' AND ANY EXPRESS OR .\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES .\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. .\" IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT, .\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT .\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, .\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY .\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
.\" .\" $FreeBSD$ .\" .Dd November 3, 2000 .Dt SCHEDULER 9 .Os .Sh NAME .Nm curpriority_cmp , .Nm maybe_resched , .Nm resetpriority , .Nm roundrobin , .Nm roundrobin_interval , .Nm sched_setup , .Nm schedclock , .Nm schedcpu , .Nm setrunnable , .Nm updatepri .Nd perform round-robin scheduling of runnable processes .Sh SYNOPSIS .In sys/param.h .In sys/proc.h .Ft int .Fn curpriority_cmp "struct proc *p" .Ft void .Fn maybe_resched "struct thread *td" .Ft void .Fn propagate_priority "struct proc *p" .Ft void .Fn resetpriority "struct ksegrp *kg" .Ft void .Fn roundrobin "void *arg" .Ft int .Fn roundrobin_interval "void" .Ft void .Fn sched_setup "void *dummy" .Ft void .Fn schedclock "struct thread *td" .Ft void .Fn schedcpu "void *arg" .Ft void .Fn setrunnable "struct thread *td" .Ft void .Fn updatepri "struct thread *td" .Sh DESCRIPTION Each process has three different priorities stored in .Vt "struct proc" : .Va p_usrpri , .Va p_nativepri , and .Va p_priority . .Pp The .Va p_usrpri member is the user priority of the process calculated from a process' estimated CPU time and nice level. .Pp The .Va p_nativepri member is the saved priority used by .Fn propagate_priority . When a process obtains a mutex, its priority is saved in .Va p_nativepri . While it holds the mutex, the process's priority may be bumped by another process that blocks on the mutex. When the process releases the mutex, then its priority is restored to the priority saved in .Va p_nativepri . .Pp The .Va p_priority member is the actual priority of the process and is used to determine what .Xr runqueue 9 it runs on, for example. .Pp The .Fn curpriority_cmp function compares the cached priority of the currently running process with process .Fa p . If the currently running process has a higher priority, then it will return a value less than zero. If the current process has a lower priority, then it will return a value greater than zero. If the current process has the same priority as .Fa p , then .Fn curpriority_cmp will return zero. The cached priority of the currently running process is updated when a process resumes from .Xr tsleep 9 or returns to userland in .Fn userret and is stored in the private variable .Va curpriority . .Pp The .Fn maybe_resched function compares the priorities of the current thread and .Fa td . If .Fa td has a higher priority than the current thread, then a context switch is needed, and .Dv KEF_NEEDRESCHED is set. .Pp The .Fn propagate_priority looks at the process that owns the mutex .Fa p is blocked on. That process's priority is bumped to the priority of .Fa p if needed. If the process is currently running, then the function returns. If the process is on a .Xr runqueue 9 , then the process is moved to the appropriate .Xr runqueue 9 for its new priority. If the process is blocked on a mutex, its position in the list of processes blocked on the mutex in question is updated to reflect its new priority. Then, the function repeats the procedure using the process that owns the mutex just encountered. Note that a process's priorities are only bumped to the priority of the original process .Fa p , not to the priority of the previously encountered process. .Pp The .Fn resetpriority function recomputes the user priority of the ksegrp .Fa kg (stored in .Va kg_user_pri ) and calls .Fn maybe_resched to force a reschedule of each thread in the group if needed. .Pp The .Fn roundrobin function is used as a .Xr timeout 9 function to force a reschedule every .Va sched_quantum ticks. 
.Pp The .Fn roundrobin_interval function simply returns the number of clock ticks in between reschedules triggered by .Fn roundrobin . Thus, all it does is return the current value of .Va sched_quantum . .Pp The .Fn sched_setup function is a .Xr SYSINIT 9 that is called to start the callout-driven scheduler functions. It just calls the .Fn roundrobin and .Fn schedcpu functions for the first time. After the initial call, the two functions keep themselves running by registering their callout events again each time they complete. .Pp The .Fn schedclock function is called by .Fn statclock to adjust the priority of the currently running thread's ksegrp. It updates the group's estimated CPU time and then adjusts the priority via .Fn resetpriority . .Pp The .Fn schedcpu function updates all process priorities. First, it updates statistics that track how long processes have been in various process states. Second, it updates the estimated CPU time for the current process such that about 90% of the CPU usage is forgotten in 5 * load average seconds. For example, if the load average is 2.00, then at least 90% of the estimated CPU time for the process should be based on the amount of CPU time the process has had in the last 10 seconds. It then recomputes the priority of the process and moves it to the appropriate .Xr runqueue 9 if necessary. Third, it updates the %CPU estimate used by utilities such as .Xr ps 1 and .Xr top 1 so that 95% of the CPU usage is forgotten in 60 seconds. Once all process priorities have been updated, .Fn schedcpu calls .Fn vmmeter to update various other statistics including the load average. Finally, it schedules itself to run again in .Va hz clock ticks. .Pp The .Fn setrunnable function is used to change a process's state to be runnable. The process is placed on a .Xr runqueue 9 if needed, and the swapper process is woken up and told to swap the process in if the process is swapped out. If the process has been asleep for at least one run of .Fn schedcpu , then .Fn updatepri is used to adjust the priority of the process. .Pp The .Fn updatepri function is used to adjust the priority of a process that has been asleep. It retroactively decays the estimated CPU time of the process for each .Fn schedcpu event that occurred while the process was asleep. Finally, it calls .Fn resetpriority to adjust the priority of the process. .Sh SEE ALSO .Xr mi_switch 9 , .Xr runqueue 9 , .Xr sleepqueue 9 , .Xr tsleep 9 .Sh BUGS The .Va curpriority variable really should be per-CPU. In addition, .Fn maybe_resched should compare the priority of .Fa td with that of each CPU, and then send an IPI to the processor with the lowest priority to trigger a reschedule if needed. .Pp Priority propagation is broken and is thus disabled by default. The .Va p_nativepri variable is only updated if a process does not obtain a sleep mutex on the first try. Also, if a process obtains more than one sleep mutex in this manner, and had its priority bumped in between, then .Va p_nativepri will be clobbered. Index: head/share/man/man9/sleepqueue.9 =================================================================== --- head/share/man/man9/sleepqueue.9 (revision 344854) +++ head/share/man/man9/sleepqueue.9 (revision 344855) @@ -1,390 +1,389 @@ .\" Copyright (c) 2000-2004 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1.
Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE DEVELOPERS ``AS IS'' AND ANY EXPRESS OR .\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES .\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. .\" IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT, .\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT .\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, .\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY .\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd September 22, 2014 .Dt SLEEPQUEUE 9 .Os .Sh NAME .Nm init_sleepqueues , .Nm sleepq_abort , .Nm sleepq_add , .Nm sleepq_alloc , .Nm sleepq_broadcast , .Nm sleepq_free , .Nm sleepq_lock , .Nm sleepq_lookup , .Nm sleepq_release , .Nm sleepq_remove , .Nm sleepq_signal , .Nm sleepq_set_timeout , .Nm sleepq_set_timeout_sbt , .Nm sleepq_sleepcnt , .Nm sleepq_timedwait , .Nm sleepq_timedwait_sig , .Nm sleepq_type , .Nm sleepq_wait , .Nm sleepq_wait_sig .Nd manage the queues of sleeping threads .Sh SYNOPSIS .In sys/param.h .In sys/sleepqueue.h .Ft void .Fn init_sleepqueues "void" .Ft int .Fn sleepq_abort "struct thread *td" .Ft void .Fn sleepq_add "void *wchan" "struct lock_object *lock" "const char *wmesg" "int flags" "int queue" .Ft struct sleepqueue * .Fn sleepq_alloc "void" .Ft int .Fn sleepq_broadcast "void *wchan" "int flags" "int pri" "int queue" .Ft void .Fn sleepq_free "struct sleepqueue *sq" .Ft struct sleepqueue * .Fn sleepq_lookup "void *wchan" .Ft void .Fn sleepq_lock "void *wchan" .Ft void .Fn sleepq_release "void *wchan" .Ft void .Fn sleepq_remove "struct thread *td" "void *wchan" .Ft int .Fn sleepq_signal "void *wchan" "int flags" "int pri" "int queue" .Ft void .Fn sleepq_set_timeout "void *wchan" "int timo" .Ft void .Fn sleepq_set_timeout_sbt "void *wchan" "sbintime_t sbt" \ "sbintime_t pr" "int flags" .Ft u_int .Fn sleepq_sleepcnt "void *wchan" "int queue" .Ft int .Fn sleepq_timedwait "void *wchan" "int pri" .Ft int .Fn sleepq_timedwait_sig "void *wchan" "int pri" .Ft int .Fn sleepq_type "void *wchan" .Ft void .Fn sleepq_wait "void *wchan" "int pri" .Ft int .Fn sleepq_wait_sig "void *wchan" "int pri" .Sh DESCRIPTION Sleep queues provide a mechanism for suspending execution of a thread until some condition is met. Each queue is associated with a specific wait channel when it is active, and only one queue may be associated with a wait channel at any given point in time. The implementation of each wait channel splits its sleepqueue into 2 sub-queues in order to enable some optimizations on threads' wakeups. An active queue holds a list of threads that are blocked on the associated wait channel. Threads that are not blocked on a wait channel have an associated inactive sleep queue. When a thread blocks on a wait channel it donates its inactive sleep queue to the wait channel. 
When a thread is resumed, the wait channel that it was blocked on gives it an inactive sleep queue for later use. .Pp The .Fn sleepq_alloc function allocates an inactive sleep queue and is used to assign a sleep queue to a thread during thread creation. The .Fn sleepq_free function frees the resources associated with an inactive sleep queue and is used to free a queue during thread destruction. .Pp Active sleep queues are stored in a hash table hashed on the addresses pointed to by wait channels. Each bucket in the hash table contains a sleep queue chain. A sleep queue chain contains a spin mutex and a list of sleep queues that hash to that specific chain. Active sleep queues are protected by their chain's spin mutex. The .Fn init_sleepqueues function initializes the hash table of sleep queue chains. .Pp The .Fn sleepq_lock function locks the sleep queue chain associated with wait channel .Fa wchan . .Pp The .Fn sleepq_lookup function returns a pointer to the currently active sleep queue associated with .Fa wchan , or .Dv NULL if there is no active sleep queue associated with .Fa wchan . It requires the sleep queue chain associated with .Fa wchan to have been locked by a prior call to .Fn sleepq_lock . .Pp The .Fn sleepq_release function unlocks the sleep queue chain associated with .Fa wchan and is primarily useful when aborting a pending sleep request before one of the wait functions is called. .Pp The .Fn sleepq_add function places the current thread on the sleep queue associated with the wait channel .Fa wchan . The sleep queue chain associated with argument .Fa wchan must be locked by a prior call to .Fn sleepq_lock when this function is called. If a lock is specified via the .Fa lock argument, and if the kernel was compiled with .Cd "options INVARIANTS" , then the sleep queue code will perform extra checks to ensure that the lock is used by all threads sleeping on .Fa wchan . The .Fa wmesg parameter should be a short description of .Fa wchan . The .Fa flags parameter is a bitmask consisting of the type of sleep queue being slept on and zero or more optional flags. The .Fa queue parameter specifies the sub-queue in which the contending thread will be inserted. .Pp There are currently three types of sleep queues: .Pp .Bl -tag -width ".Dv SLEEPQ_CONDVAR" -compact .It Dv SLEEPQ_CONDVAR A sleep queue used to implement condition variables. .It Dv SLEEPQ_SLEEP A sleep queue used to implement .Xr sleep 9 , .Xr wakeup 9 , and .Xr wakeup_one 9 . .It Dv SLEEPQ_PAUSE A sleep queue used to implement .Xr pause 9 . .El .Pp There are currently two optional flags: .Pp .Bl -tag -width ".Dv SLEEPQ_STOP_ON_BDRY" -compact .It Dv SLEEPQ_INTERRUPTIBLE The current thread is entering an interruptible sleep. .It Dv SLEEPQ_STOP_ON_BDRY When a thread is entering an interruptible sleep, do not stop it upon the arrival of a stop action, such as .Dv SIGSTOP . Wake it up instead. .El .Pp A timeout on the sleep may be specified by calling .Fn sleepq_set_timeout after .Fn sleepq_add . The .Fa wchan parameter should be the same value from the preceding call to .Fn sleepq_add , and the sleep queue chain associated with .Fa wchan must have been locked by a prior call to .Fn sleepq_lock . The .Fa timo parameter should specify the timeout value in ticks. .Pp The .Fn sleepq_set_timeout_sbt function takes an .Fa sbt argument instead of .Fa timo , which allows a relative or absolute wakeup time to be specified with higher resolution, in the form of .Vt sbintime_t .
The .Fa pr parameter specifies the desired absolute event precision. The .Fa flags parameter allows additional .Fn callout_reset_sbt flags to be passed. .Pp Once the thread is ready to suspend, one of the wait functions is called to put the current thread to sleep until it is awakened and to context switch to another thread. The .Fn sleepq_wait function is used for non-interruptible sleeps that do not have a timeout. The .Fn sleepq_timedwait function is used for non-interruptible sleeps that have had a timeout set via .Fn sleepq_set_timeout . The .Fn sleepq_wait_sig function is used for interruptible sleeps that do not have a timeout. The .Fn sleepq_timedwait_sig function is used for interruptible sleeps that do have a timeout set. The .Fa wchan argument to all of the wait functions is the wait channel being slept on. The sleep queue chain associated with argument .Fa wchan needs to have been locked with a prior call to .Fn sleepq_lock . The .Fa pri argument is used to set the priority of the thread when it is awakened. If it is set to zero, the thread's priority is left alone. .Pp When the thread is resumed, the wait functions return a non-zero value if the thread was awakened due to an interrupt other than a signal or a timeout. If the sleep timed out, then .Er EWOULDBLOCK is returned. If the sleep was interrupted by something other than a signal, then some other return value will be returned. .Pp A sleeping thread is normally resumed by the .Fn sleepq_broadcast and .Fn sleepq_signal functions. The .Fn sleepq_signal function awakens the highest priority thread sleeping on a wait channel, while .Fn sleepq_broadcast awakens all of the threads sleeping on a wait channel. The .Fa wchan argument specifies which wait channel to awaken. The .Fa flags argument must match the sleep queue type contained in the .Fa flags argument passed to .Fn sleepq_add by the threads sleeping on the wait channel. If the .Fa pri argument does not equal \-1, then each thread that is awakened will have its priority raised to .Fa pri if it has a lower priority. The sleep queue chain associated with argument .Fa wchan must be locked by a prior call to .Fn sleepq_lock before calling any of these functions. The .Fa queue argument specifies the sub-queue from which threads need to be woken up. .Pp A thread in an interruptible sleep can be interrupted by another thread via the .Fn sleepq_abort function. The .Fa td argument specifies the thread to interrupt. An individual thread can also be awakened from sleeping on a specific wait channel via the .Fn sleepq_remove function. The .Fa td argument specifies the thread to awaken and the .Fa wchan argument specifies the wait channel to awaken it from. If the thread .Fa td is not blocked on the wait channel .Fa wchan , then this function will not do anything, even if the thread is asleep on a different wait channel. This function should only be used if one of the other functions above is not sufficient. One possible use is waking up a specific thread from a widely shared sleep channel. .Pp The .Fn sleepq_sleepcnt function offers a simple way to retrieve the number of threads sleeping on the specified .Fa queue , given a .Fa wchan . .Pp The .Fn sleepq_type function returns the type of the sleep queue associated with .Fa wchan . .Pp The .Fn sleepq_abort , .Fn sleepq_broadcast , and .Fn sleepq_signal functions all return a boolean value. If the return value is true, then at least one thread was resumed that is currently swapped out.
The caller is responsible for awakening the scheduler process so that the resumed thread will be swapped back in. This is done by calling the .Fn kick_proc0 function after releasing the sleep queue chain lock via a call to .Fn sleepq_release . .Pp The sleep queue interface is currently used to implement the .Xr sleep 9 and .Xr condvar 9 interfaces. Almost all other code in the kernel should use one of those interfaces rather than manipulating sleep queues directly. .Sh SEE ALSO .Xr condvar 9 , .Xr runqueue 9 , .Xr scheduler 9 , .Xr sleep 9 , .Xr timeout 9 Index: head/share/man/man9/swi.9 =================================================================== --- head/share/man/man9/swi.9 (revision 344854) +++ head/share/man/man9/swi.9 (revision 344855) @@ -1,250 +1,249 @@ .\" Copyright (c) 2000-2001 John H. Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd April 19, 2012 .Dt SWI 9 .Os .Sh NAME .Nm swi_add , .Nm swi_remove , .Nm swi_sched .Nd register and schedule software interrupt handlers .Sh SYNOPSIS .In sys/param.h .In sys/bus.h .In sys/interrupt.h .Vt "extern struct intr_event *tty_intr_event" ; .Vt "extern struct intr_event *clk_intr_event" ; .Vt "extern void *vm_ih" ; .Ft int .Fo swi_add .Fa "struct intr_event **eventp" .Fa "const char *name" .Fa "driver_intr_t handler" .Fa "void *arg" .Fa "int pri" .Fa "enum intr_type flags" .Fa "void **cookiep" .Fc .Ft int .Fn swi_remove "void *cookie" .Ft void .Fn swi_sched "void *cookie" "int flags" .Sh DESCRIPTION These functions are used to register and schedule software interrupt handlers. Software interrupt handlers are attached to a software interrupt thread, just as hardware interrupt handlers are attached to a hardware interrupt thread. Multiple handlers can be attached to the same thread. Software interrupt handlers can be used to queue up less critical processing inside of hardware interrupt handlers so that the work can be done at a later time. Software interrupt threads are different from other kernel threads in that they are treated as an interrupt thread. This means that time spent executing these threads is counted as interrupt time, and that they can be run via a lightweight context switch. 
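.Pp
Before the per-argument details below, a minimal registration-and-scheduling sketch; the handler name, the
.Dv SWI_TQ
priority, and the choice of a
.Dv NULL
event pointer and handler argument are illustrative assumptions:
.Bd -literal
static void *foo_ih;	/* handler cookie filled in by swi_add() */

static void
foo_swi(void *arg)
{
	/* Deferred, less critical work runs here in the swi thread. */
}

/* During attach: create a swi thread and register the handler on it. */
int error = swi_add(NULL, "foo", foo_swi, NULL, SWI_TQ, INTR_MPSAFE,
    &foo_ih);

/* Later, typically from a hardware interrupt handler: queue the work. */
swi_sched(foo_ih, 0);
.Ed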
.Pp The .Fn swi_add function is used to add a new software interrupt handler to a specified interrupt event. The .Fa eventp argument is an optional pointer to a .Vt struct intr_event pointer. If this argument points to an existing event that holds a list of interrupt handlers, then this handler will be attached to that event. Otherwise, a new event will be created, and if .Fa eventp is not .Dv NULL , then the pointer at that address will be modified to point to the newly created event. The .Fa name argument is used to associate a name with a specific handler. This name is appended to the name of the software interrupt thread that this handler is attached to. The .Fa handler argument is the function that will be executed when the handler is scheduled to run. The .Fa arg parameter will be passed in as the only parameter to .Fa handler when the function is executed. The .Fa pri value specifies the priority of this interrupt handler relative to other software interrupt handlers. If an interrupt event is created, then this value is used as the vector, and the .Fa flags argument is used to specify the attributes of a handler such as .Dv INTR_MPSAFE . The .Fa cookiep argument points to a .Vt void * cookie. This cookie will be set to a value that uniquely identifies this handler, and is used to schedule the handler for execution later on. .Pp The .Fn swi_remove function is used to tear down an interrupt handler pointed to by the .Fa cookie argument. It detaches the interrupt handler from the associated interrupt event and frees its memory. .Pp The .Fn swi_sched function is used to schedule an interrupt handler and its associated thread to run. The .Fa cookie argument specifies which software interrupt handler should be scheduled to run. The .Fa flags argument specifies how and when the handler should be run and is a mask of one or more of the following flags: .Bl -tag -width SWI_DELAY .It Dv SWI_DELAY Specifies that the kernel should mark the specified handler as needing to run, but the kernel should not schedule the software interrupt thread to run. Instead, .Fa handler will be executed the next time that the software interrupt thread runs after being scheduled by another event. Attaching a handler to the clock software interrupt thread and using this flag when scheduling a software interrupt handler can be used to implement the functionality performed by .Fn setdelayed in earlier versions of .Fx . .El .Pp The .Va tty_intr_event and .Va clk_intr_event variables contain pointers to the software interrupt handlers for the tty and clock software interrupts, respectively. .Va tty_intr_event is used to hang tty software interrupt handlers off of the same thread. .Va clk_intr_event is used to hang delayed handlers off of the clock software interrupt thread so that the functionality of .Fn setdelayed can be obtained in conjunction with .Dv SWI_DELAY . The .Va vm_ih handler cookie is used to schedule software interrupt threads to run for the VM subsystem. .Sh RETURN VALUES The .Fn swi_add and .Fn swi_remove functions return zero on success and non-zero on failure. .Sh ERRORS The .Fn swi_add function will fail if: .Bl -tag -width Er .It Bq Er EAGAIN The system-imposed limit on the total number of processes under execution would be exceeded. The limit is given by the .Xr sysctl 3 MIB variable .Dv KERN_MAXPROC . .It Bq Er EINVAL The .Fa flags argument specifies .Dv INTR_ENTROPY . .It Bq Er EINVAL The .Fa eventp argument points to a hardware interrupt thread.
.It Bq Er EINVAL Either of the .Fa name or .Fa handler arguments are .Dv NULL . .It Bq Er EINVAL The .Dv INTR_EXCL flag is specified and the interrupt event pointed to by .Fa eventp already has at least one handler, or the interrupt event already has an exclusive handler. .El .Pp The .Fn swi_remove function will fail if: .Bl -tag -width Er .It Bq Er EINVAL A software interrupt handler pointed to by .Fa cookie is .Dv NULL . .El .Sh SEE ALSO .Xr ithread 9 , .Xr taskqueue 9 .Sh HISTORY The .Fn swi_add and .Fn swi_sched functions first appeared in .Fx 5.0 . They replaced the .Fn register_swi function which appeared in .Fx 3.0 and the .Fn setsoft* , and .Fn schedsoft* functions which date back to at least .Bx 4.4 . The .Fn swi_remove function first appeared in .Fx 6.1 . .Sh BUGS Most of the global variables described in this manual page should not be global, or at the very least should not be declared in .In sys/interrupt.h . Index: head/stand/efi/libefi/devpath.c =================================================================== --- head/stand/efi/libefi/devpath.c (revision 344854) +++ head/stand/efi/libefi/devpath.c (revision 344855) @@ -1,231 +1,230 @@ /*- * Copyright (c) 2016 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include static EFI_GUID ImageDevicePathGUID = EFI_LOADED_IMAGE_DEVICE_PATH_PROTOCOL_GUID; static EFI_GUID DevicePathGUID = DEVICE_PATH_PROTOCOL; static EFI_GUID DevicePathToTextGUID = EFI_DEVICE_PATH_TO_TEXT_PROTOCOL_GUID; static EFI_DEVICE_PATH_TO_TEXT_PROTOCOL *textProtocol; EFI_DEVICE_PATH * efi_lookup_image_devpath(EFI_HANDLE handle) { EFI_DEVICE_PATH *devpath; EFI_STATUS status; status = BS->HandleProtocol(handle, &ImageDevicePathGUID, (VOID **)&devpath); if (EFI_ERROR(status)) devpath = NULL; return (devpath); } EFI_DEVICE_PATH * efi_lookup_devpath(EFI_HANDLE handle) { EFI_DEVICE_PATH *devpath; EFI_STATUS status; status = BS->HandleProtocol(handle, &DevicePathGUID, (VOID **)&devpath); if (EFI_ERROR(status)) devpath = NULL; return (devpath); } CHAR16 * efi_devpath_name(EFI_DEVICE_PATH *devpath) { static int once = 1; EFI_STATUS status; if (devpath == NULL) return (NULL); if (once) { status = BS->LocateProtocol(&DevicePathToTextGUID, NULL, (VOID **)&textProtocol); if (EFI_ERROR(status)) textProtocol = NULL; once = 0; } if (textProtocol == NULL) return (NULL); return (textProtocol->ConvertDevicePathToText(devpath, TRUE, TRUE)); } void efi_free_devpath_name(CHAR16 *text) { BS->FreePool(text); } EFI_DEVICE_PATH * efi_devpath_last_node(EFI_DEVICE_PATH *devpath) { if (IsDevicePathEnd(devpath)) return (NULL); while (!IsDevicePathEnd(NextDevicePathNode(devpath))) devpath = NextDevicePathNode(devpath); return (devpath); } EFI_DEVICE_PATH * efi_devpath_trim(EFI_DEVICE_PATH *devpath) { EFI_DEVICE_PATH *node, *copy; size_t prefix, len; if ((node = efi_devpath_last_node(devpath)) == NULL) return (NULL); prefix = (UINT8 *)node - (UINT8 *)devpath; if (prefix == 0) return (NULL); len = prefix + DevicePathNodeLength(NextDevicePathNode(node)); copy = malloc(len); if (copy != NULL) { memcpy(copy, devpath, prefix); node = (EFI_DEVICE_PATH *)((UINT8 *)copy + prefix); SetDevicePathEndNode(node); } return (copy); } EFI_HANDLE efi_devpath_handle(EFI_DEVICE_PATH *devpath) { EFI_STATUS status; EFI_HANDLE h; /* * There isn't a standard way to locate a handle for a given * device path. However, querying the EFI_DEVICE_PATH protocol * for a given device path should give us a handle for the * closest node in the path to the end that is valid. 
*/ status = BS->LocateDevicePath(&DevicePathGUID, &devpath, &h); if (EFI_ERROR(status)) return (NULL); return (h); } bool efi_devpath_match_node(EFI_DEVICE_PATH *devpath1, EFI_DEVICE_PATH *devpath2) { size_t len; if (devpath1 == NULL || devpath2 == NULL) return (false); if (DevicePathType(devpath1) != DevicePathType(devpath2) || DevicePathSubType(devpath1) != DevicePathSubType(devpath2)) return (false); len = DevicePathNodeLength(devpath1); if (len != DevicePathNodeLength(devpath2)) return (false); if (memcmp(devpath1, devpath2, len) != 0) return (false); return (true); } bool efi_devpath_match(EFI_DEVICE_PATH *devpath1, EFI_DEVICE_PATH *devpath2) { if (devpath1 == NULL || devpath2 == NULL) return (false); while (true) { if (!efi_devpath_match_node(devpath1, devpath2)) return false; if (IsDevicePathEnd(devpath1)) break; devpath1 = NextDevicePathNode(devpath1); devpath2 = NextDevicePathNode(devpath2); } return (true); } bool efi_devpath_is_prefix(EFI_DEVICE_PATH *prefix, EFI_DEVICE_PATH *path) { size_t len; if (prefix == NULL || path == NULL) return (false); while (1) { if (IsDevicePathEnd(prefix)) break; if (DevicePathType(prefix) != DevicePathType(path) || DevicePathSubType(prefix) != DevicePathSubType(path)) return (false); len = DevicePathNodeLength(prefix); if (len != DevicePathNodeLength(path)) return (false); if (memcmp(prefix, path, len) != 0) return (false); prefix = NextDevicePathNode(prefix); path = NextDevicePathNode(path); } return (true); } /* * Skip over the 'prefix' part of path and return the part of the path * that starts with the first node that's a MEDIA_DEVICE_PATH. */ EFI_DEVICE_PATH * efi_devpath_to_media_path(EFI_DEVICE_PATH *path) { while (!IsDevicePathEnd(path)) { if (DevicePathType(path) == MEDIA_DEVICE_PATH) return (path); path = NextDevicePathNode(path); } return (NULL); } UINTN efi_devpath_length(EFI_DEVICE_PATH *path) { EFI_DEVICE_PATH *start = path; while (!IsDevicePathEnd(path)) path = NextDevicePathNode(path); return ((UINTN)path - (UINTN)start) + DevicePathNodeLength(path); } Index: head/stand/i386/cdboot/cdboot.S =================================================================== --- head/stand/i386/cdboot/cdboot.S (revision 344854) +++ head/stand/i386/cdboot/cdboot.S (revision 344855) @@ -1,594 +1,593 @@ # # Copyright (c) 2001 John Baldwin -# All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ # # This program is a freestanding boot program to load an a.out binary # from a CD-ROM booted with no emulation mode as described by the El # Torito standard. Due to broken BIOSen that do not load the desired # number of sectors, we try to fit this in as small a space as possible. # # Basically, we first create a set of boot arguments to pass to the loaded # binary. Then we attempt to load /boot/loader from the CD we were booted # off of. # #include # # Memory locations. # .set MEM_PAGE_SIZE,0x1000 # memory page size, 4k .set MEM_ARG,0x900 # Arguments at start .set MEM_ARG_BTX,0xa100 # Where we move them to so the # BTX client can see them .set MEM_ARG_SIZE,0x18 # Size of the arguments .set MEM_BTX_ADDRESS,0x9000 # where BTX lives .set MEM_BTX_ENTRY,0x9010 # where BTX starts to execute .set MEM_BTX_OFFSET,MEM_PAGE_SIZE # offset of BTX in the loader .set MEM_BTX_CLIENT,0xa000 # where BTX clients live # # a.out header fields # .set AOUT_TEXT,0x04 # text segment size .set AOUT_DATA,0x08 # data segment size .set AOUT_BSS,0x0c # zero'd BSS size .set AOUT_SYMBOLS,0x10 # symbol table .set AOUT_ENTRY,0x14 # entry point .set AOUT_HEADER,MEM_PAGE_SIZE # size of the a.out header # # Segment selectors. # .set SEL_SDATA,0x8 # Supervisor data .set SEL_RDATA,0x10 # Real mode data .set SEL_SCODE,0x18 # PM-32 code .set SEL_SCODE16,0x20 # PM-16 code # # BTX constants # .set INT_SYS,0x30 # BTX syscall interrupt # # Constants for reading from the CD. # .set ERROR_TIMEOUT,0x80 # BIOS timeout on read .set NUM_RETRIES,3 # Num times to retry .set SECTOR_SIZE,0x800 # size of a sector .set SECTOR_SHIFT,11 # number of place to shift .set BUFFER_LEN,0x100 # number of sectors in buffer .set MAX_READ,0x10000 # max we can read at a time .set MAX_READ_SEC,MAX_READ >> SECTOR_SHIFT .set MEM_READ_BUFFER,0x9000 # buffer to read from CD .set MEM_VOLDESC,MEM_READ_BUFFER # volume descriptor .set MEM_DIR,MEM_VOLDESC+SECTOR_SIZE # Lookup buffer .set VOLDESC_LBA,0x10 # LBA of vol descriptor .set VD_PRIMARY,1 # Primary VD .set VD_END,255 # VD Terminator .set VD_ROOTDIR,156 # Offset of Root Dir Record .set DIR_LEN,0 # Offset of Dir Record length .set DIR_EA_LEN,1 # Offset of EA length .set DIR_EXTENT,2 # Offset of 64-bit LBA .set DIR_SIZE,10 # Offset of 64-bit length .set DIR_NAMELEN,32 # Offset of 8-bit name len .set DIR_NAME,33 # Offset of dir name # # We expect to be loaded by the BIOS at 0x7c00 (standard boot loader entry # point) # .code16 .globl start .org 0x0, 0x0 # # Program start. 
# start: cld # string ops inc xor %ax,%ax # zero %ax mov %ax,%ss # setup the mov $start,%sp # stack mov %ax,%ds # setup the mov %ax,%es # data segments mov %dl,drive # Save BIOS boot device mov $msg_welcome,%si # %ds:(%si) -> welcome message call putstr # display the welcome message # # Setup the arguments that the loader is expecting from boot[12] # mov $msg_bootinfo,%si # %ds:(%si) -> boot args message call putstr # display the message mov $MEM_ARG,%bx # %ds:(%bx) -> boot args mov %bx,%di # %es:(%di) -> boot args xor %eax,%eax # zero %eax mov $(MEM_ARG_SIZE/4),%cx # Size of arguments in 32-bit # dwords rep # Clear the arguments stosl # to zero mov drive,%dl # Store BIOS boot device mov %dl,0x4(%bx) # in kargs->bootdev orb $KARGS_FLAGS_CD,0x8(%bx) # kargs->bootflags |= # KARGS_FLAGS_CD # # Load Volume Descriptor # mov $VOLDESC_LBA,%eax # Set LBA of first VD load_vd: push %eax # Save %eax mov $1,%dh # One sector mov $MEM_VOLDESC,%ebx # Destination call read # Read it in cmpb $VD_PRIMARY,(%bx) # Primary VD? je have_vd # Yes pop %eax # Prepare to inc %eax # try next cmpb $VD_END,(%bx) # Last VD? jne load_vd # No, read next mov $msg_novd,%si # No VD jmp error # Halt have_vd: # Have Primary VD # # Try to look up the loader binary using the paths in the loader_paths # array. # mov $loader_paths,%si # Point to start of array lookup_path: push %si # Save file name pointer call lookup # Try to find file pop %di # Restore file name pointer jnc lookup_found # Found this file xor %al,%al # Look for next mov $0xffff,%cx # path name by repnz # scanning for scasb # nul char mov %di,%si # Point %si at next path mov (%si),%al # Get first char of next path or %al,%al # Is it double nul? jnz lookup_path # No, try it. mov $msg_failed,%si # Failed message jmp error # Halt lookup_found: # Found a loader file # # Load the binary into the buffer. Due to real mode addressing limitations # we have to read it in 64k chunks. # mov DIR_SIZE(%bx),%eax # Read file length add $SECTOR_SIZE-1,%eax # Convert length to sectors shr $SECTOR_SHIFT,%eax cmp $BUFFER_LEN,%eax jbe load_sizeok mov $msg_load2big,%si # Error message call error load_sizeok: movzbw %al,%cx # Num sectors to read mov DIR_EXTENT(%bx),%eax # Load extent xor %edx,%edx mov DIR_EA_LEN(%bx),%dl add %edx,%eax # Skip extended mov $MEM_READ_BUFFER,%ebx # Read into the buffer load_loop: mov %cl,%dh cmp $MAX_READ_SEC,%cl # Truncate to max read size jbe load_notrunc mov $MAX_READ_SEC,%dh load_notrunc: sub %dh,%cl # Update count push %eax # Save call read # Read it in pop %eax # Restore add $MAX_READ_SEC,%eax # Update LBA add $MAX_READ,%ebx # Update dest addr jcxz load_done # Done? 
jmp load_loop # Keep going load_done: # # Turn on the A20 address line # call seta20 # Turn A20 on # # Relocate the loader and BTX using a very lazy protected mode # mov $msg_relocate,%si # Display the call putstr # relocation message mov MEM_READ_BUFFER+AOUT_ENTRY,%edi # %edi is the destination mov $(MEM_READ_BUFFER+AOUT_HEADER),%esi # %esi is # the start of the text # segment mov MEM_READ_BUFFER+AOUT_TEXT,%ecx # %ecx = length of the text # segment push %edi # Save entry point for later lgdt gdtdesc # setup our own gdt cli # turn off interrupts mov %cr0,%eax # Turn on or $0x1,%al # protected mov %eax,%cr0 # mode ljmp $SEL_SCODE,$pm_start # long jump to clear the # instruction pre-fetch queue .code32 pm_start: mov $SEL_SDATA,%ax # Initialize mov %ax,%ds # %ds and mov %ax,%es # %es to a flat selector rep # Relocate the movsb # text segment add $(MEM_PAGE_SIZE - 1),%edi # pad %edi out to a new page and $~(MEM_PAGE_SIZE - 1),%edi # for the data segment mov MEM_READ_BUFFER+AOUT_DATA,%ecx # size of the data segment rep # Relocate the movsb # data segment mov MEM_READ_BUFFER+AOUT_BSS,%ecx # size of the bss xor %eax,%eax # zero %eax add $3,%cl # round %ecx up to shr $2,%ecx # a multiple of 4 rep # zero the stosl # bss mov MEM_READ_BUFFER+AOUT_ENTRY,%esi # %esi -> relocated loader add $MEM_BTX_OFFSET,%esi # %esi -> BTX in the loader mov $MEM_BTX_ADDRESS,%edi # %edi -> where BTX needs to go movzwl 0xa(%esi),%ecx # %ecx -> length of BTX rep # Relocate movsb # BTX ljmp $SEL_SCODE16,$pm_16 # Jump to 16-bit PM .code16 pm_16: mov $SEL_RDATA,%ax # Initialize mov %ax,%ds # %ds and mov %ax,%es # %es to a real mode selector mov %cr0,%eax # Turn off and $~0x1,%al # protected mov %eax,%cr0 # mode ljmp $0,$pm_end # Long jump to clear the # instruction pre-fetch queue pm_end: sti # Turn interrupts back on now # # Copy the BTX client to MEM_BTX_CLIENT # xor %ax,%ax # zero %ax and set mov %ax,%ds # %ds and %es mov %ax,%es # to segment 0 mov $MEM_BTX_CLIENT,%di # Prepare to relocate mov $btx_client,%si # the simple btx client mov $(btx_client_end-btx_client),%cx # length of btx client rep # Relocate the movsb # simple BTX client # # Copy the boot[12] args to where the BTX client can see them # mov $MEM_ARG,%si # where the args are at now mov $MEM_ARG_BTX,%di # where the args are moving to mov $(MEM_ARG_SIZE/4),%cx # size of the arguments in longs rep # Relocate movsl # the words # # Save the entry point so the client can get to it later on # pop %eax # Restore saved entry point stosl # and add it to the end of # the arguments # # Now we just start up BTX and let it do the rest # mov $msg_jump,%si # Display the call putstr # jump message ljmp $0,$MEM_BTX_ENTRY # Jump to the BTX entry point # # Lookup the file in the path at [SI] from the root directory. # # Trashes: All but BX # Returns: CF = 0 (success), BX = pointer to record # CF = 1 (not found) # lookup: mov $VD_ROOTDIR+MEM_VOLDESC,%bx # Root directory record push %si mov $msg_lookup,%si # Display lookup message call putstr pop %si push %si call putstr mov $msg_lookup2,%si call putstr pop %si lookup_dir: lodsb # Get first char of path cmp $0,%al # Are we done? je lookup_done # Yes cmp $'/',%al # Skip path separator. 
		je lookup_dir
		dec %si				# Undo lodsb side effect
		call find_file			# Lookup first path item
		jnc lookup_dir			# Try next component
		mov $msg_lookupfail,%si		# Not found message
		call putstr
		stc				# Set carry
		ret
lookup_done:	mov $msg_lookupok,%si		# Success message
		call putstr
		clc				# Clear carry
		ret
#
# Lookup file at [SI] in directory whose record is at [BX].
#
# Trashes: All but returns
# Returns: CF = 0 (success), BX = pointer to record, SI = next path item
#          CF = 1 (not found), SI = preserved
#
find_file:	mov DIR_EXTENT(%bx),%eax	# Load extent
		xor %edx,%edx
		mov DIR_EA_LEN(%bx),%dl
		add %edx,%eax			# Skip extended attributes
		mov %eax,rec_lba		# Save LBA
		mov DIR_SIZE(%bx),%eax		# Save size
		mov %eax,rec_size
		xor %cl,%cl			# Zero length
		push %si			# Save
ff.namelen:	inc %cl				# Update length
		lodsb				# Read char
		cmp $0,%al			# Nul?
		je ff.namedone			# Yes
		cmp $'/',%al			# Path separator?
		jnz ff.namelen			# No, keep going
ff.namedone:	dec %cl				# Adjust length and save
		mov %cl,name_len
		pop %si				# Restore
ff.load:	mov rec_lba,%eax		# Load LBA
		mov $MEM_DIR,%ebx		# Address buffer
		mov $1,%dh			# One sector
		call read			# Read directory block
		incl rec_lba			# Update LBA to next block
ff.scan:	mov %ebx,%edx			# Check for EOF
		sub $MEM_DIR,%edx
		cmp %edx,rec_size
		ja ff.scan.1
		stc				# EOF reached
		ret
ff.scan.1:	cmpb $0,DIR_LEN(%bx)		# Last record in block?
		je ff.nextblock
		push %si			# Save
		movzbw DIR_NAMELEN(%bx),%si	# Find end of string
ff.checkver:	cmpb $'0',DIR_NAME-1(%bx,%si)	# Less than '0'?
		jb ff.checkver.1
		cmpb $'9',DIR_NAME-1(%bx,%si)	# Greater than '9'?
		ja ff.checkver.1
		dec %si				# Next char
		jnz ff.checkver
		jmp ff.checklen			# All numbers in name, so
						#  no version
ff.checkver.1:	movzbw DIR_NAMELEN(%bx),%cx
		cmp %cx,%si			# Did we find any digits?
		je ff.checkdot			# No
		cmpb $';',DIR_NAME-1(%bx,%si)	# Check for semicolon
		jne ff.checkver.2
		dec %si				# Skip semicolon
		mov %si,%cx
		mov %cl,DIR_NAMELEN(%bx)	# Adjust length
		jmp ff.checkdot
ff.checkver.2:	mov %cx,%si			# Restore %si to end of string
ff.checkdot:	cmpb $'.',DIR_NAME-1(%bx,%si)	# Trailing dot?
		jne ff.checklen			# No
		decb DIR_NAMELEN(%bx)		# Adjust length
ff.checklen:	pop %si				# Restore
		movzbw name_len,%cx		# Load length of name
		cmp %cl,DIR_NAMELEN(%bx)	# Does length match?
		je ff.checkname			# Yes, check name
ff.nextrec:	add DIR_LEN(%bx),%bl		# Next record
		adc $0,%bh
		jmp ff.scan
ff.nextblock:	subl $SECTOR_SIZE,rec_size	# Adjust size
		jnc ff.load			# If subtract ok, keep going
		ret				# End of file, so not found
ff.checkname:	lea DIR_NAME(%bx),%di		# Address name in record
		push %si			# Save
		repe cmpsb			# Compare name
		je ff.match			# We have a winner!
		pop %si				# Restore
		jmp ff.nextrec			# Keep looking.
ff.match:	add $2,%sp			# Discard saved %si
		clc				# Clear carry
		ret
#
# Load DH sectors starting at LBA EAX into [EBX].
#
# Trashes: EAX
#
read:		push %si			# Save
		push %cx			# Save since some BIOSs trash
		mov %eax,edd_lba		# LBA to read from
		mov %ebx,%eax			# Convert address
		shr $4,%eax			#  to segment
		mov %ax,edd_addr+0x2		#  and store
read.retry:	call twiddle			# Entertain the user
		push %dx			# Save
		mov $edd_packet,%si		# Address Packet
		mov %dh,edd_len			# Set length
		mov drive,%dl			# BIOS Device
		mov $0x42,%ah			# BIOS: Extended Read
		int $0x13			# Call BIOS
		pop %dx				# Restore
		jc read.fail			# Worked?
		pop %cx				# Restore
		pop %si
		ret				# Return
read.fail:	cmp $ERROR_TIMEOUT,%ah		# Timeout?
		je read.retry			# Yes, Retry.
read.error:	mov %ah,%al			# Save error
		mov $hex_error,%di		# Format it
		call hex8			#  as hex
		mov $msg_badread,%si		# Display Read error message
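The version and trailing-dot stripping in ff.checkver and ff.checkdot above is dense. The same normalization expressed in C, as a sketch only (the function name and layout are mine, not loader code): strip a ";version" suffix made of digits, then a trailing dot from an empty extension, before comparing names.

#include <ctype.h>
#include <stdio.h>
#include <string.h>

static size_t
iso9660_name_len(const char *name, size_t len)
{
	size_t i = len;

	/* Walk back over trailing digits. */
	while (i > 0 && isdigit((unsigned char)name[i - 1]))
		i--;
	/* Only strip the digits if they form a ";version" suffix. */
	if (i > 0 && i < len && name[i - 1] == ';')
		len = i - 1;
	/* A trailing dot means an empty extension; drop it too. */
	if (len > 0 && name[len - 1] == '.')
		len--;
	return (len);
}

int
main(void)
{
	const char *n = "LOADER.;1";

	/* Prints "LOADER": the ";1" version and the bare dot are stripped. */
	printf("%.*s\n", (int)iso9660_name_len(n, strlen(n)), n);
	return (0);
}

#
# Display error message at [SI] and halt.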
# error: call putstr # Display message halt: hlt jmp halt # Spin # # Display a null-terminated string. # # Trashes: AX, SI # putstr: push %bx # Save putstr.load: lodsb # load %al from %ds:(%si) test %al,%al # stop at null jnz putstr.putc # if the char != null, output it pop %bx # Restore ret # return when null is hit putstr.putc: call putc # output char jmp putstr.load # next char # # Display a single char. # putc: mov $0x7,%bx # attribute for output mov $0xe,%ah # BIOS: put_char int $0x10 # call BIOS, print char in %al ret # Return to caller # # Output the "twiddle" # twiddle: push %ax # Save push %bx # Save mov twiddle_index,%al # Load index mov $twiddle_chars,%bx # Address table inc %al # Next and $3,%al # char mov %al,twiddle_index # Save index for next call xlat # Get char call putc # Output it mov $8,%al # Backspace call putc # Output it pop %bx # Restore pop %ax # Restore ret # # Enable A20. Put an upper limit on the amount of time we wait for the # keyboard controller to get ready (65K x ISA access time). If # we wait more than that amount, the hardware is probably # legacy-free and simply doesn't have a keyboard controller. # Thus, the A20 line is already enabled. # seta20: cli # Disable interrupts xor %cx,%cx # Clear seta20.1: inc %cx # Increment, overflow? jz seta20.3 # Yes in $0x64,%al # Get status test $0x2,%al # Busy? jnz seta20.1 # Yes mov $0xd1,%al # Command: Write out %al,$0x64 # output port seta20.2: in $0x64,%al # Get status test $0x2,%al # Busy? jnz seta20.2 # Yes mov $0xdf,%al # Enable out %al,$0x60 # A20 seta20.3: sti # Enable interrupts ret # To caller # # Convert AL to hex, saving the result to [EDI]. # hex8: pushl %eax # Save shrb $0x4,%al # Do upper call hex8.1 # 4 popl %eax # Restore hex8.1: andb $0xf,%al # Get lower 4 cmpb $0xa,%al # Convert sbbb $0x69,%al # to hex das # digit orb $0x20,%al # To lower case stosb # Save char ret # (Recursive) # # BTX client to start btxldr # .code32 btx_client: mov $(MEM_ARG_BTX-MEM_BTX_CLIENT+MEM_ARG_SIZE-4), %esi # %ds:(%esi) -> end # of boot[12] args mov $(MEM_ARG_SIZE/4),%ecx # Number of words to push std # Go backwards push_arg: lodsl # Read argument push %eax # Push it onto the stack loop push_arg # Push all of the arguments cld # In case anyone depends on this pushl MEM_ARG_BTX-MEM_BTX_CLIENT+MEM_ARG_SIZE # Entry point of # the loader push %eax # Emulate a near call mov $0x1,%eax # 'exec' system call int $INT_SYS # BTX system call btx_client_end: .code16 .p2align 4 # # Global descriptor table. # gdt: .word 0x0,0x0,0x0,0x0 # Null entry .word 0xffff,0x0,0x9200,0xcf # SEL_SDATA .word 0xffff,0x0,0x9200,0x0 # SEL_RDATA .word 0xffff,0x0,0x9a00,0xcf # SEL_SCODE (32-bit) .word 0xffff,0x0,0x9a00,0x8f # SEL_SCODE16 (16-bit) gdt.1: # # Pseudo-descriptors. 
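Stepping back to the gdt table just above: each descriptor packs base, limit, and flag bits into four 16-bit words. A standalone C sketch decoding the SEL_SCODE entry (0xffff,0x0,0x9a00,0xcf), illustrative only and not loader code, shows why that entry means a flat 4G, ring-0, 32-bit code segment:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint16_t w[4] = { 0xffff, 0x0000, 0x9a00, 0x00cf }; /* SEL_SCODE */
	uint64_t d = (uint64_t)w[0] | (uint64_t)w[1] << 16 |
	    (uint64_t)w[2] << 32 | (uint64_t)w[3] << 48;

	/* Limit: bits 0-15 plus bits 48-51; 0xfffff pages = 4G with G=1. */
	uint32_t limit = (d & 0xffff) | ((d >> 32) & 0xf0000);
	/* Base: bits 16-39 plus bits 56-63. */
	uint32_t base = ((d >> 16) & 0xffffff) | (uint32_t)((d >> 56) << 24);
	unsigned type = (d >> 40) & 0xf;	/* 0xa = execute/read code */
	unsigned gran = (d >> 55) & 1;		/* 1 = 4K granularity */
	unsigned db = (d >> 54) & 1;		/* 1 = 32-bit default size */

	printf("base=0x%x limit=0x%x type=0x%x G=%u D=%u\n",
	    (unsigned)base, (unsigned)limit, type, gran, db);
	return (0);
}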
# gdtdesc: .word gdt.1-gdt-1 # Limit .long gdt # Base # # EDD Packet # edd_packet: .byte 0x10 # Length .byte 0 # Reserved edd_len: .byte 0x0 # Num to read .byte 0 # Reserved edd_addr: .word 0x0,0x0 # Seg:Off edd_lba: .quad 0x0 # LBA drive: .byte 0 # # State for searching dir # rec_lba: .long 0x0 # LBA (adjusted for EA) rec_size: .long 0x0 # File size name_len: .byte 0x0 # Length of current name twiddle_index: .byte 0x0 msg_welcome: .asciz "CD Loader 1.2\r\n\n" msg_bootinfo: .asciz "Building the boot loader arguments\r\n" msg_relocate: .asciz "Relocating the loader and the BTX\r\n" msg_jump: .asciz "Starting the BTX loader\r\n" msg_badread: .ascii "Read Error: 0x" hex_error: .asciz "00\r\n" msg_novd: .asciz "Could not find Primary Volume Descriptor\r\n" msg_lookup: .asciz "Looking up " msg_lookup2: .asciz "... " msg_lookupok: .asciz "Found\r\n" msg_lookupfail: .asciz "File not found\r\n" msg_load2big: .asciz "File too big\r\n" msg_failed: .asciz "Boot failed\r\n" twiddle_chars: .ascii "|/-\\" loader_paths: .asciz "/BOOT/LOADER" .asciz "/boot/loader" .byte 0 Index: head/stand/i386/libi386/pxe.c =================================================================== --- head/stand/i386/libi386/pxe.c (revision 344854) +++ head/stand/i386/libi386/pxe.c (revision 344855) @@ -1,573 +1,573 @@ /*- * Copyright (c) 2000 Alfred Perlstein * Copyright (c) 2000 Paul Saab - * Copyright (c) 2000 John Baldwin * All rights reserved. + * Copyright (c) 2000 John Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
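 */

One note before leaving cdboot.S: the edd_packet defined above is the 16-byte disk address packet handed to INT 13h AH=42h by the read routine. Here is a C view of the same layout; the struct and field names are mine, and per the EDD convention the transfer buffer pointer is stored offset-first even though the assembly comment reads "Seg:Off" (the code stores the segment at edd_addr+0x2, consistent with this).

#include <stdint.h>
#include <stdio.h>

struct edd_packet {
	uint8_t  len;		/* packet size, always 0x10 */
	uint8_t  res1;		/* reserved */
	uint8_t  count;		/* sectors to read (edd_len) */
	uint8_t  res2;		/* reserved */
	uint16_t off;		/* transfer buffer offset */
	uint16_t seg;		/* transfer buffer segment */
	uint64_t lba;		/* starting absolute sector (edd_lba) */
} __attribute__((packed));

int
main(void)
{
	/* Must be 16 bytes, matching the leading .byte 0x10 length field. */
	printf("%zu\n", sizeof(struct edd_packet));
	return (0);
}

/*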
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "libi386.h" #include "btxv86.h" #include "pxe.h" static pxenv_t *pxenv_p = NULL; /* PXENV+ */ static pxe_t *pxe_p = NULL; /* !PXE */ #ifdef PXE_DEBUG static int pxe_debug = 0; #endif void pxe_enable(void *pxeinfo); static void (*pxe_call)(int func, void *ptr); static void pxenv_call(int func, void *ptr); static void bangpxe_call(int func, void *ptr); static int pxe_init(void); static int pxe_print(int verbose); static void pxe_cleanup(void); static void pxe_perror(int error); static int pxe_netif_match(struct netif *nif, void *machdep_hint); static int pxe_netif_probe(struct netif *nif, void *machdep_hint); static void pxe_netif_init(struct iodesc *desc, void *machdep_hint); static ssize_t pxe_netif_get(struct iodesc *, void **, time_t); static ssize_t pxe_netif_put(struct iodesc *desc, void *pkt, size_t len); static void pxe_netif_end(struct netif *nif); extern struct netif_stats pxe_st[]; extern uint16_t __bangpxeseg; extern uint16_t __bangpxeoff; extern void __bangpxeentry(void); extern uint16_t __pxenvseg; extern uint16_t __pxenvoff; extern void __pxenventry(void); struct netif_dif pxe_ifs[] = { /* dif_unit dif_nsel dif_stats dif_private */ {0, 1, &pxe_st[0], 0} }; struct netif_stats pxe_st[nitems(pxe_ifs)]; struct netif_driver pxenetif = { .netif_bname = "pxenet", .netif_match = pxe_netif_match, .netif_probe = pxe_netif_probe, .netif_init = pxe_netif_init, .netif_get = pxe_netif_get, .netif_put = pxe_netif_put, .netif_end = pxe_netif_end, .netif_ifs = pxe_ifs, .netif_nifs = nitems(pxe_ifs) }; struct netif_driver *netif_drivers[] = { &pxenetif, NULL }; struct devsw pxedisk = { .dv_name = "net", .dv_type = DEVT_NET, .dv_init = pxe_init, .dv_strategy = NULL, /* Will be set in pxe_init */ .dv_open = NULL, /* Will be set in pxe_init */ .dv_close = NULL, /* Will be set in pxe_init */ .dv_ioctl = noioctl, .dv_print = pxe_print, .dv_cleanup = pxe_cleanup }; /* * This function is called by the loader to enable PXE support if we * are booted by PXE. The passed in pointer is a pointer to the PXENV+ * structure. */ void pxe_enable(void *pxeinfo) { pxenv_p = (pxenv_t *)pxeinfo; pxe_p = (pxe_t *)PTOV(pxenv_p->PXEPtr.segment * 16 + pxenv_p->PXEPtr.offset); pxe_call = NULL; } /* * return true if pxe structures are found/initialized, * also figures out our IP information via the pxe cached info struct */ static int pxe_init(void) { t_PXENV_GET_CACHED_INFO *gci_p; int counter; uint8_t checksum; uint8_t *checkptr; extern struct devsw netdev; if (pxenv_p == NULL) return (0); /* look for "PXENV+" */ if (bcmp((void *)pxenv_p->Signature, S_SIZE("PXENV+"))) { pxenv_p = NULL; return (0); } /* make sure the size is something we can handle */ if (pxenv_p->Length > sizeof(*pxenv_p)) { printf("PXENV+ structure too large, ignoring\n"); pxenv_p = NULL; return (0); } /* * do byte checksum: * add up each byte in the structure, the total should be 0 */ checksum = 0; checkptr = (uint8_t *) pxenv_p; for (counter = 0; counter < pxenv_p->Length; counter++) checksum += *checkptr++; if (checksum != 0) { printf("PXENV+ structure failed checksum, ignoring\n"); pxenv_p = NULL; return (0); } /* * PXENV+ passed, so use that if !PXE is not available or * the checksum fails. 
*/ pxe_call = pxenv_call; if (pxenv_p->Version >= 0x0200) { for (;;) { if (bcmp((void *)pxe_p->Signature, S_SIZE("!PXE"))) { pxe_p = NULL; break; } checksum = 0; checkptr = (uint8_t *)pxe_p; for (counter = 0; counter < pxe_p->StructLength; counter++) checksum += *checkptr++; if (checksum != 0) { pxe_p = NULL; break; } pxe_call = bangpxe_call; break; } } pxedisk.dv_open = netdev.dv_open; pxedisk.dv_close = netdev.dv_close; pxedisk.dv_strategy = netdev.dv_strategy; printf("\nPXE version %d.%d, real mode entry point ", (uint8_t) (pxenv_p->Version >> 8), (uint8_t) (pxenv_p->Version & 0xFF)); if (pxe_call == bangpxe_call) printf("@%04x:%04x\n", pxe_p->EntryPointSP.segment, pxe_p->EntryPointSP.offset); else printf("@%04x:%04x\n", pxenv_p->RMEntry.segment, pxenv_p->RMEntry.offset); gci_p = bio_alloc(sizeof(*gci_p)); if (gci_p == NULL) { pxe_p = NULL; return (0); } bzero(gci_p, sizeof(*gci_p)); gci_p->PacketType = PXENV_PACKET_TYPE_BINL_REPLY; pxe_call(PXENV_GET_CACHED_INFO, gci_p); if (gci_p->Status != 0) { pxe_perror(gci_p->Status); bio_free(gci_p, sizeof(*gci_p)); pxe_p = NULL; return (0); } free(bootp_response); if ((bootp_response = malloc(gci_p->BufferSize)) != NULL) { bootp_response_size = gci_p->BufferSize; bcopy(PTOV((gci_p->Buffer.segment << 4) + gci_p->Buffer.offset), bootp_response, bootp_response_size); } bio_free(gci_p, sizeof(*gci_p)); return (1); } static int pxe_print(int verbose) { if (pxe_call == NULL) return (0); printf("%s devices:", pxedisk.dv_name); if (pager_output("\n") != 0) return (1); printf(" %s0:", pxedisk.dv_name); if (verbose) { printf(" %s:%s", inet_ntoa(rootip), rootpath); } return (pager_output("\n")); } static void pxe_cleanup(void) { t_PXENV_UNLOAD_STACK *unload_stack_p; t_PXENV_UNDI_SHUTDOWN *undi_shutdown_p; if (pxe_call == NULL) return; undi_shutdown_p = bio_alloc(sizeof(*undi_shutdown_p)); if (undi_shutdown_p != NULL) { bzero(undi_shutdown_p, sizeof(*undi_shutdown_p)); pxe_call(PXENV_UNDI_SHUTDOWN, undi_shutdown_p); #ifdef PXE_DEBUG if (pxe_debug && undi_shutdown_p->Status != 0) printf("pxe_cleanup: UNDI_SHUTDOWN failed %x\n", undi_shutdown_p->Status); #endif bio_free(undi_shutdown_p, sizeof(*undi_shutdown_p)); } unload_stack_p = bio_alloc(sizeof(*unload_stack_p)); if (unload_stack_p != NULL) { bzero(unload_stack_p, sizeof(*unload_stack_p)); pxe_call(PXENV_UNLOAD_STACK, unload_stack_p); #ifdef PXE_DEBUG if (pxe_debug && unload_stack_p->Status != 0) printf("pxe_cleanup: UNLOAD_STACK failed %x\n", unload_stack_p->Status); #endif bio_free(unload_stack_p, sizeof(*unload_stack_p)); } } void pxe_perror(int err) { return; } void pxenv_call(int func, void *ptr) { #ifdef PXE_DEBUG if (pxe_debug) printf("pxenv_call %x\n", func); #endif bzero(&v86, sizeof(v86)); __pxenvseg = pxenv_p->RMEntry.segment; __pxenvoff = pxenv_p->RMEntry.offset; v86.ctl = V86_ADDR | V86_CALLF | V86_FLAGS; v86.es = VTOPSEG(ptr); v86.edi = VTOPOFF(ptr); v86.addr = (VTOPSEG(__pxenventry) << 16) | VTOPOFF(__pxenventry); v86.ebx = func; v86int(); v86.ctl = V86_FLAGS; } void bangpxe_call(int func, void *ptr) { #ifdef PXE_DEBUG if (pxe_debug) printf("bangpxe_call %x\n", func); #endif bzero(&v86, sizeof(v86)); __bangpxeseg = pxe_p->EntryPointSP.segment; __bangpxeoff = pxe_p->EntryPointSP.offset; v86.ctl = V86_ADDR | V86_CALLF | V86_FLAGS; v86.edx = VTOPSEG(ptr); v86.eax = VTOPOFF(ptr); v86.addr = (VTOPSEG(__bangpxeentry) << 16) | VTOPOFF(__bangpxeentry); v86.ebx = func; v86int(); v86.ctl = V86_FLAGS; } static int pxe_netif_match(struct netif *nif, void *machdep_hint) { return (1); } static int 
pxe_netif_probe(struct netif *nif, void *machdep_hint) { if (pxe_call == NULL) return (-1); return (0); } static void pxe_netif_end(struct netif *nif) { t_PXENV_UNDI_CLOSE *undi_close_p; undi_close_p = bio_alloc(sizeof(*undi_close_p)); if (undi_close_p != NULL) { bzero(undi_close_p, sizeof(*undi_close_p)); pxe_call(PXENV_UNDI_CLOSE, undi_close_p); if (undi_close_p->Status != 0) printf("undi close failed: %x\n", undi_close_p->Status); bio_free(undi_close_p, sizeof(*undi_close_p)); } } static void pxe_netif_init(struct iodesc *desc, void *machdep_hint) { t_PXENV_UNDI_GET_INFORMATION *undi_info_p; t_PXENV_UNDI_OPEN *undi_open_p; uint8_t *mac; int i, len; undi_info_p = bio_alloc(sizeof(*undi_info_p)); if (undi_info_p == NULL) return; bzero(undi_info_p, sizeof(*undi_info_p)); pxe_call(PXENV_UNDI_GET_INFORMATION, undi_info_p); if (undi_info_p->Status != 0) { printf("undi get info failed: %x\n", undi_info_p->Status); bio_free(undi_info_p, sizeof(*undi_info_p)); return; } /* Make sure the CurrentNodeAddress is valid. */ for (i = 0; i < undi_info_p->HwAddrLen; ++i) { if (undi_info_p->CurrentNodeAddress[i] != 0) break; } if (i < undi_info_p->HwAddrLen) { for (i = 0; i < undi_info_p->HwAddrLen; ++i) { if (undi_info_p->CurrentNodeAddress[i] != 0xff) break; } } if (i < undi_info_p->HwAddrLen) mac = undi_info_p->CurrentNodeAddress; else mac = undi_info_p->PermNodeAddress; len = min(sizeof (desc->myea), undi_info_p->HwAddrLen); for (i = 0; i < len; ++i) desc->myea[i] = mac[i]; if (bootp_response != NULL) desc->xid = bootp_response->bp_xid; else desc->xid = 0; bio_free(undi_info_p, sizeof(*undi_info_p)); undi_open_p = bio_alloc(sizeof(*undi_open_p)); if (undi_open_p == NULL) return; bzero(undi_open_p, sizeof(*undi_open_p)); undi_open_p->PktFilter = FLTR_DIRECTED | FLTR_BRDCST; pxe_call(PXENV_UNDI_OPEN, undi_open_p); if (undi_open_p->Status != 0) printf("undi open failed: %x\n", undi_open_p->Status); bio_free(undi_open_p, sizeof(*undi_open_p)); } static int pxe_netif_receive(void **pkt) { t_PXENV_UNDI_ISR *isr; char *buf, *ptr, *frame; size_t size, rsize; isr = bio_alloc(sizeof(*isr)); if (isr == NULL) return (-1); bzero(isr, sizeof(*isr)); isr->FuncFlag = PXENV_UNDI_ISR_IN_START; pxe_call(PXENV_UNDI_ISR, isr); if (isr->Status != 0) { bio_free(isr, sizeof(*isr)); return (-1); } bzero(isr, sizeof(*isr)); isr->FuncFlag = PXENV_UNDI_ISR_IN_PROCESS; pxe_call(PXENV_UNDI_ISR, isr); if (isr->Status != 0) { bio_free(isr, sizeof(*isr)); return (-1); } while (isr->FuncFlag == PXENV_UNDI_ISR_OUT_TRANSMIT) { /* * Wait till transmit is done. 
		 */
		bzero(isr, sizeof(*isr));
		isr->FuncFlag = PXENV_UNDI_ISR_IN_GET_NEXT;
		pxe_call(PXENV_UNDI_ISR, isr);
		if (isr->Status != 0 ||
		    isr->FuncFlag == PXENV_UNDI_ISR_OUT_DONE) {
			bio_free(isr, sizeof(*isr));
			return (-1);
		}
	}

	while (isr->FuncFlag != PXENV_UNDI_ISR_OUT_RECEIVE) {
		if (isr->Status != 0 ||
		    isr->FuncFlag == PXENV_UNDI_ISR_OUT_DONE) {
			bio_free(isr, sizeof(*isr));
			return (-1);
		}
		bzero(isr, sizeof(*isr));
		isr->FuncFlag = PXENV_UNDI_ISR_IN_GET_NEXT;
		pxe_call(PXENV_UNDI_ISR, isr);
	}

	size = isr->FrameLength;
	buf = malloc(size + ETHER_ALIGN);
	if (buf == NULL) {
		bio_free(isr, sizeof(*isr));
		return (-1);
	}
	ptr = buf + ETHER_ALIGN;
	rsize = 0;
	while (rsize < size) {
		frame = (char *)((uintptr_t)isr->Frame.segment << 4);
		frame += isr->Frame.offset;
		bcopy(PTOV(frame), ptr, isr->BufferLength);
		ptr += isr->BufferLength;
		rsize += isr->BufferLength;

		bzero(isr, sizeof(*isr));
		isr->FuncFlag = PXENV_UNDI_ISR_IN_GET_NEXT;
		pxe_call(PXENV_UNDI_ISR, isr);
		if (isr->Status != 0) {
			bio_free(isr, sizeof(*isr));
			free(buf);
			return (-1);
		}
		/* Did we get another update? */
		if (isr->FuncFlag == PXENV_UNDI_ISR_OUT_RECEIVE)
			continue;
		break;
	}
	*pkt = buf;
	bio_free(isr, sizeof(*isr));
	return (rsize);
}

static ssize_t
pxe_netif_get(struct iodesc *desc, void **pkt, time_t timeout)
{
	time_t t;
	void *ptr;
	int ret = -1;

	t = getsecs();
	while ((getsecs() - t) < timeout) {
		ret = pxe_netif_receive(&ptr);
		if (ret != -1) {
			*pkt = ptr;
			break;
		}
	}
	return (ret);
}

static ssize_t
pxe_netif_put(struct iodesc *desc, void *pkt, size_t len)
{
	t_PXENV_UNDI_TRANSMIT *trans_p;
	t_PXENV_UNDI_TBD *tbd_p;
	char *data;
	ssize_t rv = -1;

	trans_p = bio_alloc(sizeof(*trans_p));
	tbd_p = bio_alloc(sizeof(*tbd_p));
	data = bio_alloc(len);

	if (trans_p != NULL && tbd_p != NULL && data != NULL) {
		bzero(trans_p, sizeof(*trans_p));
		bzero(tbd_p, sizeof(*tbd_p));

		trans_p->TBD.segment = VTOPSEG(tbd_p);
		trans_p->TBD.offset = VTOPOFF(tbd_p);

		tbd_p->ImmedLength = len;
		tbd_p->Xmit.segment = VTOPSEG(data);
		tbd_p->Xmit.offset = VTOPOFF(data);
		bcopy(pkt, data, len);

		pxe_call(PXENV_UNDI_TRANSMIT, trans_p);
		if (trans_p->Status == 0)
			rv = len;
	}

	bio_free(data, len);
	bio_free(tbd_p, sizeof(*tbd_p));
	bio_free(trans_p, sizeof(*trans_p));
	return (rv);
}
Index: head/stand/i386/libi386/pxe.h
===================================================================
--- head/stand/i386/libi386/pxe.h	(revision 344854)
+++ head/stand/i386/libi386/pxe.h	(revision 344855)
@@ -1,512 +1,511 @@
 /*
  * Copyright (c) 2000 Alfred Perlstein
  * All rights reserved.
  * Copyright (c) 2000 Paul Saab
  * All rights reserved.
  * Copyright (c) 2000 John Baldwin
- * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * The typedefs and structures declared in this file * clearly violate style(9), the reason for this is to conform to the * typedefs/structure-names used in the Intel literature to avoid confusion. * * It's for your own good. :) */ /* It seems that intel didn't think about ABI, * either that or 16bit ABI != 32bit ABI (which seems reasonable) * I have to thank Intel for the hair loss I incurred trying to figure * out why PXE was mis-reading structures I was passing it (at least * from my point of view) * * Solution: use gcc's '__packed' to correctly align * structures passed into PXE * Question: does this really work for PXE's expected ABI? */ #define PACKED __packed #define S_SIZE(s) s, sizeof(s) - 1 #define PXENFSROOTPATH "/pxeroot" typedef struct { uint16_t offset; uint16_t segment; } SEGOFF16_t; typedef struct { uint16_t Seg_Addr; uint32_t Phy_Addr; uint16_t Seg_Size; } SEGDESC_t; typedef uint16_t SEGSEL_t; typedef uint16_t PXENV_STATUS_t; typedef uint32_t IP4_t; typedef uint32_t ADDR32_t; typedef uint16_t UDP_PORT_t; #define MAC_ADDR_LEN 16 typedef uint8_t MAC_ADDR[MAC_ADDR_LEN]; /* PXENV+ */ typedef struct { uint8_t Signature[6]; /* 'PXENV+' */ uint16_t Version; /* MSB = major, LSB = minor */ uint8_t Length; /* structure length */ uint8_t Checksum; /* checksum pad */ SEGOFF16_t RMEntry; /* SEG:OFF to PXE entry point */ /* don't use PMOffset and PMSelector (from the 2.1 PXE manual) */ uint32_t PMOffset; /* Protected mode entry */ SEGSEL_t PMSelector; /* Protected mode selector */ SEGSEL_t StackSeg; /* Stack segment address */ uint16_t StackSize; /* Stack segment size (bytes) */ SEGSEL_t BC_CodeSeg; /* BC Code segment address */ uint16_t BC_CodeSize; /* BC Code segment size (bytes) */ SEGSEL_t BC_DataSeg; /* BC Data segment address */ uint16_t BC_DataSize; /* BC Data segment size (bytes) */ SEGSEL_t UNDIDataSeg; /* UNDI Data segment address */ uint16_t UNDIDataSize; /* UNDI Data segment size (bytes) */ SEGSEL_t UNDICodeSeg; /* UNDI Code segment address */ uint16_t UNDICodeSize; /* UNDI Code segment size (bytes) */ SEGOFF16_t PXEPtr; /* SEG:OFF to !PXE struct, only present when Version > 2.1 */ } PACKED pxenv_t; /* !PXE */ typedef struct { uint8_t Signature[4]; uint8_t StructLength; uint8_t StructCksum; uint8_t StructRev; uint8_t reserved_1; SEGOFF16_t UNDIROMID; SEGOFF16_t BaseROMID; SEGOFF16_t EntryPointSP; SEGOFF16_t EntryPointESP; SEGOFF16_t StatusCallout; uint8_t reserved_2; uint8_t SegDescCn; SEGSEL_t FirstSelector; SEGDESC_t Stack; SEGDESC_t UNDIData; SEGDESC_t UNDICode; SEGDESC_t UNDICodeWrite; SEGDESC_t BC_Data; SEGDESC_t BC_Code; SEGDESC_t BC_CodeWrite; } PACKED pxe_t; #define PXENV_START_UNDI 0x0000 typedef struct { PXENV_STATUS_t Status; uint16_t ax; uint16_t bx; uint16_t dx; uint16_t di; uint16_t es; } PACKED t_PXENV_START_UNDI; #define PXENV_UNDI_STARTUP 0x0001 typedef struct { PXENV_STATUS_t Status; } PACKED t_PXENV_UNDI_STARTUP; #define PXENV_UNDI_CLEANUP 0x0002 typedef struct { PXENV_STATUS_t Status; } PACKED t_PXENV_UNDI_CLEANUP; #define 
PXENV_UNDI_INITIALIZE 0x0003 typedef struct { PXENV_STATUS_t Status; ADDR32_t ProtocolIni; /* Phys addr of a copy of the driver module */ uint8_t reserved[8]; } PACKED t_PXENV_UNDI_INITIALIZE; #define MAXNUM_MCADDR 8 typedef struct { PXENV_STATUS_t Status; uint16_t MCastAddrCount; MAC_ADDR McastAddr[MAXNUM_MCADDR]; } PACKED t_PXENV_UNDI_MCAST_ADDRESS; #define PXENV_UNDI_RESET_ADAPTER 0x0004 typedef struct { PXENV_STATUS_t Status; t_PXENV_UNDI_MCAST_ADDRESS R_Mcast_Buf; } PACKED t_PXENV_UNDI_RESET; #define PXENV_UNDI_SHUTDOWN 0x0005 typedef struct { PXENV_STATUS_t Status; } PACKED t_PXENV_UNDI_SHUTDOWN; #define PXENV_UNDI_OPEN 0x0006 typedef struct { PXENV_STATUS_t Status; uint16_t OpenFlag; uint16_t PktFilter; # define FLTR_DIRECTED 0x0001 # define FLTR_BRDCST 0x0002 # define FLTR_PRMSCS 0x0004 # define FLTR_SRC_RTG 0x0008 t_PXENV_UNDI_MCAST_ADDRESS R_Mcast_Buf; } PACKED t_PXENV_UNDI_OPEN; #define PXENV_UNDI_CLOSE 0x0007 typedef struct { PXENV_STATUS_t Status; } PACKED t_PXENV_UNDI_CLOSE; #define PXENV_UNDI_TRANSMIT 0x0008 typedef struct { PXENV_STATUS_t Status; uint8_t Protocol; # define P_UNKNOWN 0 # define P_IP 1 # define P_ARP 2 # define P_RARP 3 uint8_t XmitFlag; # define XMT_DESTADDR 0x0000 # define XMT_BROADCAST 0x0001 SEGOFF16_t DestAddr; SEGOFF16_t TBD; uint32_t Reserved[2]; } PACKED t_PXENV_UNDI_TRANSMIT; #define MAX_DATA_BLKS 8 typedef struct { uint16_t ImmedLength; SEGOFF16_t Xmit; uint16_t DataBlkCount; struct DataBlk { uint8_t TDPtrType; uint8_t TDRsvdByte; uint16_t TDDataLen; SEGOFF16_t TDDataPtr; } DataBlock[MAX_DATA_BLKS]; } PACKED t_PXENV_UNDI_TBD; #define PXENV_UNDI_SET_MCAST_ADDRESS 0x0009 typedef struct { PXENV_STATUS_t Status; t_PXENV_UNDI_MCAST_ADDRESS R_Mcast_Buf; } PACKED t_PXENV_UNDI_SET_MCAST_ADDR; #define PXENV_UNDI_SET_STATION_ADDRESS 0x000A typedef struct { PXENV_STATUS_t Status; MAC_ADDR StationAddress; /* Temp MAC address to use */ } PACKED t_PXENV_UNDI_SET_STATION_ADDR; #define PXENV_UNDI_SET_PACKET_FILTER 0x000B typedef struct { PXENV_STATUS_t Status; uint8_t filter; /* see UNDI_OPEN (0x0006) */ } PACKED t_PXENV_UNDI_SET_PACKET_FILTER; #define PXENV_UNDI_GET_INFORMATION 0x000C typedef struct { PXENV_STATUS_t Status; uint16_t BaseIo; /* Adapter base I/O address */ uint16_t IntNumber; /* Adapter IRQ number */ uint16_t MaxTranUnit; /* Adapter maximum transmit unit */ uint16_t HwType; /* Type of protocol at the hardware addr */ # define ETHER_TYPE 1 # define EXP_ETHER_TYPE 2 # define IEEE_TYPE 6 uint16_t HwAddrLen; /* Length of hardware address */ MAC_ADDR CurrentNodeAddress; /* Current hardware address */ MAC_ADDR PermNodeAddress; /* Permanent hardware address */ SEGSEL_t ROMAddress; /* Real mode ROM segment address */ uint16_t RxBufCt; /* Receive queue length */ uint16_t TxBufCt; /* Transmit queue length */ } PACKED t_PXENV_UNDI_GET_INFORMATION; #define PXENV_UNDI_GET_STATISTICS 0x000D typedef struct { PXENV_STATUS_t Status; uint32_t XmitGoodFrames; /* Number of successful transmissions */ uint32_t RcvGoodFrames; /* Number of good frames received */ uint32_t RcvCRCErrors; /* Number of frames with CRC errors */ uint32_t RcvResourceErrors; /* Number of frames dropped */ } PACKED t_PXENV_UNDI_GET_STATISTICS; #define PXENV_UNDI_CLEAR_STATISTICS 0x000E typedef struct { PXENV_STATUS_t Status; } PACKED t_PXENV_UNDI_CLEAR_STATISTICS; #define PXENV_UNDI_INITIATE_DIAGS 0x000F typedef struct { PXENV_STATUS_t Status; } PACKED t_PXENV_UNDI_INITIATE_DIAGS; #define PXENV_UNDI_FORCE_INTERRUPT 0x0010 typedef struct { PXENV_STATUS_t Status; } PACKED 
t_PXENV_UNDI_FORCE_INTERRUPT; #define PXENV_UNDI_GET_MCAST_ADDRESS 0x0011 typedef struct { PXENV_STATUS_t Status; IP4_t InetAddr; /* IP mulicast address */ MAC_ADDR MediaAddr; /* MAC multicast address */ } PACKED t_PXENV_UNDI_GET_MCAST_ADDR; #define PXENV_UNDI_GET_NIC_TYPE 0x0012 typedef struct { PXENV_STATUS_t Status; uint8_t NicType; /* Type of NIC */ # define PCI_NIC 2 # define PnP_NIC 3 # define CardBus_NIC 4 union { struct { uint16_t Vendor_ID; uint16_t Dev_ID; uint8_t Base_Class; uint8_t Sub_Class; uint8_t Prog_Intf; uint8_t Rev; uint16_t BusDevFunc; uint16_t SubVendor_ID; uint16_t SubDevice_ID; } pci, cardbus; struct { uint32_t EISA_Dev_ID; uint8_t Base_Class; uint8_t Sub_Class; uint8_t Prog_Intf; uint16_t CardSelNum; } pnp; } info; } PACKED t_PXENV_UNDI_GET_NIC_TYPE; #define PXENV_UNDI_GET_IFACE_INFO 0x0013 typedef struct { PXENV_STATUS_t Status; uint8_t IfaceType[16]; /* Name of MAC type in ASCII. */ uint32_t LinkSpeed; /* Defined in NDIS 2.0 spec */ uint32_t ServiceFlags; /* Defined in NDIS 2.0 spec */ uint32_t Reserved[4]; /* must be 0 */ } PACKED t_PXENV_UNDI_GET_NDIS_INFO; #define PXENV_UNDI_ISR 0x0014 typedef struct { PXENV_STATUS_t Status; uint16_t FuncFlag; /* PXENV_UNDI_ISR_OUT_xxx */ uint16_t BufferLength; /* Length of Frame */ uint16_t FrameLength; /* Total length of receiver frame */ uint16_t FrameHeaderLength; /* Length of the media header in Frame */ SEGOFF16_t Frame; /* receive buffer */ uint8_t ProtType; /* Protocol type */ uint8_t PktType; /* Packet Type */ # define PXENV_UNDI_ISR_IN_START 1 # define PXENV_UNDI_ISR_IN_PROCESS 2 # define PXENV_UNDI_ISR_IN_GET_NEXT 3 /* one of these will be returned for PXENV_UNDI_ISR_IN_START */ # define PXENV_UNDI_ISR_OUT_OURS 0 # define PXENV_UNDI_ISR_OUT_NOT_OUTS 1 /* * one of these will bre returned for PXEND_UNDI_ISR_IN_PROCESS * and PXENV_UNDI_ISR_IN_GET_NEXT */ # define PXENV_UNDI_ISR_OUT_DONE 0 # define PXENV_UNDI_ISR_OUT_TRANSMIT 2 # define PXENV_UNDI_ISR_OUT_RECEIVE 3 # define PXENV_UNDI_ISR_OUT_BUSY 4 } PACKED t_PXENV_UNDI_ISR; #define PXENV_STOP_UNDI 0x0015 typedef struct { PXENV_STATUS_t Status; } PACKED t_PXENV_STOP_UNDI; #define PXENV_TFTP_OPEN 0x0020 typedef struct { PXENV_STATUS_t Status; IP4_t ServerIPAddress; IP4_t GatewayIPAddress; uint8_t FileName[128]; UDP_PORT_t TFTPPort; uint16_t PacketSize; } PACKED t_PXENV_TFTP_OPEN; #define PXENV_TFTP_CLOSE 0x0021 typedef struct { PXENV_STATUS_t Status; } PACKED t_PXENV_TFTP_CLOSE; #define PXENV_TFTP_READ 0x0022 typedef struct { PXENV_STATUS_t Status; uint16_t PacketNumber; uint16_t BufferSize; SEGOFF16_t Buffer; } PACKED t_PXENV_TFTP_READ; #define PXENV_TFTP_READ_FILE 0x0023 typedef struct { PXENV_STATUS_t Status; uint8_t FileName[128]; uint32_t BufferSize; ADDR32_t Buffer; IP4_t ServerIPAddress; IP4_t GatewayIPAdress; IP4_t McastIPAdress; UDP_PORT_t TFTPClntPort; UDP_PORT_t TFTPSrvPort; uint16_t TFTPOpenTimeOut; uint16_t TFTPReopenDelay; } PACKED t_PXENV_TFTP_READ_FILE; #define PXENV_TFTP_GET_FSIZE 0x0025 typedef struct { PXENV_STATUS_t Status; IP4_t ServerIPAddress; IP4_t GatewayIPAdress; uint8_t FileName[128]; uint32_t FileSize; } PACKED t_PXENV_TFTP_GET_FSIZE; #define PXENV_UDP_OPEN 0x0030 typedef struct { PXENV_STATUS_t status; IP4_t src_ip; /* IP address of this station */ } PACKED t_PXENV_UDP_OPEN; #define PXENV_UDP_CLOSE 0x0031 typedef struct { PXENV_STATUS_t status; } PACKED t_PXENV_UDP_CLOSE; #define PXENV_UDP_READ 0x0032 typedef struct { PXENV_STATUS_t status; IP4_t src_ip; /* IP of sender */ IP4_t dest_ip; /* Only accept packets sent to this IP */ UDP_PORT_t 
s_port; /* UDP source port of sender */ UDP_PORT_t d_port; /* Only accept packets sent to this port */ uint16_t buffer_size; /* Size of the packet buffer */ SEGOFF16_t buffer; /* SEG:OFF to the packet buffer */ } PACKED t_PXENV_UDP_READ; #define PXENV_UDP_WRITE 0x0033 typedef struct { PXENV_STATUS_t status; IP4_t ip; /* dest ip addr */ IP4_t gw; /* ip gateway */ UDP_PORT_t src_port; /* source udp port */ UDP_PORT_t dst_port; /* destination udp port */ uint16_t buffer_size; /* Size of the packet buffer */ SEGOFF16_t buffer; /* SEG:OFF to the packet buffer */ } PACKED t_PXENV_UDP_WRITE; #define PXENV_UNLOAD_STACK 0x0070 typedef struct { PXENV_STATUS_t Status; uint8_t reserved[10]; } PACKED t_PXENV_UNLOAD_STACK; #define PXENV_GET_CACHED_INFO 0x0071 typedef struct { PXENV_STATUS_t Status; uint16_t PacketType; /* type (defined right here) */ # define PXENV_PACKET_TYPE_DHCP_DISCOVER 1 # define PXENV_PACKET_TYPE_DHCP_ACK 2 # define PXENV_PACKET_TYPE_BINL_REPLY 3 uint16_t BufferSize; /* max to copy, leave at 0 for pointer */ SEGOFF16_t Buffer; /* copy to, leave at 0 for pointer */ uint16_t BufferLimit; /* max size of buffer in BC dataseg ? */ } PACKED t_PXENV_GET_CACHED_INFO; /* structure filled in by PXENV_GET_CACHED_INFO * (how we determine which IP we downloaded the initial bootstrap from) * words can't describe... */ typedef struct { uint8_t opcode; # define BOOTP_REQ 1 # define BOOTP_REP 2 uint8_t Hardware; /* hardware type */ uint8_t Hardlen; /* hardware addr len */ uint8_t Gatehops; /* zero it */ uint32_t ident; /* random number chosen by client */ uint16_t seconds; /* seconds since did initial bootstrap */ uint16_t Flags; /* seconds since did initial bootstrap */ # define BOOTP_BCAST 0x8000 /* ? */ IP4_t cip; /* Client IP */ IP4_t yip; /* Your IP */ IP4_t sip; /* IP to use for next boot stage */ IP4_t gip; /* Relay IP ? */ MAC_ADDR CAddr; /* Client hardware address */ uint8_t Sname[64]; /* Server's hostname (Optional) */ uint8_t bootfile[128]; /* boot filename */ union { # if 1 # define BOOTP_DHCPVEND 1024 /* DHCP extended vendor field size */ # else # define BOOTP_DHCPVEND 312 /* DHCP standard vendor field size */ # endif uint8_t d[BOOTP_DHCPVEND]; /* raw array of vendor/dhcp options */ struct { uint8_t magic[4]; /* DHCP magic cookie */ # ifndef VM_RFC1048 # define VM_RFC1048 0x63825363L /* ? */ # endif uint32_t flags; /* bootp flags/opcodes */ uint8_t pad[56]; /* I don't think intel knows what a union does... */ } v; } vendor; } PACKED BOOTPLAYER; #define PXENV_RESTART_TFTP 0x0073 #define t_PXENV_RESTART_TFTP t_PXENV_TFTP_READ_FILE #define PXENV_START_BASE 0x0075 typedef struct { PXENV_STATUS_t Status; } PACKED t_PXENV_START_BASE; #define PXENV_STOP_BASE 0x0076 typedef struct { PXENV_STATUS_t Status; } PACKED t_PXENV_STOP_BASE; Index: head/stand/i386/pxeldr/pxeldr.S =================================================================== --- head/stand/i386/pxeldr/pxeldr.S (revision 344854) +++ head/stand/i386/pxeldr/pxeldr.S (revision 344855) @@ -1,301 +1,300 @@ /* * Copyright (c) 2000 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * This simple program is a preloader for the normal boot3 loader. It is simply * prepended to the beginning of a fully built and btxld'd loader. It then * copies the loader to the address boot2 normally loads it, emulates the * boot[12] environment (protected mode, a bootinfo struct, etc.), and then jumps * to the start of btxldr to start the boot process. This method allows a stock * /boot/loader to be booted over the network via PXE w/o having to write a * separate PXE-aware client just to load the loader. */ #include #include /* * Memory locations. */ .set MEM_PAGE_SIZE,0x1000 # memory page size, 4k .set MEM_ARG,0x900 # Arguments at start .set MEM_ARG_BTX,0xa100 # Where we move them to so the # BTX client can see them .set MEM_ARG_SIZE,0x18 # Size of the arguments .set MEM_BTX_ADDRESS,0x9000 # where BTX lives .set MEM_BTX_ENTRY,0x9010 # where BTX starts to execute .set MEM_BTX_OFFSET,MEM_PAGE_SIZE # offset of BTX in the loader .set MEM_BTX_CLIENT,0xa000 # where BTX clients live .set MEM_BIOS_KEYBOARD,0x496 # BDA byte with keyboard bit /* * a.out header fields */ .set AOUT_TEXT,0x04 # text segment size .set AOUT_DATA,0x08 # data segment size .set AOUT_BSS,0x0c # zero'd BSS size .set AOUT_SYMBOLS,0x10 # symbol table .set AOUT_ENTRY,0x14 # entry point .set AOUT_HEADER,MEM_PAGE_SIZE # size of the a.out header /* * Segment selectors. 
*/ .set SEL_SDATA,0x8 # Supervisor data .set SEL_RDATA,0x10 # Real mode data .set SEL_SCODE,0x18 # PM-32 code .set SEL_SCODE16,0x20 # PM-16 code /* * BTX constants */ .set INT_SYS,0x30 # BTX syscall interrupt /* * Bit in MEM_BIOS_KEYBOARD that is set if an enhanced keyboard is present */ .set KEYBOARD_BIT,0x10 /* * We expect to be loaded by the BIOS at 0x7c00 (standard boot loader entry * point) */ .code16 .globl start .org 0x0, 0x0 /* * BTX program loader for PXE network booting */ start: cld # string ops inc xorw %ax, %ax # zero %ax movw %ax, %ss # setup the movw $start, %sp # stack movw %es, %cx # save PXENV+ segment movw %ax, %ds # setup the movw %ax, %es # data segments andl $0xffff, %ecx # clear upper words andl $0xffff, %ebx # of %ebx and %ecx shll $4, %ecx # calculate the offset of addl %ebx, %ecx # the PXENV+ struct and pushl %ecx # save it on the stack movw $welcome_msg, %si # %ds:(%si) -> welcome message callw putstr # display the welcome message /* * Setup the arguments that the loader is expecting from boot[12] */ movw $bootinfo_msg, %si # %ds:(%si) -> boot args message callw putstr # display the message movw $MEM_ARG, %bx # %ds:(%bx) -> boot args movw %bx, %di # %es:(%di) -> boot args xorl %eax, %eax # zero %eax movw $(MEM_ARG_SIZE/4), %cx # Size of arguments in 32-bit # dwords rep # Clear the arguments stosl # to zero orb $KARGS_FLAGS_PXE, 0x8(%bx) # kargs->bootflags |= # KARGS_FLAGS_PXE popl 0xc(%bx) # kargs->pxeinfo = *PXENV+ #ifdef ALWAYS_SERIAL /* * set the RBX_SERIAL bit in the howto byte. */ orl $RB_SERIAL, (%bx) # enable serial console #endif #ifdef PROBE_KEYBOARD /* * Look at the BIOS data area to see if we have an enhanced keyboard. If not, * set the RBX_DUAL and RBX_SERIAL bits in the howto byte. */ testb $KEYBOARD_BIT, MEM_BIOS_KEYBOARD # keyboard present? 
jnz keyb # yes, so skip orl $(RB_MULTIPLE | RB_SERIAL), (%bx) # enable serial console keyb: #endif /* * Turn on the A20 address line */ callw seta20 # Turn A20 on /* * Relocate the loader and BTX using a very lazy protected mode */ movw $relocate_msg, %si # Display the callw putstr # relocation message movl end+AOUT_ENTRY, %edi # %edi is the destination movl $(end+AOUT_HEADER), %esi # %esi is # the start of the text # segment movl end+AOUT_TEXT, %ecx # %ecx = length of the text # segment lgdt gdtdesc # setup our own gdt cli # turn off interrupts movl %cr0, %eax # Turn on orb $0x1, %al # protected movl %eax, %cr0 # mode ljmp $SEL_SCODE,$pm_start # long jump to clear the # instruction pre-fetch queue .code32 pm_start: movw $SEL_SDATA, %ax # Initialize movw %ax, %ds # %ds and movw %ax, %es # %es to a flat selector rep # Relocate the movsb # text segment addl $(MEM_PAGE_SIZE - 1), %edi # pad %edi out to a new page andl $~(MEM_PAGE_SIZE - 1), %edi # for the data segment movl end+AOUT_DATA, %ecx # size of the data segment rep # Relocate the movsb # data segment movl end+AOUT_BSS, %ecx # size of the bss xorl %eax, %eax # zero %eax addb $3, %cl # round %ecx up to shrl $2, %ecx # a multiple of 4 rep # zero the stosl # bss movl end+AOUT_ENTRY, %esi # %esi -> relocated loader addl $MEM_BTX_OFFSET, %esi # %esi -> BTX in the loader movl $MEM_BTX_ADDRESS, %edi # %edi -> where BTX needs to go movzwl 0xa(%esi), %ecx # %ecx -> length of BTX rep # Relocate movsb # BTX ljmp $SEL_SCODE16,$pm_16 # Jump to 16-bit PM .code16 pm_16: movw $SEL_RDATA, %ax # Initialize movw %ax, %ds # %ds and movw %ax, %es # %es to a real mode selector movl %cr0, %eax # Turn off andb $~0x1, %al # protected movl %eax, %cr0 # mode ljmp $0,$pm_end # Long jump to clear the # instruction pre-fetch queue pm_end: sti # Turn interrupts back on now /* * Copy the BTX client to MEM_BTX_CLIENT */ xorw %ax, %ax # zero %ax and set movw %ax, %ds # %ds and %es movw %ax, %es # to segment 0 movw $MEM_BTX_CLIENT, %di # Prepare to relocate movw $btx_client, %si # the simple btx client movw $(btx_client_end-btx_client), %cx # length of btx client rep # Relocate the movsb # simple BTX client /* * Copy the boot[12] args to where the BTX client can see them */ movw $MEM_ARG, %si # where the args are at now movw $MEM_ARG_BTX, %di # where the args are moving to movw $(MEM_ARG_SIZE/4), %cx # size of the arguments in longs rep # Relocate movsl # the words /* * Save the entry point so the client can get to it later on */ movl end+AOUT_ENTRY, %eax # load the entry point stosl # add it to the end of the # arguments /* * Now we just start up BTX and let it do the rest */ movw $jump_message, %si # Display the callw putstr # jump message ljmp $0,$MEM_BTX_ENTRY # Jump to the BTX entry point /* * Display a null-terminated string */ putstr: lodsb # load %al from %ds:(%si) testb %al,%al # stop at null jnz putc # if the char != null, output it retw # return when null is hit putc: movw $0x7,%bx # attribute for output movb $0xe,%ah # BIOS: put_char int $0x10 # call BIOS, print char in %al jmp putstr # keep looping /* * Enable A20. Put an upper limit on the amount of time we wait for the * keyboard controller to get ready (65K x ISA access time). If * we wait more than that amount, the hardware is probably * legacy-free and simply doesn't have a keyboard controller. * Thus, the A20 line is already enabled. */ seta20: cli # Disable interrupts xor %cx,%cx # Clear seta20.1: inc %cx # Increment, overflow? jz seta20.3 # Yes inb $0x64,%al # Get status testb $0x2,%al # Busy? 
jnz seta20.1 # Yes movb $0xd1,%al # Command: Write outb %al,$0x64 # output port seta20.2: inb $0x64,%al # Get status testb $0x2,%al # Busy? jnz seta20.2 # Yes movb $0xdf,%al # Enable outb %al,$0x60 # A20 seta20.3: sti # Enable interrupts retw # To caller /* * BTX client to start btxldr */ .code32 btx_client: movl $(MEM_ARG_BTX-MEM_BTX_CLIENT+MEM_ARG_SIZE-4), %esi # %ds:(%esi) -> end # of boot[12] args movl $(MEM_ARG_SIZE/4), %ecx # Number of words to push std # Go backwards push_arg: lodsl # Read argument pushl %eax # Push it onto the stack loop push_arg # Push all of the arguments cld # In case anyone depends on this pushl MEM_ARG_BTX-MEM_BTX_CLIENT+MEM_ARG_SIZE # Entry point of # the loader pushl %eax # Emulate a near call movl $0x1, %eax # 'exec' system call int $INT_SYS # BTX system call btx_client_end: .code16 .p2align 4 /* * Global descriptor table. */ gdt: .word 0x0,0x0,0x0,0x0 # Null entry .word 0xffff,0x0,0x9200,0xcf # SEL_SDATA .word 0xffff,0x0,0x9200,0x0 # SEL_RDATA .word 0xffff,0x0,0x9a00,0xcf # SEL_SCODE (32-bit) .word 0xffff,0x0,0x9a00,0x8f # SEL_SCODE16 (16-bit) gdt.1: /* * Pseudo-descriptors. */ gdtdesc: .word gdt.1-gdt-1 # Limit .long gdt # Base welcome_msg: .asciz "PXE Loader 1.00\r\n\n" bootinfo_msg: .asciz "Building the boot loader arguments\r\n" relocate_msg: .asciz "Relocating the loader and the BTX\r\n" jump_message: .asciz "Starting the BTX loader\r\n" .p2align 4 end: Index: head/sys/amd64/include/intr_machdep.h =================================================================== --- head/sys/amd64/include/intr_machdep.h (revision 344854) +++ head/sys/amd64/include/intr_machdep.h (revision 344855) @@ -1,45 +1,44 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __MACHINE_INTR_MACHDEP_H__ #define __MACHINE_INTR_MACHDEP_H__ #include /* * The following data structure holds per-cpu data, and is placed just * above the top of the space used for the NMI and MC# stacks. 
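 */

The placement described above is easy to mirror in C: the structure occupies the last 16 bytes of the stack region, so the kernel can find it from the stack top. Below is a userland sketch of the same pointer math, illustrative only and not the kernel's own code; the stack size and stored value are made up.

#include <stdint.h>
#include <stdio.h>

struct nmi_pcpu {
	uintptr_t np_pcpu;
	uintptr_t __padding;	/* pad to 16 bytes on LP64 */
};

static char nmi_stack[8192] __attribute__((aligned(16)));

int
main(void)
{
	/* The per-CPU area sits in the top sizeof(struct nmi_pcpu) bytes. */
	struct nmi_pcpu *np =
	    (struct nmi_pcpu *)&nmi_stack[sizeof(nmi_stack)] - 1;

	np->np_pcpu = 0xdeadbeef;	/* stand-in for the pcpu pointer */
	printf("stack top %p, np at %p\n",
	    (void *)&nmi_stack[sizeof(nmi_stack)], (void *)np);
	return (0);
}

/*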
*/ struct nmi_pcpu { register_t np_pcpu; register_t __padding; /* pad to 16 bytes */ }; #endif /* !__MACHINE_INTR_MACHDEP_H__ */ Index: head/sys/arm/arm/ptrace_machdep.c =================================================================== --- head/sys/arm/arm/ptrace_machdep.c (revision 344854) +++ head/sys/arm/arm/ptrace_machdep.c (revision 344855) @@ -1,63 +1,62 @@ /*- * Copyright (c) 2017 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #ifdef VFP #include #endif int cpu_ptrace(struct thread *td, int req, void *addr, int data) { #ifdef VFP mcontext_vfp_t vfp; #endif int error; switch (req) { #ifdef VFP case PT_GETVFPREGS: get_vfpcontext(td, &vfp); error = copyout(&vfp, addr, sizeof(vfp)); break; case PT_SETVFPREGS: error = copyin(addr, &vfp, sizeof(vfp)); if (error == 0) set_vfpcontext(td, &vfp); break; #endif default: error = EINVAL; } return (error); } Index: head/sys/dev/acpica/acpi_isab.c =================================================================== --- head/sys/dev/acpica/acpi_isab.c (revision 344854) +++ head/sys/dev/acpica/acpi_isab.c (revision 344855) @@ -1,131 +1,130 @@ /*- * Copyright (c) 2003 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * ISA Bridge driver for Generic ISA Bus Devices. See section 10.7 of the * ACPI 2.0a specification for details on this device. */ #include "opt_acpi.h" #include #include #include #include #include #include #include #include #include /* Hooks for the ACPI CA debugging infrastructure. */ #define _COMPONENT ACPI_BUS ACPI_MODULE_NAME("ISA_ACPI") struct acpi_isab_softc { device_t ap_dev; ACPI_HANDLE ap_handle; }; static int acpi_isab_probe(device_t bus); static int acpi_isab_attach(device_t bus); static int acpi_isab_read_ivar(device_t dev, device_t child, int which, uintptr_t *result); static device_method_t acpi_isab_methods[] = { /* Device interface */ DEVMETHOD(device_probe, acpi_isab_probe), DEVMETHOD(device_attach, acpi_isab_attach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /* Bus interface */ DEVMETHOD(bus_read_ivar, acpi_isab_read_ivar), DEVMETHOD(bus_alloc_resource, bus_generic_alloc_resource), DEVMETHOD(bus_release_resource, bus_generic_release_resource), DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), DEVMETHOD_END }; static driver_t acpi_isab_driver = { "isab", acpi_isab_methods, sizeof(struct acpi_isab_softc), }; DRIVER_MODULE(acpi_isab, acpi, acpi_isab_driver, isab_devclass, 0, 0); MODULE_DEPEND(acpi_isab, acpi, 1, 1, 1); static int acpi_isab_probe(device_t dev) { static char *isa_ids[] = { "PNP0A05", "PNP0A06", NULL }; int rv; if (acpi_disabled("isab") || devclass_get_device(isab_devclass, 0) != dev) return (ENXIO); rv = ACPI_ID_PROBE(device_get_parent(dev), dev, isa_ids, NULL); if (rv <= 0) device_set_desc(dev, "ACPI Generic ISA bridge"); return (rv); } static int acpi_isab_attach(device_t dev) { struct acpi_isab_softc *sc; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); sc = device_get_softc(dev); sc->ap_dev = dev; sc->ap_handle = acpi_get_handle(dev); return (isab_attach(dev)); } static int acpi_isab_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) { struct acpi_isab_softc *sc = device_get_softc(dev); switch (which) { case ACPI_IVAR_HANDLE: *result = (uintptr_t)sc->ap_handle; return (0); } return (ENOENT); } Index: head/sys/dev/acpica/acpi_pcivar.h =================================================================== --- head/sys/dev/acpica/acpi_pcivar.h (revision 344854) +++ head/sys/dev/acpica/acpi_pcivar.h (revision 344855) @@ -1,38 +1,37 @@ /*- * Copyright (c) 2016 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _ACPI_PCIVAR_H_ #define _ACPI_PCIVAR_H_ #ifdef _KERNEL void acpi_pci_child_added(device_t dev, device_t child); #endif #endif /* !_ACPI_PCIVAR_H_ */ Index: head/sys/dev/pci/vga_pci.c =================================================================== --- head/sys/dev/pci/vga_pci.c (revision 344854) +++ head/sys/dev/pci/vga_pci.c (revision 344855) @@ -1,685 +1,684 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Simple driver for PCI VGA display devices. Drivers such as agp(4) and * drm(4) should attach as children of this device. * * XXX: The vgapci name is a hack until we somehow merge the isa vga driver * in or rename it. */ #include #include #include #include #include #include #include #if defined(__amd64__) || defined(__i386__) #include #include #endif #include #include #include /* To re-POST the card. */ struct vga_resource { struct resource *vr_res; int vr_refs; }; struct vga_pci_softc { device_t vga_msi_child; /* Child driver using MSI. 
*/ struct vga_resource vga_bars[PCIR_MAX_BAR_0 + 1]; struct vga_resource vga_bios; }; SYSCTL_DECL(_hw_pci); static struct vga_resource *lookup_res(struct vga_pci_softc *sc, int rid); static struct resource *vga_pci_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags); static int vga_pci_release_resource(device_t dev, device_t child, int type, int rid, struct resource *r); int vga_pci_default_unit = -1; SYSCTL_INT(_hw_pci, OID_AUTO, default_vgapci_unit, CTLFLAG_RDTUN, &vga_pci_default_unit, -1, "Default VGA-compatible display"); int vga_pci_is_boot_display(device_t dev) { int unit; device_t pcib; uint16_t config; /* Check that the given device is a video card */ if ((pci_get_class(dev) != PCIC_DISPLAY && (pci_get_class(dev) != PCIC_OLD || pci_get_subclass(dev) != PCIS_OLD_VGA))) return (0); unit = device_get_unit(dev); if (vga_pci_default_unit >= 0) { /* * The boot display device was determined by a previous * call to this function, or the user forced it using * the hw.pci.default_vgapci_unit tunable. */ return (vga_pci_default_unit == unit); } /* * The primary video card used as a boot display must have the * "I/O" and "Memory Address Space Decoding" bits set in its * Command register. * * Furthermore, if the card is attached to a bridge, instead of * the root PCI bus, the bridge must have the "VGA Enable" bit * set in its Control register. */ pcib = device_get_parent(device_get_parent(dev)); if (device_get_devclass(device_get_parent(pcib)) == devclass_find("pci")) { /* * The parent bridge is a PCI-to-PCI bridge: check the * value of the "VGA Enable" bit. */ config = pci_read_config(pcib, PCIR_BRIDGECTL_1, 2); if ((config & PCIB_BCR_VGA_ENABLE) == 0) return (0); } config = pci_read_config(dev, PCIR_COMMAND, 2); if ((config & (PCIM_CMD_PORTEN | PCIM_CMD_MEMEN)) == 0) return (0); /* * Disable interrupts until a chipset driver is loaded for * this PCI device. Else unhandled display adapter interrupts * might freeze the CPU. */ pci_write_config(dev, PCIR_COMMAND, config | PCIM_CMD_INTxDIS, 2); /* This video card is the boot display: record its unit number. */ vga_pci_default_unit = unit; device_set_flags(dev, 1); return (1); } void * vga_pci_map_bios(device_t dev, size_t *size) { int rid; struct resource *res; #if defined(__amd64__) || defined(__i386__) if (vga_pci_is_boot_display(dev)) { /* * On x86, the System BIOS copy the default display * device's Video BIOS at a fixed location in system * memory (0xC0000, 128 kBytes long) at boot time. * * We use this copy for the default boot device, because * the original ROM may not be valid after boot. */ *size = VGA_PCI_BIOS_SHADOW_SIZE; return (pmap_mapbios(VGA_PCI_BIOS_SHADOW_ADDR, *size)); } #endif rid = PCIR_BIOS; res = vga_pci_alloc_resource(dev, NULL, SYS_RES_MEMORY, &rid, 0, ~0, 1, RF_ACTIVE); if (res == NULL) { return (NULL); } *size = rman_get_size(res); return (rman_get_virtual(res)); } void vga_pci_unmap_bios(device_t dev, void *bios) { struct vga_resource *vr; if (bios == NULL) { return; } #if defined(__amd64__) || defined(__i386__) if (vga_pci_is_boot_display(dev)) { /* We mapped the BIOS shadow copy located at 0xC0000. */ pmap_unmapdev((vm_offset_t)bios, VGA_PCI_BIOS_SHADOW_SIZE); return; } #endif /* * Look up the PCIR_BIOS resource in our softc. It should match * the address we returned previously. 
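[Usage note for vga_pci_map_bios()/vga_pci_unmap_bios() above: a child of vgapci(4) maps the ROM through its parent and must hand the same pointer back through the matching helper. Hedged sketch in kernel context, prototypes as given in this file; the 0x55 0xaa option-ROM signature check and the helper name are illustrative.]

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>

static int
probe_vbios(device_t child)
{
	device_t vga = device_get_parent(child);
	u_char *bios;
	size_t size;

	bios = vga_pci_map_bios(vga, &size);
	if (bios == NULL)
		return (ENXIO);
	/* PCI option ROMs begin with the 0x55 0xaa signature. */
	if (size < 2 || bios[0] != 0x55 || bios[1] != 0xaa) {
		vga_pci_unmap_bios(vga, bios);
		return (ENXIO);
	}
	/* ... parse ROM contents here ... */
	vga_pci_unmap_bios(vga, bios);
	return (0);
}

[On x86 the returned pointer may be the 0xC0000 shadow copy rather than the BAR mapping, which is why the unmap must go through the same helper rather than a raw resource release.]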
*/ vr = lookup_res(device_get_softc(dev), PCIR_BIOS); KASSERT(vr->vr_res != NULL, ("vga_pci_unmap_bios: bios not mapped")); KASSERT(rman_get_virtual(vr->vr_res) == bios, ("vga_pci_unmap_bios: mismatch")); vga_pci_release_resource(dev, NULL, SYS_RES_MEMORY, PCIR_BIOS, vr->vr_res); } int vga_pci_repost(device_t dev) { #if defined(__amd64__) || defined(__i386__) x86regs_t regs; if (!vga_pci_is_boot_display(dev)) return (EINVAL); if (x86bios_get_orm(VGA_PCI_BIOS_SHADOW_ADDR) == NULL) return (ENOTSUP); x86bios_init_regs(®s); regs.R_AH = pci_get_bus(dev); regs.R_AL = (pci_get_slot(dev) << 3) | (pci_get_function(dev) & 0x07); regs.R_DL = 0x80; device_printf(dev, "REPOSTing\n"); x86bios_call(®s, X86BIOS_PHYSTOSEG(VGA_PCI_BIOS_SHADOW_ADDR + 3), X86BIOS_PHYSTOOFF(VGA_PCI_BIOS_SHADOW_ADDR + 3)); x86bios_get_intr(0x10); return (0); #else return (ENOTSUP); #endif } static int vga_pci_probe(device_t dev) { switch (pci_get_class(dev)) { case PCIC_DISPLAY: break; case PCIC_OLD: if (pci_get_subclass(dev) != PCIS_OLD_VGA) return (ENXIO); break; default: return (ENXIO); } /* Probe default display. */ vga_pci_is_boot_display(dev); device_set_desc(dev, "VGA-compatible display"); return (BUS_PROBE_GENERIC); } static int vga_pci_attach(device_t dev) { bus_generic_probe(dev); /* Always create a drm child for now to make it easier on drm. */ device_add_child(dev, "drm", -1); device_add_child(dev, "drmn", -1); bus_generic_attach(dev); if (vga_pci_is_boot_display(dev)) device_printf(dev, "Boot video device\n"); return (0); } static int vga_pci_suspend(device_t dev) { return (bus_generic_suspend(dev)); } static int vga_pci_detach(device_t dev) { int error; error = bus_generic_detach(dev); if (error == 0) error = device_delete_children(dev); return (error); } static int vga_pci_resume(device_t dev) { return (bus_generic_resume(dev)); } /* Bus interface. */ static int vga_pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) { return (BUS_READ_IVAR(device_get_parent(dev), dev, which, result)); } static int vga_pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value) { return (EINVAL); } static int vga_pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags, driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep) { return (BUS_SETUP_INTR(device_get_parent(dev), dev, irq, flags, filter, intr, arg, cookiep)); } static int vga_pci_teardown_intr(device_t dev, device_t child, struct resource *irq, void *cookie) { return (BUS_TEARDOWN_INTR(device_get_parent(dev), dev, irq, cookie)); } static struct vga_resource * lookup_res(struct vga_pci_softc *sc, int rid) { int bar; if (rid == PCIR_BIOS) return (&sc->vga_bios); bar = PCI_RID2BAR(rid); if (bar >= 0 && bar <= PCIR_MAX_BAR_0) return (&sc->vga_bars[bar]); return (NULL); } static struct resource * vga_pci_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct vga_resource *vr; switch (type) { case SYS_RES_MEMORY: case SYS_RES_IOPORT: /* * For BARs, we cache the resource so that we only allocate it * from the PCI bus once. 
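[The BAR caching described in the comment above keys each allocation by BAR index, with one extra slot for the ROM. A sketch of the mapping lookup_res() relies on; PCI_RID2BAR() and the PCIR_* constants come from <dev/pci/pcireg.h>, and the expansion shown is my reading of that macro.]

/*
 * Config-space offset -> cache slot:
 *   0x10 -> BAR 0, 0x14 -> BAR 1, ..., 0x24 -> BAR 5 (PCIR_MAX_BAR_0);
 *   PCIR_BIOS selects the separate vga_bios slot.
 */
static int
rid_to_bar(int rid)
{
	return ((rid - PCIR_BARS) / 4);		/* == PCI_RID2BAR(rid) */
}

[With this, two children requesting the same BAR share one struct resource, and vr_refs ensures only the final release reaches the PCI bus.]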
*/ vr = lookup_res(device_get_softc(dev), *rid); if (vr == NULL) return (NULL); if (vr->vr_res == NULL) vr->vr_res = bus_alloc_resource(dev, type, rid, start, end, count, flags); if (vr->vr_res != NULL) vr->vr_refs++; return (vr->vr_res); } return (bus_alloc_resource(dev, type, rid, start, end, count, flags)); } static int vga_pci_release_resource(device_t dev, device_t child, int type, int rid, struct resource *r) { struct vga_resource *vr; int error; switch (type) { case SYS_RES_MEMORY: case SYS_RES_IOPORT: /* * For BARs, we release the resource from the PCI bus * when the last child reference goes away. */ vr = lookup_res(device_get_softc(dev), rid); if (vr == NULL) return (EINVAL); if (vr->vr_res == NULL) return (EINVAL); KASSERT(vr->vr_res == r, ("vga_pci resource mismatch")); if (vr->vr_refs > 1) { vr->vr_refs--; return (0); } KASSERT(vr->vr_refs > 0, ("vga_pci resource reference count underflow")); error = bus_release_resource(dev, type, rid, r); if (error == 0) { vr->vr_res = NULL; vr->vr_refs = 0; } return (error); } return (bus_release_resource(dev, type, rid, r)); } /* PCI interface. */ static uint32_t vga_pci_read_config(device_t dev, device_t child, int reg, int width) { return (pci_read_config(dev, reg, width)); } static void vga_pci_write_config(device_t dev, device_t child, int reg, uint32_t val, int width) { pci_write_config(dev, reg, val, width); } static int vga_pci_enable_busmaster(device_t dev, device_t child) { return (pci_enable_busmaster(dev)); } static int vga_pci_disable_busmaster(device_t dev, device_t child) { return (pci_disable_busmaster(dev)); } static int vga_pci_enable_io(device_t dev, device_t child, int space) { device_printf(dev, "child %s requested pci_enable_io\n", device_get_nameunit(child)); return (pci_enable_io(dev, space)); } static int vga_pci_disable_io(device_t dev, device_t child, int space) { device_printf(dev, "child %s requested pci_disable_io\n", device_get_nameunit(child)); return (pci_disable_io(dev, space)); } static int vga_pci_get_vpd_ident(device_t dev, device_t child, const char **identptr) { return (pci_get_vpd_ident(dev, identptr)); } static int vga_pci_get_vpd_readonly(device_t dev, device_t child, const char *kw, const char **vptr) { return (pci_get_vpd_readonly(dev, kw, vptr)); } static int vga_pci_set_powerstate(device_t dev, device_t child, int state) { device_printf(dev, "child %s requested pci_set_powerstate\n", device_get_nameunit(child)); return (pci_set_powerstate(dev, state)); } static int vga_pci_get_powerstate(device_t dev, device_t child) { device_printf(dev, "child %s requested pci_get_powerstate\n", device_get_nameunit(child)); return (pci_get_powerstate(dev)); } static int vga_pci_assign_interrupt(device_t dev, device_t child) { device_printf(dev, "child %s requested pci_assign_interrupt\n", device_get_nameunit(child)); return (PCI_ASSIGN_INTERRUPT(device_get_parent(dev), dev)); } static int vga_pci_find_cap(device_t dev, device_t child, int capability, int *capreg) { return (pci_find_cap(dev, capability, capreg)); } static int vga_pci_find_next_cap(device_t dev, device_t child, int capability, int start, int *capreg) { return (pci_find_next_cap(dev, capability, start, capreg)); } static int vga_pci_find_extcap(device_t dev, device_t child, int capability, int *capreg) { return (pci_find_extcap(dev, capability, capreg)); } static int vga_pci_find_next_extcap(device_t dev, device_t child, int capability, int start, int *capreg) { return (pci_find_next_extcap(dev, capability, start, capreg)); } static int 
vga_pci_find_htcap(device_t dev, device_t child, int capability, int *capreg) { return (pci_find_htcap(dev, capability, capreg)); } static int vga_pci_find_next_htcap(device_t dev, device_t child, int capability, int start, int *capreg) { return (pci_find_next_htcap(dev, capability, start, capreg)); } static int vga_pci_alloc_msi(device_t dev, device_t child, int *count) { struct vga_pci_softc *sc; int error; sc = device_get_softc(dev); if (sc->vga_msi_child != NULL) return (EBUSY); error = pci_alloc_msi(dev, count); if (error == 0) sc->vga_msi_child = child; return (error); } static int vga_pci_alloc_msix(device_t dev, device_t child, int *count) { struct vga_pci_softc *sc; int error; sc = device_get_softc(dev); if (sc->vga_msi_child != NULL) return (EBUSY); error = pci_alloc_msix(dev, count); if (error == 0) sc->vga_msi_child = child; return (error); } static int vga_pci_remap_msix(device_t dev, device_t child, int count, const u_int *vectors) { struct vga_pci_softc *sc; sc = device_get_softc(dev); if (sc->vga_msi_child != child) return (ENXIO); return (pci_remap_msix(dev, count, vectors)); } static int vga_pci_release_msi(device_t dev, device_t child) { struct vga_pci_softc *sc; int error; sc = device_get_softc(dev); if (sc->vga_msi_child != child) return (ENXIO); error = pci_release_msi(dev); if (error == 0) sc->vga_msi_child = NULL; return (error); } static int vga_pci_msi_count(device_t dev, device_t child) { return (pci_msi_count(dev)); } static int vga_pci_msix_count(device_t dev, device_t child) { return (pci_msix_count(dev)); } static bus_dma_tag_t vga_pci_get_dma_tag(device_t bus, device_t child) { return (bus_get_dma_tag(bus)); } static device_method_t vga_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, vga_pci_probe), DEVMETHOD(device_attach, vga_pci_attach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, vga_pci_suspend), DEVMETHOD(device_detach, vga_pci_detach), DEVMETHOD(device_resume, vga_pci_resume), /* Bus interface */ DEVMETHOD(bus_read_ivar, vga_pci_read_ivar), DEVMETHOD(bus_write_ivar, vga_pci_write_ivar), DEVMETHOD(bus_setup_intr, vga_pci_setup_intr), DEVMETHOD(bus_teardown_intr, vga_pci_teardown_intr), DEVMETHOD(bus_alloc_resource, vga_pci_alloc_resource), DEVMETHOD(bus_release_resource, vga_pci_release_resource), DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), DEVMETHOD(bus_get_dma_tag, vga_pci_get_dma_tag), /* PCI interface */ DEVMETHOD(pci_read_config, vga_pci_read_config), DEVMETHOD(pci_write_config, vga_pci_write_config), DEVMETHOD(pci_enable_busmaster, vga_pci_enable_busmaster), DEVMETHOD(pci_disable_busmaster, vga_pci_disable_busmaster), DEVMETHOD(pci_enable_io, vga_pci_enable_io), DEVMETHOD(pci_disable_io, vga_pci_disable_io), DEVMETHOD(pci_get_vpd_ident, vga_pci_get_vpd_ident), DEVMETHOD(pci_get_vpd_readonly, vga_pci_get_vpd_readonly), DEVMETHOD(pci_get_powerstate, vga_pci_get_powerstate), DEVMETHOD(pci_set_powerstate, vga_pci_set_powerstate), DEVMETHOD(pci_assign_interrupt, vga_pci_assign_interrupt), DEVMETHOD(pci_find_cap, vga_pci_find_cap), DEVMETHOD(pci_find_next_cap, vga_pci_find_next_cap), DEVMETHOD(pci_find_extcap, vga_pci_find_extcap), DEVMETHOD(pci_find_next_extcap, vga_pci_find_next_extcap), DEVMETHOD(pci_find_htcap, vga_pci_find_htcap), DEVMETHOD(pci_find_next_htcap, vga_pci_find_next_htcap), DEVMETHOD(pci_alloc_msi, vga_pci_alloc_msi), DEVMETHOD(pci_alloc_msix, vga_pci_alloc_msix), DEVMETHOD(pci_remap_msix, 
vga_pci_remap_msix), DEVMETHOD(pci_release_msi, vga_pci_release_msi), DEVMETHOD(pci_msi_count, vga_pci_msi_count), DEVMETHOD(pci_msix_count, vga_pci_msix_count), { 0, 0 } }; static driver_t vga_pci_driver = { "vgapci", vga_pci_methods, sizeof(struct vga_pci_softc), }; static devclass_t vga_devclass; DRIVER_MODULE(vgapci, pci, vga_pci_driver, vga_devclass, 0, 0); MODULE_DEPEND(vgapci, x86bios, 1, 1, 1); Index: head/sys/dev/rc/rc.c =================================================================== --- head/sys/dev/rc/rc.c (revision 344854) +++ head/sys/dev/rc/rc.c (revision 344855) @@ -1,1314 +1,1314 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (C) 1995 by Pavel Antonov, Moscow, Russia. * Copyright (C) 1995 by Andrey A. Chernov, Moscow, Russia. - * Copyright (C) 2002 by John Baldwin * All rights reserved. + * Copyright (C) 2002 by John Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * SDL Communications Riscom/8 (based on Cirrus Logic CL-CD180) driver * */ /*#define RCDEBUG*/ #include "opt_tty.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define IOBASE_ADDRS 14 #define rcin(sc, port) RC_IN(sc, port) #define rcout(sc, port, v) RC_OUT(sc, port, v) #define WAITFORCCR(sc, chan) rc_wait0((sc), (chan), __LINE__) #define CCRCMD(sc, chan, cmd) do { \ WAITFORCCR((sc), (chan)); \ rcout((sc), CD180_CCR, (cmd)); \ } while (0) #define RC_IBUFSIZE 256 #define RB_I_HIGH_WATER (TTYHOG - 2 * RC_IBUFSIZE) #define RC_OBUFSIZE 512 #define RC_IHIGHWATER (3 * RC_IBUFSIZE / 4) #define INPUT_FLAGS_SHIFT (2 * RC_IBUFSIZE) #define LOTS_OF_EVENTS 64 #define RC_FAKEID 0x10 /* Per-channel structure */ struct rc_chans { struct rc_softc *rc_rcb; /* back ptr */ u_short rc_flags; /* Misc. flags */ int rc_chan; /* Channel # */ u_char rc_ier; /* intr. enable reg */ u_char rc_msvr; /* modem sig. 
status */ u_char rc_cor2; /* options reg */ u_char rc_pendcmd; /* special cmd pending */ u_int rc_dcdwaits; /* how many waits DCD in open */ struct tty *rc_tp; /* tty struct */ u_char *rc_iptr; /* Chars input buffer */ u_char *rc_hiwat; /* hi-water mark */ u_char *rc_bufend; /* end of buffer */ u_char *rc_optr; /* ptr in output buf */ u_char *rc_obufend; /* end of output buf */ u_char rc_ibuf[4 * RC_IBUFSIZE]; /* input buffer */ u_char rc_obuf[RC_OBUFSIZE]; /* output buffer */ struct callout rc_dtrcallout; }; /* Per-board structure */ struct rc_softc { device_t sc_dev; struct resource *sc_irq; struct resource *sc_port[IOBASE_ADDRS]; int sc_irqrid; void *sc_hwicookie; bus_space_tag_t sc_bt; bus_space_handle_t sc_bh; u_int sc_unit; /* unit # */ u_char sc_dtr; /* DTR status */ int sc_scheduled_event; void *sc_swicookie; struct rc_chans sc_channels[CD180_NCHAN]; /* channels */ }; /* Static prototypes */ static t_close_t rc_close; static void rc_break(struct tty *, int); static void rc_release_resources(device_t dev); static void rc_intr(void *); static void rc_hwreset(struct rc_softc *, unsigned int); static int rc_test(struct rc_softc *); static void rc_discard_output(struct rc_chans *); static int rc_modem(struct tty *, int, int); static void rc_start(struct tty *); static void rc_stop(struct tty *, int rw); static int rc_param(struct tty *, struct termios *); static void rc_pollcard(void *); static void rc_reinit(struct rc_softc *); #ifdef RCDEBUG static void printrcflags(); #endif static void rc_wait0(struct rc_softc *sc, int chan, int line); static devclass_t rc_devclass; /* Flags */ #define RC_DTR_OFF 0x0001 /* DTR wait, for close/open */ #define RC_ACTOUT 0x0002 /* Dial-out port active */ #define RC_RTSFLOW 0x0004 /* RTS flow ctl enabled */ #define RC_CTSFLOW 0x0008 /* CTS flow ctl enabled */ #define RC_DORXFER 0x0010 /* RXFER event planned */ #define RC_DOXXFER 0x0020 /* XXFER event planned */ #define RC_MODCHG 0x0040 /* Modem status changed */ #define RC_OSUSP 0x0080 /* Output suspended */ #define RC_OSBUSY 0x0100 /* start() routine in progress */ #define RC_WAS_BUFOVFL 0x0200 /* low-level buffer ovferflow */ #define RC_WAS_SILOVFL 0x0400 /* silo buffer overflow */ #define RC_SEND_RDY 0x0800 /* ready to send */ /* Table for translation of RCSR status bits to internal form */ static int rc_rcsrt[16] = { 0, TTY_OE, TTY_FE, TTY_FE|TTY_OE, TTY_PE, TTY_PE|TTY_OE, TTY_PE|TTY_FE, TTY_PE|TTY_FE|TTY_OE, TTY_BI, TTY_BI|TTY_OE, TTY_BI|TTY_FE, TTY_BI|TTY_FE|TTY_OE, TTY_BI|TTY_PE, TTY_BI|TTY_PE|TTY_OE, TTY_BI|TTY_PE|TTY_FE, TTY_BI|TTY_PE|TTY_FE|TTY_OE }; static int rc_ports[] = { 0x220, 0x240, 0x250, 0x260, 0x2a0, 0x2b0, 0x300, 0x320 }; static int iobase_addrs[IOBASE_ADDRS] = { 0, 0x400, 0x800, 0xc00, 0x1400, 0x1800, 0x1c00, 0x2000, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x8000 }; /**********************************************/ static int rc_probe(device_t dev) { u_int port; int i, found; /* * We don't know of any PnP ID's for these cards. */ if (isa_get_logicalid(dev) != 0) return (ENXIO); /* * We have to have an IO port hint that is valid. */ port = isa_get_port(dev); if (port == -1) return (ENXIO); found = 0; for (i = 0; i < nitems(rc_ports); i++) if (rc_ports[i] == port) { found = 1; break; } if (!found) return (ENXIO); /* * We have to have an IRQ hint. 
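[Since rc_probe() above rejects anything with a PnP ID and requires both a port hint matching rc_ports[] and an IRQ hint, attachment is driven entirely by device hints. Illustrative entries; the IRQ value is arbitrary, but the port must be one of the addresses in the table.]

/*
 * /boot/device.hints:
 *	hint.rc.0.at="isa"
 *	hint.rc.0.port="0x220"
 *	hint.rc.0.irq="12"
 */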
*/ if (isa_get_irq(dev) == -1) return (ENXIO); device_set_desc(dev, "SDL Riscom/8"); return (0); } static int rc_attach(device_t dev) { struct rc_chans *rc; struct tty *tp; struct rc_softc *sc; u_int port; int base, chan, error, i, x; sc = device_get_softc(dev); sc->sc_dev = dev; /* * We need to have IO ports. Lots of them. We need * the following ranges relative to the base port: * 0x0 - 0x10 * 0x400 - 0x410 * 0x800 - 0x810 * 0xc00 - 0xc10 * 0x1400 - 0x1410 * 0x1800 - 0x1810 * 0x1c00 - 0x1c10 * 0x2000 - 0x2010 * 0x3000 - 0x3010 * 0x3400 - 0x3410 * 0x3800 - 0x3810 * 0x3c00 - 0x3c10 * 0x4000 - 0x4010 * 0x8000 - 0x8010 */ port = isa_get_port(dev); for (i = 0; i < IOBASE_ADDRS; i++) if (bus_set_resource(dev, SYS_RES_IOPORT, i, port + iobase_addrs[i], 0x10) != 0) return (ENXIO); error = ENOMEM; for (i = 0; i < IOBASE_ADDRS; i++) { x = i; sc->sc_port[i] = bus_alloc_resource_anywhere(dev, SYS_RES_IOPORT, &x, 0x10, RF_ACTIVE); if (x != i) { device_printf(dev, "ioport %d was rid %d\n", i, x); goto fail; } if (sc->sc_port[i] == NULL) { device_printf(dev, "failed to alloc ioports %x-%x\n", port + iobase_addrs[i], port + iobase_addrs[i] + 0x10); goto fail; } } sc->sc_bt = rman_get_bustag(sc->sc_port[0]); sc->sc_bh = rman_get_bushandle(sc->sc_port[0]); sc->sc_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->sc_irqrid, RF_ACTIVE); if (sc->sc_irq == NULL) { device_printf(dev, "failed to alloc IRQ\n"); goto fail; } /* * Now do some actual tests to make sure it works. */ error = ENXIO; rcout(sc, CD180_PPRL, 0x22); /* Random values to Prescale reg. */ rcout(sc, CD180_PPRH, 0x11); if (rcin(sc, CD180_PPRL) != 0x22 || rcin(sc, CD180_PPRH) != 0x11) goto fail; if (rc_test(sc)) goto fail; /* * Ok, start actually hooking things up. */ sc->sc_unit = device_get_unit(dev); /*sc->sc_chipid = 0x10 + device_get_unit(dev);*/ device_printf(dev, "%d chans, firmware rev. 
%c\n", CD180_NCHAN, (rcin(sc, CD180_GFRCR) & 0xF) + 'A'); rc = sc->sc_channels; base = CD180_NCHAN * sc->sc_unit; for (chan = 0; chan < CD180_NCHAN; chan++, rc++) { rc->rc_rcb = sc; rc->rc_chan = chan; rc->rc_iptr = rc->rc_ibuf; rc->rc_bufend = &rc->rc_ibuf[RC_IBUFSIZE]; rc->rc_hiwat = &rc->rc_ibuf[RC_IHIGHWATER]; rc->rc_optr = rc->rc_obufend = rc->rc_obuf; callout_init(&rc->rc_dtrcallout, 0); tp = rc->rc_tp = ttyalloc(); tp->t_sc = rc; tp->t_oproc = rc_start; tp->t_param = rc_param; tp->t_modem = rc_modem; tp->t_break = rc_break; tp->t_close = rc_close; tp->t_stop = rc_stop; ttycreate(tp, TS_CALLOUT, "m%d", chan + base); } error = bus_setup_intr(dev, sc->sc_irq, INTR_TYPE_TTY, NULL, rc_intr, sc, &sc->sc_hwicookie); if (error) { device_printf(dev, "failed to register interrupt handler\n"); goto fail; } swi_add(&tty_intr_event, "rc", rc_pollcard, sc, SWI_TTY, 0, &sc->sc_swicookie); return (0); fail: rc_release_resources(dev); return (error); } static int rc_detach(device_t dev) { struct rc_softc *sc; struct rc_chans *rc; int error, i; sc = device_get_softc(dev); rc = sc->sc_channels; for (i = 0; i < CD180_NCHAN; i++, rc++) ttyfree(rc->rc_tp); error = bus_teardown_intr(dev, sc->sc_irq, sc->sc_hwicookie); if (error) device_printf(dev, "failed to deregister interrupt handler\n"); swi_remove(sc->sc_swicookie); rc_release_resources(dev); return (0); } static void rc_release_resources(device_t dev) { struct rc_softc *sc; int i; sc = device_get_softc(dev); if (sc->sc_irq != NULL) { bus_release_resource(dev, SYS_RES_IRQ, sc->sc_irqrid, sc->sc_irq); sc->sc_irq = NULL; } for (i = 0; i < IOBASE_ADDRS; i++) { if (sc->sc_port[i] == NULL) break; bus_release_resource(dev, SYS_RES_IOPORT, i, sc->sc_port[i]); sc->sc_port[i] = NULL; } } /* RC interrupt handling */ static void rc_intr(void *arg) { struct rc_softc *sc; struct rc_chans *rc; int resid, chan; u_char val, iack, bsr, ucnt, *optr; int good_data, t_state; sc = (struct rc_softc *)arg; bsr = ~(rcin(sc, RC_BSR)); if (!(bsr & (RC_BSR_TOUT|RC_BSR_RXINT|RC_BSR_TXINT|RC_BSR_MOINT))) { device_printf(sc->sc_dev, "extra interrupt\n"); rcout(sc, CD180_EOIR, 0); return; } while (bsr & (RC_BSR_TOUT|RC_BSR_RXINT|RC_BSR_TXINT|RC_BSR_MOINT)) { #ifdef RCDEBUG_DETAILED device_printf(sc->sc_dev, "intr (%p) %s%s%s%s\n", arg, bsr, (bsr & RC_BSR_TOUT)?"TOUT ":"", (bsr & RC_BSR_RXINT)?"RXINT ":"", (bsr & RC_BSR_TXINT)?"TXINT ":"", (bsr & RC_BSR_MOINT)?"MOINT":""); #endif if (bsr & RC_BSR_TOUT) { device_printf(sc->sc_dev, "hardware failure, reset board\n"); rcout(sc, RC_CTOUT, 0); rc_reinit(sc); return; } if (bsr & RC_BSR_RXINT) { iack = rcin(sc, RC_PILR_RX); good_data = (iack == (GIVR_IT_RGDI | RC_FAKEID)); if (!good_data && iack != (GIVR_IT_REI | RC_FAKEID)) { device_printf(sc->sc_dev, "fake rxint: %02x\n", iack); goto more_intrs; } chan = ((rcin(sc, CD180_GICR) & GICR_CHAN) >> GICR_LSH); rc = &sc->sc_channels[chan]; t_state = rc->rc_tp->t_state; /* Do RTS flow control stuff */ if ( (rc->rc_flags & RC_RTSFLOW) || !(t_state & TS_ISOPEN) ) { if ( ( !(t_state & TS_ISOPEN) || (t_state & TS_TBLOCK) ) && (rc->rc_msvr & MSVR_RTS) ) rcout(sc, CD180_MSVR, rc->rc_msvr &= ~MSVR_RTS); else if (!(rc->rc_msvr & MSVR_RTS)) rcout(sc, CD180_MSVR, rc->rc_msvr |= MSVR_RTS); } ucnt = rcin(sc, CD180_RDCR) & 0xF; resid = 0; if (t_state & TS_ISOPEN) { /* check for input buffer overflow */ if ((rc->rc_iptr + ucnt) >= rc->rc_bufend) { resid = ucnt; ucnt = rc->rc_bufend - rc->rc_iptr; resid -= ucnt; if (!(rc->rc_flags & RC_WAS_BUFOVFL)) { rc->rc_flags |= RC_WAS_BUFOVFL; sc->sc_scheduled_event++; } 
} optr = rc->rc_iptr; /* check foor good data */ if (good_data) { while (ucnt-- > 0) { val = rcin(sc, CD180_RDR); optr[0] = val; optr[INPUT_FLAGS_SHIFT] = 0; optr++; sc->sc_scheduled_event++; if (val != 0 && val == rc->rc_tp->t_hotchar) swi_sched(sc->sc_swicookie, 0); } } else { /* Store also status data */ while (ucnt-- > 0) { iack = rcin(sc, CD180_RCSR); if (iack & RCSR_Timeout) break; if ( (iack & RCSR_OE) && !(rc->rc_flags & RC_WAS_SILOVFL)) { rc->rc_flags |= RC_WAS_SILOVFL; sc->sc_scheduled_event++; } val = rcin(sc, CD180_RDR); /* Don't store PE if IGNPAR and BREAK if IGNBRK, this hack allows "raw" tty optimization works even if IGN* is set. */ if ( !(iack & (RCSR_PE|RCSR_FE|RCSR_Break)) || ((!(iack & (RCSR_PE|RCSR_FE)) || !(rc->rc_tp->t_iflag & IGNPAR)) && (!(iack & RCSR_Break) || !(rc->rc_tp->t_iflag & IGNBRK)))) { if ( (iack & (RCSR_PE|RCSR_FE)) && (t_state & TS_CAN_BYPASS_L_RINT) && ((iack & RCSR_FE) || ((iack & RCSR_PE) && (rc->rc_tp->t_iflag & INPCK)))) val = 0; else if (val != 0 && val == rc->rc_tp->t_hotchar) swi_sched(sc->sc_swicookie, 0); optr[0] = val; optr[INPUT_FLAGS_SHIFT] = iack; optr++; sc->sc_scheduled_event++; } } } rc->rc_iptr = optr; rc->rc_flags |= RC_DORXFER; } else resid = ucnt; /* Clear FIFO if necessary */ while (resid-- > 0) { if (!good_data) iack = rcin(sc, CD180_RCSR); else iack = 0; if (iack & RCSR_Timeout) break; (void) rcin(sc, CD180_RDR); } goto more_intrs; } if (bsr & RC_BSR_MOINT) { iack = rcin(sc, RC_PILR_MODEM); if (iack != (GIVR_IT_MSCI | RC_FAKEID)) { device_printf(sc->sc_dev, "fake moint: %02x\n", iack); goto more_intrs; } chan = ((rcin(sc, CD180_GICR) & GICR_CHAN) >> GICR_LSH); rc = &sc->sc_channels[chan]; iack = rcin(sc, CD180_MCR); rc->rc_msvr = rcin(sc, CD180_MSVR); rcout(sc, CD180_MCR, 0); #ifdef RCDEBUG printrcflags(rc, "moint"); #endif if (rc->rc_flags & RC_CTSFLOW) { if (rc->rc_msvr & MSVR_CTS) rc->rc_flags |= RC_SEND_RDY; else rc->rc_flags &= ~RC_SEND_RDY; } else rc->rc_flags |= RC_SEND_RDY; if ((iack & MCR_CDchg) && !(rc->rc_flags & RC_MODCHG)) { sc->sc_scheduled_event += LOTS_OF_EVENTS; rc->rc_flags |= RC_MODCHG; swi_sched(sc->sc_swicookie, 0); } goto more_intrs; } if (bsr & RC_BSR_TXINT) { iack = rcin(sc, RC_PILR_TX); if (iack != (GIVR_IT_TDI | RC_FAKEID)) { device_printf(sc->sc_dev, "fake txint: %02x\n", iack); goto more_intrs; } chan = ((rcin(sc, CD180_GICR) & GICR_CHAN) >> GICR_LSH); rc = &sc->sc_channels[chan]; if ( (rc->rc_flags & RC_OSUSP) || !(rc->rc_flags & RC_SEND_RDY) ) goto more_intrs; /* Handle breaks and other stuff */ if (rc->rc_pendcmd) { rcout(sc, CD180_COR2, rc->rc_cor2 |= COR2_ETC); rcout(sc, CD180_TDR, CD180_C_ESC); rcout(sc, CD180_TDR, rc->rc_pendcmd); rcout(sc, CD180_COR2, rc->rc_cor2 &= ~COR2_ETC); rc->rc_pendcmd = 0; goto more_intrs; } optr = rc->rc_optr; resid = rc->rc_obufend - optr; if (resid > CD180_NFIFO) resid = CD180_NFIFO; while (resid-- > 0) rcout(sc, CD180_TDR, *optr++); rc->rc_optr = optr; /* output completed? 
*/ if (optr >= rc->rc_obufend) { rcout(sc, CD180_IER, rc->rc_ier &= ~IER_TxRdy); #ifdef RCDEBUG device_printf(sc->sc_dev, "channel %d: output completed\n", rc->rc_chan); #endif if (!(rc->rc_flags & RC_DOXXFER)) { sc->sc_scheduled_event += LOTS_OF_EVENTS; rc->rc_flags |= RC_DOXXFER; swi_sched(sc->sc_swicookie, 0); } } } more_intrs: rcout(sc, CD180_EOIR, 0); /* end of interrupt */ rcout(sc, RC_CTOUT, 0); bsr = ~(rcin(sc, RC_BSR)); } } /* Feed characters to output buffer */ static void rc_start(struct tty *tp) { struct rc_softc *sc; struct rc_chans *rc; int s; rc = tp->t_sc; if (rc->rc_flags & RC_OSBUSY) return; sc = rc->rc_rcb; s = spltty(); rc->rc_flags |= RC_OSBUSY; critical_enter(); if (tp->t_state & TS_TTSTOP) rc->rc_flags |= RC_OSUSP; else rc->rc_flags &= ~RC_OSUSP; /* Do RTS flow control stuff */ if ( (rc->rc_flags & RC_RTSFLOW) && (tp->t_state & TS_TBLOCK) && (rc->rc_msvr & MSVR_RTS) ) { rcout(sc, CD180_CAR, rc->rc_chan); rcout(sc, CD180_MSVR, rc->rc_msvr &= ~MSVR_RTS); } else if (!(rc->rc_msvr & MSVR_RTS)) { rcout(sc, CD180_CAR, rc->rc_chan); rcout(sc, CD180_MSVR, rc->rc_msvr |= MSVR_RTS); } critical_exit(); if (tp->t_state & (TS_TIMEOUT|TS_TTSTOP)) goto out; #ifdef RCDEBUG printrcflags(rc, "rcstart"); #endif ttwwakeup(tp); #ifdef RCDEBUG printf("rcstart: outq = %d obuf = %d\n", tp->t_outq.c_cc, rc->rc_obufend - rc->rc_optr); #endif if (tp->t_state & TS_BUSY) goto out; /* output still in progress ... */ if (tp->t_outq.c_cc > 0) { u_int ocnt; tp->t_state |= TS_BUSY; ocnt = q_to_b(&tp->t_outq, rc->rc_obuf, sizeof rc->rc_obuf); critical_enter(); rc->rc_optr = rc->rc_obuf; rc->rc_obufend = rc->rc_optr + ocnt; critical_exit(); if (!(rc->rc_ier & IER_TxRdy)) { #ifdef RCDEBUG device_printf(sc->sc_dev, "channel %d: rcstart enable txint\n", rc->rc_chan); #endif rcout(sc, CD180_CAR, rc->rc_chan); rcout(sc, CD180_IER, rc->rc_ier |= IER_TxRdy); } } out: rc->rc_flags &= ~RC_OSBUSY; (void) splx(s); } /* Handle delayed events. 
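[rc_pollcard() below is the deferred half of the driver: rc_intr() drains the CD180 FIFOs into per-channel ring buffers at interrupt level, bumps sc_scheduled_event, and kicks a software interrupt, so the tty-layer work runs later at SWI priority. A minimal sketch of that register-then-kick pattern, mirroring the swi_add() call in rc_attach() above; names are illustrative and signatures follow this era's <sys/interrupt.h>.]

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/interrupt.h>

static struct intr_event *example_event;	/* created by swi_add() */
static void *example_cookie;

static void
soft_half(void *arg)
{
	/* Runs at SWI level; a safe place for per-character tty work. */
}

static void
setup_soft_half(void *sc)
{
	(void)swi_add(&example_event, "example", soft_half, sc, SWI_TTY, 0,
	    &example_cookie);
}

static void
hard_half(void)
{
	swi_sched(example_cookie, 0);	/* defer the expensive work */
}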
*/ void rc_pollcard(void *arg) { struct rc_softc *sc; struct rc_chans *rc; struct tty *tp; u_char *tptr, *eptr; int chan, icnt; sc = (struct rc_softc *)arg; if (sc->sc_scheduled_event == 0) return; do { rc = sc->sc_channels; for (chan = 0; chan < CD180_NCHAN; rc++, chan++) { tp = rc->rc_tp; #ifdef RCDEBUG if (rc->rc_flags & (RC_DORXFER|RC_DOXXFER|RC_MODCHG| RC_WAS_BUFOVFL|RC_WAS_SILOVFL)) printrcflags(rc, "rcevent"); #endif if (rc->rc_flags & RC_WAS_BUFOVFL) { critical_enter(); rc->rc_flags &= ~RC_WAS_BUFOVFL; sc->sc_scheduled_event--; critical_exit(); device_printf(sc->sc_dev, "channel %d: interrupt-level buffer overflow\n", chan); } if (rc->rc_flags & RC_WAS_SILOVFL) { critical_enter(); rc->rc_flags &= ~RC_WAS_SILOVFL; sc->sc_scheduled_event--; critical_exit(); device_printf(sc->sc_dev, "channel %d: silo overflow\n", chan); } if (rc->rc_flags & RC_MODCHG) { critical_enter(); rc->rc_flags &= ~RC_MODCHG; sc->sc_scheduled_event -= LOTS_OF_EVENTS; critical_exit(); ttyld_modem(tp, !!(rc->rc_msvr & MSVR_CD)); } if (rc->rc_flags & RC_DORXFER) { critical_enter(); rc->rc_flags &= ~RC_DORXFER; eptr = rc->rc_iptr; if (rc->rc_bufend == &rc->rc_ibuf[2 * RC_IBUFSIZE]) tptr = &rc->rc_ibuf[RC_IBUFSIZE]; else tptr = rc->rc_ibuf; icnt = eptr - tptr; if (icnt > 0) { if (rc->rc_bufend == &rc->rc_ibuf[2 * RC_IBUFSIZE]) { rc->rc_iptr = rc->rc_ibuf; rc->rc_bufend = &rc->rc_ibuf[RC_IBUFSIZE]; rc->rc_hiwat = &rc->rc_ibuf[RC_IHIGHWATER]; } else { rc->rc_iptr = &rc->rc_ibuf[RC_IBUFSIZE]; rc->rc_bufend = &rc->rc_ibuf[2 * RC_IBUFSIZE]; rc->rc_hiwat = &rc->rc_ibuf[RC_IBUFSIZE + RC_IHIGHWATER]; } if ( (rc->rc_flags & RC_RTSFLOW) && (tp->t_state & TS_ISOPEN) && !(tp->t_state & TS_TBLOCK) && !(rc->rc_msvr & MSVR_RTS) ) { rcout(sc, CD180_CAR, chan); rcout(sc, CD180_MSVR, rc->rc_msvr |= MSVR_RTS); } sc->sc_scheduled_event -= icnt; } critical_exit(); if (icnt <= 0 || !(tp->t_state & TS_ISOPEN)) goto done1; if ( (tp->t_state & TS_CAN_BYPASS_L_RINT) && !(tp->t_state & TS_LOCAL)) { if ((tp->t_rawq.c_cc + icnt) >= RB_I_HIGH_WATER && ((rc->rc_flags & RC_RTSFLOW) || (tp->t_iflag & IXOFF)) && !(tp->t_state & TS_TBLOCK)) ttyblock(tp); tk_nin += icnt; tk_rawcc += icnt; tp->t_rawcc += icnt; if (b_to_q(tptr, icnt, &tp->t_rawq)) device_printf(sc->sc_dev, "channel %d: tty-level buffer overflow\n", chan); ttwakeup(tp); if ((tp->t_state & TS_TTSTOP) && ((tp->t_iflag & IXANY) || (tp->t_cc[VSTART] == tp->t_cc[VSTOP]))) { tp->t_state &= ~TS_TTSTOP; tp->t_lflag &= ~FLUSHO; rc_start(tp); } } else { for (; tptr < eptr; tptr++) ttyld_rint(tp, (tptr[0] | rc_rcsrt[tptr[INPUT_FLAGS_SHIFT] & 0xF])); } done1: ; } if (rc->rc_flags & RC_DOXXFER) { critical_enter(); sc->sc_scheduled_event -= LOTS_OF_EVENTS; rc->rc_flags &= ~RC_DOXXFER; rc->rc_tp->t_state &= ~TS_BUSY; critical_exit(); ttyld_start(tp); } if (sc->sc_scheduled_event == 0) break; } } while (sc->sc_scheduled_event >= LOTS_OF_EVENTS); } static void rc_stop(struct tty *tp, int rw) { struct rc_softc *sc; struct rc_chans *rc; u_char *tptr, *eptr; rc = tp->t_sc; sc = rc->rc_rcb; #ifdef RCDEBUG device_printf(sc->sc_dev, "channel %d: rc_stop %s%s\n", rc->rc_chan, (rw & FWRITE)?"FWRITE ":"", (rw & FREAD)?"FREAD":""); #endif if (rw & FWRITE) rc_discard_output(rc); critical_enter(); if (rw & FREAD) { rc->rc_flags &= ~RC_DORXFER; eptr = rc->rc_iptr; if (rc->rc_bufend == &rc->rc_ibuf[2 * RC_IBUFSIZE]) { tptr = &rc->rc_ibuf[RC_IBUFSIZE]; rc->rc_iptr = &rc->rc_ibuf[RC_IBUFSIZE]; } else { tptr = rc->rc_ibuf; rc->rc_iptr = rc->rc_ibuf; } sc->sc_scheduled_event -= eptr - tptr; } if (tp->t_state & TS_TTSTOP) 
rc->rc_flags |= RC_OSUSP; else rc->rc_flags &= ~RC_OSUSP; critical_exit(); } static void rc_close(struct tty *tp) { struct rc_chans *rc; struct rc_softc *sc; int s; rc = tp->t_sc; sc = rc->rc_rcb; s = spltty(); rcout(sc, CD180_CAR, rc->rc_chan); /* Disable rx/tx intrs */ rcout(sc, CD180_IER, rc->rc_ier = 0); if ( (tp->t_cflag & HUPCL) || (!(rc->rc_flags & RC_ACTOUT) && !(rc->rc_msvr & MSVR_CD) && !(tp->t_cflag & CLOCAL)) || !(tp->t_state & TS_ISOPEN) ) { CCRCMD(sc, rc->rc_chan, CCR_ResetChan); WAITFORCCR(sc, rc->rc_chan); (void) rc_modem(tp, SER_RTS, 0); ttydtrwaitstart(tp); } rc->rc_flags &= ~RC_ACTOUT; wakeup( &rc->rc_rcb); /* wake bi */ wakeup(TSA_CARR_ON(tp)); (void) splx(s); } /* Reset the bastard */ static void rc_hwreset(struct rc_softc *sc, u_int chipid) { CCRCMD(sc, -1, CCR_HWRESET); /* Hardware reset */ DELAY(20000); WAITFORCCR(sc, -1); rcout(sc, RC_CTOUT, 0); /* Clear timeout */ rcout(sc, CD180_GIVR, chipid); rcout(sc, CD180_GICR, 0); /* Set Prescaler Registers (1 msec) */ rcout(sc, CD180_PPRL, ((RC_OSCFREQ + 999) / 1000) & 0xFF); rcout(sc, CD180_PPRH, ((RC_OSCFREQ + 999) / 1000) >> 8); /* Initialize Priority Interrupt Level Registers */ rcout(sc, CD180_PILR1, RC_PILR_MODEM); rcout(sc, CD180_PILR2, RC_PILR_TX); rcout(sc, CD180_PILR3, RC_PILR_RX); /* Reset DTR */ rcout(sc, RC_DTREG, ~0); } /* Set channel parameters */ static int rc_param(struct tty *tp, struct termios *ts) { struct rc_softc *sc; struct rc_chans *rc; int idivs, odivs, s, val, cflag, iflag, lflag, inpflow; if ( ts->c_ospeed < 0 || ts->c_ospeed > 76800 || ts->c_ispeed < 0 || ts->c_ispeed > 76800 ) return (EINVAL); if (ts->c_ispeed == 0) ts->c_ispeed = ts->c_ospeed; odivs = RC_BRD(ts->c_ospeed); idivs = RC_BRD(ts->c_ispeed); rc = tp->t_sc; sc = rc->rc_rcb; s = spltty(); /* Select channel */ rcout(sc, CD180_CAR, rc->rc_chan); /* If speed == 0, hangup line */ if (ts->c_ospeed == 0) { CCRCMD(sc, rc->rc_chan, CCR_ResetChan); WAITFORCCR(sc, rc->rc_chan); (void) rc_modem(tp, 0, SER_DTR); } tp->t_state &= ~TS_CAN_BYPASS_L_RINT; cflag = ts->c_cflag; iflag = ts->c_iflag; lflag = ts->c_lflag; if (idivs > 0) { rcout(sc, CD180_RBPRL, idivs & 0xFF); rcout(sc, CD180_RBPRH, idivs >> 8); } if (odivs > 0) { rcout(sc, CD180_TBPRL, odivs & 0xFF); rcout(sc, CD180_TBPRH, odivs >> 8); } /* set timeout value */ if (ts->c_ispeed > 0) { int itm = ts->c_ispeed > 2400 ? 5 : 10000 / ts->c_ispeed + 1; if ( !(lflag & ICANON) && ts->c_cc[VMIN] != 0 && ts->c_cc[VTIME] != 0 && ts->c_cc[VTIME] * 10 > itm) itm = ts->c_cc[VTIME] * 10; rcout(sc, CD180_RTPR, itm <= 255 ? itm : 255); } switch (cflag & CSIZE) { case CS5: val = COR1_5BITS; break; case CS6: val = COR1_6BITS; break; case CS7: val = COR1_7BITS; break; default: case CS8: val = COR1_8BITS; break; } if (cflag & PARENB) { val |= COR1_NORMPAR; if (cflag & PARODD) val |= COR1_ODDP; if (!(cflag & INPCK)) val |= COR1_Ignore; } else val |= COR1_Ignore; if (cflag & CSTOPB) val |= COR1_2SB; rcout(sc, CD180_COR1, val); /* Set FIFO threshold */ val = ts->c_ospeed <= 4800 ? 
1 : CD180_NFIFO / 2; inpflow = 0; if ( (iflag & IXOFF) && ( ts->c_cc[VSTOP] != _POSIX_VDISABLE && ( ts->c_cc[VSTART] != _POSIX_VDISABLE || (iflag & IXANY) ) ) ) { inpflow = 1; val |= COR3_SCDE|COR3_FCT; } rcout(sc, CD180_COR3, val); /* Initialize on-chip automatic flow control */ val = 0; rc->rc_flags &= ~(RC_CTSFLOW|RC_SEND_RDY); if (cflag & CCTS_OFLOW) { rc->rc_flags |= RC_CTSFLOW; val |= COR2_CtsAE; } else rc->rc_flags |= RC_SEND_RDY; if (tp->t_state & TS_TTSTOP) rc->rc_flags |= RC_OSUSP; else rc->rc_flags &= ~RC_OSUSP; if (cflag & CRTS_IFLOW) rc->rc_flags |= RC_RTSFLOW; else rc->rc_flags &= ~RC_RTSFLOW; if (inpflow) { if (ts->c_cc[VSTART] != _POSIX_VDISABLE) rcout(sc, CD180_SCHR1, ts->c_cc[VSTART]); rcout(sc, CD180_SCHR2, ts->c_cc[VSTOP]); val |= COR2_TxIBE; if (iflag & IXANY) val |= COR2_IXM; } rcout(sc, CD180_COR2, rc->rc_cor2 = val); CCRCMD(sc, rc->rc_chan, CCR_CORCHG1 | CCR_CORCHG2 | CCR_CORCHG3); ttyldoptim(tp); /* modem ctl */ val = cflag & CLOCAL ? 0 : MCOR1_CDzd; if (cflag & CCTS_OFLOW) val |= MCOR1_CTSzd; rcout(sc, CD180_MCOR1, val); val = cflag & CLOCAL ? 0 : MCOR2_CDod; if (cflag & CCTS_OFLOW) val |= MCOR2_CTSod; rcout(sc, CD180_MCOR2, val); /* enable i/o and interrupts */ CCRCMD(sc, rc->rc_chan, CCR_XMTREN | ((cflag & CREAD) ? CCR_RCVREN : CCR_RCVRDIS)); WAITFORCCR(sc, rc->rc_chan); rc->rc_ier = cflag & CLOCAL ? 0 : IER_CD; if (cflag & CCTS_OFLOW) rc->rc_ier |= IER_CTS; if (cflag & CREAD) rc->rc_ier |= IER_RxData; if (tp->t_state & TS_BUSY) rc->rc_ier |= IER_TxRdy; if (ts->c_ospeed != 0) rc_modem(tp, SER_DTR, 0); if ((cflag & CCTS_OFLOW) && (rc->rc_msvr & MSVR_CTS)) rc->rc_flags |= RC_SEND_RDY; rcout(sc, CD180_IER, rc->rc_ier); (void) splx(s); return 0; } /* Re-initialize board after bogus interrupts */ static void rc_reinit(struct rc_softc *sc) { struct rc_chans *rc; int i; rc_hwreset(sc, RC_FAKEID); rc = sc->sc_channels; for (i = 0; i < CD180_NCHAN; i++, rc++) (void) rc_param(rc->rc_tp, &rc->rc_tp->t_termios); } /* Modem control routines */ static int rc_modem(struct tty *tp, int biton, int bitoff) { struct rc_chans *rc; struct rc_softc *sc; u_char *dtr; u_char msvr; rc = tp->t_sc; sc = rc->rc_rcb; dtr = &sc->sc_dtr; rcout(sc, CD180_CAR, rc->rc_chan); if (biton == 0 && bitoff == 0) { msvr = rc->rc_msvr = rcin(sc, CD180_MSVR); if (msvr & MSVR_RTS) biton |= SER_RTS; if (msvr & MSVR_CTS) biton |= SER_CTS; if (msvr & MSVR_DSR) biton |= SER_DSR; if (msvr & MSVR_DTR) biton |= SER_DTR; if (msvr & MSVR_CD) biton |= SER_DCD; if (~rcin(sc, RC_RIREG) & (1 << rc->rc_chan)) biton |= SER_RI; return biton; } if (biton & SER_DTR) rcout(sc, RC_DTREG, ~(*dtr |= 1 << rc->rc_chan)); if (bitoff & SER_DTR) rcout(sc, RC_DTREG, ~(*dtr &= ~(1 << rc->rc_chan))); msvr = rcin(sc, CD180_MSVR); if (biton & SER_DTR) msvr |= MSVR_DTR; if (bitoff & SER_DTR) msvr &= ~MSVR_DTR; if (biton & SER_RTS) msvr |= MSVR_RTS; if (bitoff & SER_RTS) msvr &= ~MSVR_RTS; rcout(sc, CD180_MSVR, msvr); return 0; } static void rc_break(struct tty *tp, int brk) { struct rc_chans *rc; rc = tp->t_sc; if (brk) rc->rc_pendcmd = CD180_C_SBRK; else rc->rc_pendcmd = CD180_C_EBRK; } #define ERR(s) do { \ device_printf(sc->sc_dev, "%s", ""); \ printf s ; \ printf("\n"); \ (void) splx(old_level); \ return 1; \ } while (0) /* Test the board. 
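[A note on rc_test() below: it runs each channel in CD180 local-loopback mode (COR2_LLM), transmits the ctest[] string, and compares what comes back. Decoded from octal, the pattern is 0xff 0x55 0xaa 0x25 0xa4 0x00 0xff plus the terminating NUL, filling the 8-byte FIFO with all-ones and alternating-bit values that exercise every data line. A sketch of the per-channel comparison the test amounts to; the helper is hypothetical, memcmp() is the kernel's libkern version.]

#include <sys/param.h>
#include <sys/systm.h>

static const u_char ctest_pattern[] =
    { 0xff, 0x55, 0xaa, 0x25, 0xa4, 0x00, 0xff, 0x00 };	/* 8 = CD180_NFIFO */

static int
loopback_ok(const u_char *rxbuf)
{
	return (memcmp(rxbuf, ctest_pattern, sizeof(ctest_pattern)) == 0);
}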
*/ int rc_test(struct rc_softc *sc) { int chan = 0; int i = 0, rcnt, old_level; unsigned int iack, chipid; unsigned short divs; static u_char ctest[] = "\377\125\252\045\244\0\377"; #define CTLEN 8 struct rtest { u_char txbuf[CD180_NFIFO]; /* TX buffer */ u_char rxbuf[CD180_NFIFO]; /* RX buffer */ int rxptr; /* RX pointer */ int txptr; /* TX pointer */ } tchans[CD180_NCHAN]; old_level = spltty(); chipid = RC_FAKEID; /* First, reset board to initial state */ rc_hwreset(sc, chipid); divs = RC_BRD(19200); /* Initialize channels */ for (chan = 0; chan < CD180_NCHAN; chan++) { /* Select and reset channel */ rcout(sc, CD180_CAR, chan); CCRCMD(sc, chan, CCR_ResetChan); WAITFORCCR(sc, chan); /* Set speed */ rcout(sc, CD180_RBPRL, divs & 0xFF); rcout(sc, CD180_RBPRH, divs >> 8); rcout(sc, CD180_TBPRL, divs & 0xFF); rcout(sc, CD180_TBPRH, divs >> 8); /* set timeout value */ rcout(sc, CD180_RTPR, 0); /* Establish local loopback */ rcout(sc, CD180_COR1, COR1_NOPAR | COR1_8BITS | COR1_1SB); rcout(sc, CD180_COR2, COR2_LLM); rcout(sc, CD180_COR3, CD180_NFIFO); CCRCMD(sc, chan, CCR_CORCHG1 | CCR_CORCHG2 | CCR_CORCHG3); CCRCMD(sc, chan, CCR_RCVREN | CCR_XMTREN); WAITFORCCR(sc, chan); rcout(sc, CD180_MSVR, MSVR_RTS); /* Fill TXBUF with test data */ for (i = 0; i < CD180_NFIFO; i++) { tchans[chan].txbuf[i] = ctest[i]; tchans[chan].rxbuf[i] = 0; } tchans[chan].txptr = tchans[chan].rxptr = 0; /* Now, start transmit */ rcout(sc, CD180_IER, IER_TxMpty|IER_RxData); } /* Pseudo-interrupt poll stuff */ for (rcnt = 10000; rcnt-- > 0; rcnt--) { i = ~(rcin(sc, RC_BSR)); if (i & RC_BSR_TOUT) ERR(("BSR timeout bit set\n")); else if (i & RC_BSR_TXINT) { iack = rcin(sc, RC_PILR_TX); if (iack != (GIVR_IT_TDI | chipid)) ERR(("Bad TX intr ack (%02x != %02x)\n", iack, GIVR_IT_TDI | chipid)); chan = (rcin(sc, CD180_GICR) & GICR_CHAN) >> GICR_LSH; /* If no more data to transmit, disable TX intr */ if (tchans[chan].txptr >= CD180_NFIFO) { iack = rcin(sc, CD180_IER); rcout(sc, CD180_IER, iack & ~IER_TxMpty); } else { for (iack = tchans[chan].txptr; iack < CD180_NFIFO; iack++) rcout(sc, CD180_TDR, tchans[chan].txbuf[iack]); tchans[chan].txptr = iack; } rcout(sc, CD180_EOIR, 0); } else if (i & RC_BSR_RXINT) { u_char ucnt; iack = rcin(sc, RC_PILR_RX); if (iack != (GIVR_IT_RGDI | chipid) && iack != (GIVR_IT_REI | chipid)) ERR(("Bad RX intr ack (%02x != %02x)\n", iack, GIVR_IT_RGDI | chipid)); chan = (rcin(sc, CD180_GICR) & GICR_CHAN) >> GICR_LSH; ucnt = rcin(sc, CD180_RDCR) & 0xF; while (ucnt-- > 0) { iack = rcin(sc, CD180_RCSR); if (iack & RCSR_Timeout) break; if (iack & 0xF) ERR(("Bad char chan %d (RCSR = %02X)\n", chan, iack)); if (tchans[chan].rxptr > CD180_NFIFO) ERR(("Got extra chars chan %d\n", chan)); tchans[chan].rxbuf[tchans[chan].rxptr++] = rcin(sc, CD180_RDR); } rcout(sc, CD180_EOIR, 0); } rcout(sc, RC_CTOUT, 0); for (iack = chan = 0; chan < CD180_NCHAN; chan++) if (tchans[chan].rxptr >= CD180_NFIFO) iack++; if (iack == CD180_NCHAN) break; } for (chan = 0; chan < CD180_NCHAN; chan++) { /* Select and reset channel */ rcout(sc, CD180_CAR, chan); CCRCMD(sc, chan, CCR_ResetChan); } if (!rcnt) ERR(("looses characters during local loopback\n")); /* Now, check data */ for (chan = 0; chan < CD180_NCHAN; chan++) for (i = 0; i < CD180_NFIFO; i++) if (ctest[i] != tchans[chan].rxbuf[i]) ERR(("data mismatch chan %d ptr %d (%d != %d)\n", chan, i, ctest[i], tchans[chan].rxbuf[i])); (void) splx(old_level); return 0; } #ifdef RCDEBUG static void printrcflags(struct rc_chans *rc, char *comment) { struct rc_softc *sc; u_short f = 
rc->rc_flags; sc = rc->rc_rcb; printf("rc%d/%d: %s flags: %s%s%s%s%s%s%s%s%s%s%s%s\n", rc->rc_rcb->rcb_unit, rc->rc_chan, comment, (f & RC_DTR_OFF)?"DTR_OFF " :"", (f & RC_ACTOUT) ?"ACTOUT " :"", (f & RC_RTSFLOW)?"RTSFLOW " :"", (f & RC_CTSFLOW)?"CTSFLOW " :"", (f & RC_DORXFER)?"DORXFER " :"", (f & RC_DOXXFER)?"DOXXFER " :"", (f & RC_MODCHG) ?"MODCHG " :"", (f & RC_OSUSP) ?"OSUSP " :"", (f & RC_OSBUSY) ?"OSBUSY " :"", (f & RC_WAS_BUFOVFL) ?"BUFOVFL " :"", (f & RC_WAS_SILOVFL) ?"SILOVFL " :"", (f & RC_SEND_RDY) ?"SEND_RDY":""); rcout(sc, CD180_CAR, rc->rc_chan); printf("rc%d/%d: msvr %02x ier %02x ccsr %02x\n", rc->rc_rcb->rcb_unit, rc->rc_chan, rcin(sc, CD180_MSVR), rcin(sc, CD180_IER), rcin(sc, CD180_CCSR)); } #endif /* RCDEBUG */ static void rc_discard_output(struct rc_chans *rc) { critical_enter(); if (rc->rc_flags & RC_DOXXFER) { rc->rc_rcb->sc_scheduled_event -= LOTS_OF_EVENTS; rc->rc_flags &= ~RC_DOXXFER; } rc->rc_optr = rc->rc_obufend; rc->rc_tp->t_state &= ~TS_BUSY; critical_exit(); ttwwakeup(rc->rc_tp); } static void rc_wait0(struct rc_softc *sc, int chan, int line) { int rcnt; for (rcnt = 50; rcnt && rcin(sc, CD180_CCR); rcnt--) DELAY(30); if (rcnt == 0) device_printf(sc->sc_dev, "channel %d command timeout, rc.c line: %d\n", chan, line); } static device_method_t rc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, rc_probe), DEVMETHOD(device_attach, rc_attach), DEVMETHOD(device_detach, rc_detach), { 0, 0 } }; static driver_t rc_driver = { "rc", rc_methods, sizeof(struct rc_softc), }; DRIVER_MODULE(rc, isa, rc_driver, rc_devclass, 0, 0); Index: head/sys/dev/rc/rcreg.h =================================================================== --- head/sys/dev/rc/rcreg.h (revision 344854) +++ head/sys/dev/rc/rcreg.h (revision 344855) @@ -1,72 +1,72 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (C) 1995 by Pavel Antonov, Moscow, Russia. * Copyright (C) 1995 by Andrey A. Chernov, Moscow, Russia. - * Copyright (C) 2002 by John Baldwin * All rights reserved. + * Copyright (C) 2002 by John Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Cirrus Logic CD180 -based RISCom/8 board definitions */ /* Oscillator frequency - 19660.08Mhz / 2 */ #define RC_OSCFREQ 9830400 #define RC_BRD(s) ((s) == 0 ? 
0 : \ (((RC_OSCFREQ + (s) / 2) / (s)) + CD180_CTICKS/2) / CD180_CTICKS) /* Riscom/8 board ISA I/O mapping */ #define RC_IOMAP(r) ((((r) & 07) << 1) | (((r) & ~07) << 7)) /* I/O commands */ #define RC_OUT(sc, addr, value) \ bus_space_write_1((sc)->sc_bt, (sc)->sc_bh, RC_IOMAP(addr), (value)) #define RC_IN(sc, addr) \ bus_space_read_1((sc)->sc_bt, (sc)->sc_bh, RC_IOMAP(addr)) /* Riscom on-board registers (mapping assumed) */ #define RC_RIREG 0x100 /* Ring Indicator Register (read-only) */ #define RC_DTREG 0x100 /* DTR Register (write-only) */ #define RC_BSR 0x101 /* Board Status Register (read-only) */ #define RC_CTOUT 0x101 /* Clear Timeout (write-only) */ /* Board Status Register */ #define RC_BSR_TOUT 0x08 /* Timeout */ #define RC_BSR_RXINT 0x04 /* Receiver Interrupt */ #define RC_BSR_TXINT 0x02 /* Transmitter Interrupt */ #define RC_BSR_MOINT 0x01 /* Modem Control Interrupt */ /* Interrupt groups */ #define RC_MODEMGRP 0x01 /* Modem interrupt group */ #define RC_RXGRP 0x02 /* Receiver interrupt group */ #define RC_TXGRP 0x04 /* Transmitter interrupt group */ /* Priority Interrupt Level definitions */ #define RC_PILR_MODEM (0x80 | RC_MODEMGRP) #define RC_PILR_RX (0x80 | RC_RXGRP ) #define RC_PILR_TX (0x80 | RC_TXGRP ) Index: head/sys/i386/pci/pci_pir.c =================================================================== --- head/sys/i386/pci/pci_pir.c (revision 344854) +++ head/sys/i386/pci/pci_pir.c (revision 344855) @@ -1,744 +1,744 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1997, Stefan Esser * Copyright (c) 2000, Michael Smith * Copyright (c) 2000, BSDi - * Copyright (c) 2004, John Baldwin * All rights reserved. + * Copyright (c) 2004, John Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NUM_ISA_INTERRUPTS 16 /* * A link device. Loosely based on the ACPI PCI link device. This doesn't * try to support priorities for different ISA interrupts. 
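[Stepping back to the RC_BRD() macro in rcreg.h above: it converts a baud rate into a CD180 bit-period divisor, rounding to nearest at both divisions. A worked example, assuming CD180_CTICKS is the chip's 16x clock divisor (value 16 per cd180.h; my assumption).]

/*
 *	RC_BRD(9600) = (((9830400 + 4800) / 9600) + 8) / 16
 *	             = (1024 + 8) / 16
 *	             = 64
 * i.e. 9830400 / (16 * 9600) == 64 exactly; the "+ (s)/2" and
 * "+ CD180_CTICKS/2" terms round the result for rates that do not
 * divide the oscillator evenly.
 */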
*/ struct pci_link { TAILQ_ENTRY(pci_link) pl_links; uint8_t pl_id; uint8_t pl_irq; uint16_t pl_irqmask; int pl_references; int pl_routed; }; struct pci_link_lookup { struct pci_link **pci_link_ptr; int bus; int device; int pin; }; struct pci_dev_lookup { uint8_t link; int bus; int device; int pin; }; typedef void pir_entry_handler(struct PIR_entry *entry, struct PIR_intpin* intpin, void *arg); static void pci_print_irqmask(u_int16_t irqs); static int pci_pir_biosroute(int bus, int device, int func, int pin, int irq); static int pci_pir_choose_irq(struct pci_link *pci_link, int irqmask); static void pci_pir_create_links(struct PIR_entry *entry, struct PIR_intpin *intpin, void *arg); static void pci_pir_dump_links(void); static struct pci_link *pci_pir_find_link(uint8_t link_id); static void pci_pir_find_link_handler(struct PIR_entry *entry, struct PIR_intpin *intpin, void *arg); static void pci_pir_initial_irqs(struct PIR_entry *entry, struct PIR_intpin *intpin, void *arg); static void pci_pir_parse(void); static uint8_t pci_pir_search_irq(int bus, int device, int pin); static int pci_pir_valid_irq(struct pci_link *pci_link, int irq); static void pci_pir_walk_table(pir_entry_handler *handler, void *arg); static MALLOC_DEFINE(M_PIR, "$PIR", "$PIR structures"); static struct PIR_table *pci_route_table; static device_t pir_device; static int pci_route_count, pir_bios_irqs, pir_parsed; static TAILQ_HEAD(, pci_link) pci_links; static int pir_interrupt_weight[NUM_ISA_INTERRUPTS]; /* sysctl vars */ SYSCTL_DECL(_hw_pci); /* XXX this likely should live in a header file */ /* IRQs 3, 4, 5, 6, 7, 9, 10, 11, 12, 14, 15 */ #define PCI_IRQ_OVERRIDE_MASK 0xdef8 static uint32_t pci_irq_override_mask = PCI_IRQ_OVERRIDE_MASK; SYSCTL_INT(_hw_pci, OID_AUTO, irq_override_mask, CTLFLAG_RDTUN, &pci_irq_override_mask, PCI_IRQ_OVERRIDE_MASK, "Mask of allowed irqs to try to route when it has no good clue about\n" "which irqs it should use."); /* * Look for the interrupt routing table. * * We use PCI BIOS's PIR table if it's available. $PIR is the standard way * to do this. Sadly, some machines are not standards conforming and have * _PIR instead. We shrug and cope by looking for both. */ void pci_pir_open(void) { struct PIR_table *pt; uint32_t sigaddr; int i; uint8_t ck, *cv; /* Don't try if we've already found a table. */ if (pci_route_table != NULL) return; /* Look for $PIR and then _PIR. */ sigaddr = bios_sigsearch(0, "$PIR", 4, 16, 0); if (sigaddr == 0) sigaddr = bios_sigsearch(0, "_PIR", 4, 16, 0); if (sigaddr == 0) return; /* If we found something, check the checksum and length. */ /* XXX - Use pmap_mapdev()? */ pt = (struct PIR_table *)(uintptr_t)BIOS_PADDRTOVADDR(sigaddr); if (pt->pt_header.ph_length <= sizeof(struct PIR_header)) return; for (cv = (u_int8_t *)pt, ck = 0, i = 0; i < (pt->pt_header.ph_length); i++) ck += cv[i]; if (ck != 0) return; /* Ok, we've got a valid table. */ pci_route_table = pt; pci_route_count = (pt->pt_header.ph_length - sizeof(struct PIR_header)) / sizeof(struct PIR_entry); } /* * Find the pci_link structure for a given link ID. */ static struct pci_link * pci_pir_find_link(uint8_t link_id) { struct pci_link *pci_link; TAILQ_FOREACH(pci_link, &pci_links, pl_links) { if (pci_link->pl_id == link_id) return (pci_link); } return (NULL); } /* * Find the link device associated with a PCI device in the table. 
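[pci_pir_open() above accepts a table only if every byte of it, checksum field included, sums to zero mod 256 — the standard $PIR validity rule. The check in isolation, as a sketch:]

#include <sys/types.h>

static int
pir_checksum_ok(const uint8_t *tbl, size_t len)
{
	uint8_t ck = 0;

	while (len-- > 0)
		ck += *tbl++;
	return (ck == 0);	/* valid iff the byte sum wraps to zero */
}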
*/ static void pci_pir_find_link_handler(struct PIR_entry *entry, struct PIR_intpin *intpin, void *arg) { struct pci_link_lookup *lookup; lookup = (struct pci_link_lookup *)arg; if (entry->pe_bus == lookup->bus && entry->pe_device == lookup->device && intpin - entry->pe_intpin == lookup->pin) *lookup->pci_link_ptr = pci_pir_find_link(intpin->link); } /* * Check to see if a possible IRQ setting is valid. */ static int pci_pir_valid_irq(struct pci_link *pci_link, int irq) { if (!PCI_INTERRUPT_VALID(irq)) return (0); return (pci_link->pl_irqmask & (1 << irq)); } /* * Walk the $PIR executing the worker function for each valid intpin entry * in the table. The handler is passed a pointer to both the entry and * the intpin in the entry. */ static void pci_pir_walk_table(pir_entry_handler *handler, void *arg) { struct PIR_entry *entry; struct PIR_intpin *intpin; int i, pin; entry = &pci_route_table->pt_entry[0]; for (i = 0; i < pci_route_count; i++, entry++) { intpin = &entry->pe_intpin[0]; for (pin = 0; pin < 4; pin++, intpin++) if (intpin->link != 0) handler(entry, intpin, arg); } } static void pci_pir_create_links(struct PIR_entry *entry, struct PIR_intpin *intpin, void *arg) { struct pci_link *pci_link; pci_link = pci_pir_find_link(intpin->link); if (pci_link != NULL) { pci_link->pl_references++; if (intpin->irqs != pci_link->pl_irqmask) { if (bootverbose) printf( "$PIR: Entry %d.%d.INT%c has different mask for link %#x, merging\n", entry->pe_bus, entry->pe_device, (intpin - entry->pe_intpin) + 'A', pci_link->pl_id); pci_link->pl_irqmask &= intpin->irqs; } } else { pci_link = malloc(sizeof(struct pci_link), M_PIR, M_WAITOK); pci_link->pl_id = intpin->link; pci_link->pl_irqmask = intpin->irqs; pci_link->pl_irq = PCI_INVALID_IRQ; pci_link->pl_references = 1; pci_link->pl_routed = 0; TAILQ_INSERT_TAIL(&pci_links, pci_link, pl_links); } } /* * Look to see if any of the function on the PCI device at bus/device have * an interrupt routed to intpin 'pin' by the BIOS. */ static uint8_t pci_pir_search_irq(int bus, int device, int pin) { uint32_t value; uint8_t func, maxfunc; /* See if we have a valid device at function 0. */ value = pci_cfgregread(bus, device, 0, PCIR_HDRTYPE, 1); if ((value & PCIM_HDRTYPE) > PCI_MAXHDRTYPE) return (PCI_INVALID_IRQ); if (value & PCIM_MFDEV) maxfunc = PCI_FUNCMAX; else maxfunc = 0; /* Scan all possible functions at this device. */ for (func = 0; func <= maxfunc; func++) { value = pci_cfgregread(bus, device, func, PCIR_DEVVENDOR, 4); if (value == 0xffffffff) continue; value = pci_cfgregread(bus, device, func, PCIR_INTPIN, 1); /* * See if it uses the pin in question. Note that the passed * in pin uses 0 for A, .. 3 for D whereas the intpin * register uses 0 for no interrupt, 1 for A, .. 4 for D. */ if (value != pin + 1) continue; value = pci_cfgregread(bus, device, func, PCIR_INTLINE, 1); if (bootverbose) printf( "$PIR: Found matching pin for %d.%d.INT%c at func %d: %d\n", bus, device, pin + 'A', func, value); if (value != PCI_INVALID_IRQ) return (value); } return (PCI_INVALID_IRQ); } /* * Try to initialize IRQ based on this device's IRQ. */ static void pci_pir_initial_irqs(struct PIR_entry *entry, struct PIR_intpin *intpin, void *arg) { struct pci_link *pci_link; uint8_t irq, pin; pin = intpin - entry->pe_intpin; pci_link = pci_pir_find_link(intpin->link); irq = pci_pir_search_irq(entry->pe_bus, entry->pe_device, pin); if (irq == PCI_INVALID_IRQ || irq == pci_link->pl_irq) return; /* Don't trust any BIOS IRQs greater than 15. 
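pci_pir_search_irq() above has to bridge two pin encodings: callers use 0 for INTA through 3 for INTD, while the PCIR_INTPIN register uses 0 for "no interrupt" and 1 through 4 for INTA through INTD. A pair of hypothetical conversion helpers (names are illustrative, not from the driver) that make the off-by-one explicit:

#include <stdint.h>

/* Register encoding: 0 = none, 1 = INTA ... 4 = INTD. */
static inline int
intpin_reg_to_pin(uint8_t reg)
{
	return (reg == 0 ? -1 : reg - 1);	/* -1 = no interrupt */
}

/* Caller encoding: 0 = INTA ... 3 = INTD. */
static inline uint8_t
pin_to_intpin_reg(int pin)
{
	return ((uint8_t)(pin + 1));
}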
*/ if (irq >= NUM_ISA_INTERRUPTS) { printf( "$PIR: Ignoring invalid BIOS IRQ %d from %d.%d.INT%c for link %#x\n", irq, entry->pe_bus, entry->pe_device, pin + 'A', pci_link->pl_id); return; } /* * If we don't have an IRQ for this link yet, then we trust the * BIOS, even if it seems invalid from the $PIR entries. */ if (pci_link->pl_irq == PCI_INVALID_IRQ) { if (!pci_pir_valid_irq(pci_link, irq)) printf( "$PIR: Using invalid BIOS IRQ %d from %d.%d.INT%c for link %#x\n", irq, entry->pe_bus, entry->pe_device, pin + 'A', pci_link->pl_id); pci_link->pl_irq = irq; pci_link->pl_routed = 1; return; } /* * We have an IRQ and it doesn't match the current IRQ for this * link. If the new IRQ is invalid, then warn about it and ignore * it. If the old IRQ is invalid and the new IRQ is valid, then * prefer the new IRQ instead. If both IRQs are valid, then just * use the first one. Note that if we ever get into this situation * we are having to guess which setting the BIOS actually routed. * Perhaps we should just give up instead. */ if (!pci_pir_valid_irq(pci_link, irq)) { printf( "$PIR: BIOS IRQ %d for %d.%d.INT%c is not valid for link %#x\n", irq, entry->pe_bus, entry->pe_device, pin + 'A', pci_link->pl_id); } else if (!pci_pir_valid_irq(pci_link, pci_link->pl_irq)) { printf( "$PIR: Preferring valid BIOS IRQ %d from %d.%d.INT%c for link %#x to IRQ %d\n", irq, entry->pe_bus, entry->pe_device, pin + 'A', pci_link->pl_id, pci_link->pl_irq); pci_link->pl_irq = irq; pci_link->pl_routed = 1; } else printf( "$PIR: BIOS IRQ %d for %d.%d.INT%c does not match link %#x irq %d\n", irq, entry->pe_bus, entry->pe_device, pin + 'A', pci_link->pl_id, pci_link->pl_irq); } /* * Parse $PIR to enumerate link devices and attempt to determine their * initial state. This could perhaps be cleaner if we had drivers for the * various interrupt routers as they could read the initial IRQ for each * link. */ static void pci_pir_parse(void) { char tunable_buffer[64]; struct pci_link *pci_link; int i, irq; /* Only parse once. */ if (pir_parsed) return; pir_parsed = 1; /* Enumerate link devices. */ TAILQ_INIT(&pci_links); pci_pir_walk_table(pci_pir_create_links, NULL); if (bootverbose) { printf("$PIR: Links after initial probe:\n"); pci_pir_dump_links(); } /* * Check to see if the BIOS has already routed any of the links by * checking each device connected to each link to see if it has a * valid IRQ. */ pci_pir_walk_table(pci_pir_initial_irqs, NULL); if (bootverbose) { printf("$PIR: Links after initial IRQ discovery:\n"); pci_pir_dump_links(); } /* * Allow the user to override the IRQ for a given link device. We * allow invalid IRQs to be specified but warn about them. An IRQ * of 255 or 0 clears any preset IRQ. */ i = 0; TAILQ_FOREACH(pci_link, &pci_links, pl_links) { snprintf(tunable_buffer, sizeof(tunable_buffer), "hw.pci.link.%#x.irq", pci_link->pl_id); if (getenv_int(tunable_buffer, &irq) == 0) continue; if (irq == 0) irq = PCI_INVALID_IRQ; if (irq != PCI_INVALID_IRQ && !pci_pir_valid_irq(pci_link, irq) && bootverbose) printf( "$PIR: Warning, IRQ %d for link %#x is not listed as valid\n", irq, pci_link->pl_id); pci_link->pl_routed = 0; pci_link->pl_irq = irq; i = 1; } if (bootverbose && i) { printf("$PIR: Links after tunable overrides:\n"); pci_pir_dump_links(); } /* * Build initial interrupt weights as well as bitmap of "known-good" * IRQs that the BIOS has already used for PCI link devices. 
*/ TAILQ_FOREACH(pci_link, &pci_links, pl_links) { if (!PCI_INTERRUPT_VALID(pci_link->pl_irq)) continue; pir_bios_irqs |= 1 << pci_link->pl_irq; pir_interrupt_weight[pci_link->pl_irq] += pci_link->pl_references; } if (bootverbose) { printf("$PIR: IRQs used by BIOS: "); pci_print_irqmask(pir_bios_irqs); printf("\n"); printf("$PIR: Interrupt Weights:\n[ "); for (i = 0; i < NUM_ISA_INTERRUPTS; i++) printf(" %3d", i); printf(" ]\n[ "); for (i = 0; i < NUM_ISA_INTERRUPTS; i++) printf(" %3d", pir_interrupt_weight[i]); printf(" ]\n"); } } /* * Use the PCI BIOS to route an interrupt for a given device. * * Input: * AX = PCIBIOS_ROUTE_INTERRUPT * BH = bus * BL = device [7:3] / function [2:0] * CH = IRQ * CL = Interrupt Pin (0x0A = A, ... 0x0D = D) */ static int pci_pir_biosroute(int bus, int device, int func, int pin, int irq) { struct bios_regs args; args.eax = PCIBIOS_ROUTE_INTERRUPT; args.ebx = (bus << 8) | (device << 3) | func; args.ecx = (irq << 8) | (0xa + pin); return (bios32(&args, PCIbios.ventry, GSEL(GCODE_SEL, SEL_KPL))); } /* * Route a PCI interrupt using a link device from the $PIR. */ int pci_pir_route_interrupt(int bus, int device, int func, int pin) { struct pci_link_lookup lookup; struct pci_link *pci_link; int error, irq; if (pci_route_table == NULL) return (PCI_INVALID_IRQ); /* Lookup link device for this PCI device/pin. */ pci_link = NULL; lookup.bus = bus; lookup.device = device; lookup.pin = pin - 1; lookup.pci_link_ptr = &pci_link; pci_pir_walk_table(pci_pir_find_link_handler, &lookup); if (pci_link == NULL) { printf("$PIR: No matching entry for %d.%d.INT%c\n", bus, device, pin - 1 + 'A'); return (PCI_INVALID_IRQ); } /* * Pick a new interrupt if we don't have one already. We look * for an interrupt from several different sets. First, if * this link only has one valid IRQ, use that. Second, we * check the set of PCI only interrupts from the $PIR. Third, * we check the set of known-good interrupts that the BIOS has * already used. Lastly, we check the "all possible valid * IRQs" set. */ if (!PCI_INTERRUPT_VALID(pci_link->pl_irq)) { if (pci_link->pl_irqmask != 0 && powerof2(pci_link->pl_irqmask)) irq = ffs(pci_link->pl_irqmask) - 1; else irq = pci_pir_choose_irq(pci_link, pci_route_table->pt_header.ph_pci_irqs); if (!PCI_INTERRUPT_VALID(irq)) irq = pci_pir_choose_irq(pci_link, pir_bios_irqs); if (!PCI_INTERRUPT_VALID(irq)) irq = pci_pir_choose_irq(pci_link, pci_irq_override_mask); if (!PCI_INTERRUPT_VALID(irq)) { if (bootverbose) printf( "$PIR: Failed to route interrupt for %d:%d INT%c\n", bus, device, pin - 1 + 'A'); return (PCI_INVALID_IRQ); } pci_link->pl_irq = irq; } /* Ask the BIOS to route this IRQ if we haven't done so already. */ if (!pci_link->pl_routed) { error = pci_pir_biosroute(bus, device, func, pin - 1, pci_link->pl_irq); /* Ignore errors when routing a unique interrupt. */ if (error && !powerof2(pci_link->pl_irqmask)) { printf("$PIR: ROUTE_INTERRUPT failed.\n"); return (PCI_INVALID_IRQ); } pci_link->pl_routed = 1; /* Ensure the interrupt is set to level/low trigger. */ KASSERT(pir_device != NULL, ("missing pir device")); BUS_CONFIG_INTR(pir_device, pci_link->pl_irq, INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW); } if (bootverbose) printf("$PIR: %d:%d INT%c routed to irq %d\n", bus, device, pin - 1 + 'A', pci_link->pl_irq); return (pci_link->pl_irq); } /* * Try to pick an interrupt for the specified link from the interrupts * set in the mask. 
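pci_pir_biosroute() above packs its arguments exactly as the calling convention quoted in its comment demands. A worked example for a sample device, bus 0, device 13, function 0, INTA, IRQ 11 (all values illustrative; note the pin is already zero-based at this point, since pci_pir_route_interrupt() passes pin - 1):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	int bus = 0, device = 13, func = 0, pin = 0 /* INTA */, irq = 11;
	uint32_t ebx, ecx;

	ebx = (bus << 8) | (device << 3) | func;	/* BH = bus, BL = dev/func */
	ecx = (irq << 8) | (0xa + pin);			/* CH = IRQ, CL = 0x0a + pin */
	printf("ebx=%#06x ecx=%#06x\n", ebx, ecx);	/* prints 0x0068 0x0b0a */
	return (0);
}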
*/ static int pci_pir_choose_irq(struct pci_link *pci_link, int irqmask) { int i, irq, realmask; /* XXX: Need to have a #define of known bad IRQs to also mask out? */ realmask = pci_link->pl_irqmask & irqmask; if (realmask == 0) return (PCI_INVALID_IRQ); /* Find IRQ with lowest weight. */ irq = PCI_INVALID_IRQ; for (i = 0; i < NUM_ISA_INTERRUPTS; i++) { if (!(realmask & 1 << i)) continue; if (irq == PCI_INVALID_IRQ || pir_interrupt_weight[i] < pir_interrupt_weight[irq]) irq = i; } if (bootverbose && PCI_INTERRUPT_VALID(irq)) { printf("$PIR: Found IRQ %d for link %#x from ", irq, pci_link->pl_id); pci_print_irqmask(realmask); printf("\n"); } return (irq); } static void pci_print_irqmask(u_int16_t irqs) { int i, first; if (irqs == 0) { printf("none"); return; } first = 1; for (i = 0; i < 16; i++, irqs >>= 1) if (irqs & 1) { if (!first) printf(" "); else first = 0; printf("%d", i); } } /* * Display link devices. */ static void pci_pir_dump_links(void) { struct pci_link *pci_link; printf("Link IRQ Rtd Ref IRQs\n"); TAILQ_FOREACH(pci_link, &pci_links, pl_links) { printf("%#4x %3d %c %3d ", pci_link->pl_id, pci_link->pl_irq, pci_link->pl_routed ? 'Y' : 'N', pci_link->pl_references); pci_print_irqmask(pci_link->pl_irqmask); printf("\n"); } } /* * See if any interrupts for a given PCI bus are routed in the PIR. Don't * even bother looking if the BIOS doesn't support routing anyways. If we * are probing a PCI-PCI bridge, then require_parse will be true and we should * only succeed if a host-PCI bridge has already attached and parsed the PIR. */ int pci_pir_probe(int bus, int require_parse) { int i; if (pci_route_table == NULL || (require_parse && !pir_parsed)) return (0); for (i = 0; i < pci_route_count; i++) if (pci_route_table->pt_entry[i].pe_bus == bus) return (1); return (0); } /* * The driver for the new-bus pseudo device pir0 for the $PIR table. */ static int pir_probe(device_t dev) { char buf[64]; snprintf(buf, sizeof(buf), "PCI Interrupt Routing Table: %d Entries", pci_route_count); device_set_desc_copy(dev, buf); return (0); } static int pir_attach(device_t dev) { pci_pir_parse(); KASSERT(pir_device == NULL, ("Multiple pir devices")); pir_device = dev; return (0); } static void pir_resume_find_device(struct PIR_entry *entry, struct PIR_intpin *intpin, void *arg) { struct pci_dev_lookup *pd; pd = (struct pci_dev_lookup *)arg; if (intpin->link != pd->link || pd->bus != -1) return; pd->bus = entry->pe_bus; pd->device = entry->pe_device; pd->pin = intpin - entry->pe_intpin; } static int pir_resume(device_t dev) { struct pci_dev_lookup pd; struct pci_link *pci_link; int error; /* Ask the BIOS to re-route each link that was already routed. 
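pci_pir_choose_irq() above is an arg-min over the permitted mask, with pir_interrupt_weight as the cost function. The same selection as a pure, standalone function (the array size and the invalid-IRQ sentinel are stand-ins for NUM_ISA_INTERRUPTS and PCI_INVALID_IRQ):

#include <stdint.h>

#define NISA		16	/* stand-in for NUM_ISA_INTERRUPTS */
#define INVALID_IRQ	255	/* stand-in for PCI_INVALID_IRQ */

static int
choose_lowest_weight_irq(uint16_t linkmask, uint16_t candidates,
    const int weight[NISA])
{
	uint16_t realmask;
	int i, irq;

	realmask = linkmask & candidates;
	irq = INVALID_IRQ;
	for (i = 0; i < NISA; i++) {
		if ((realmask & (1 << i)) == 0)
			continue;
		if (irq == INVALID_IRQ || weight[i] < weight[irq])
			irq = i;
	}
	return (irq);
}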
*/ TAILQ_FOREACH(pci_link, &pci_links, pl_links) { if (!PCI_INTERRUPT_VALID(pci_link->pl_irq)) { KASSERT(!pci_link->pl_routed, ("link %#x is routed but has invalid PCI IRQ", pci_link->pl_id)); continue; } if (pci_link->pl_routed) { pd.bus = -1; pd.link = pci_link->pl_id; pci_pir_walk_table(pir_resume_find_device, &pd); KASSERT(pd.bus != -1, ("did not find matching entry for link %#x in the $PIR table", pci_link->pl_id)); if (bootverbose) device_printf(dev, "Using %d.%d.INT%c to route link %#x to IRQ %d\n", pd.bus, pd.device, pd.pin + 'A', pci_link->pl_id, pci_link->pl_irq); error = pci_pir_biosroute(pd.bus, pd.device, 0, pd.pin, pci_link->pl_irq); if (error) device_printf(dev, "ROUTE_INTERRUPT on resume for link %#x failed.\n", pci_link->pl_id); } } return (0); } static device_method_t pir_methods[] = { /* Device interface */ DEVMETHOD(device_probe, pir_probe), DEVMETHOD(device_attach, pir_attach), DEVMETHOD(device_resume, pir_resume), { 0, 0 } }; static driver_t pir_driver = { "pir", pir_methods, 1, }; static devclass_t pir_devclass; DRIVER_MODULE(pir, legacy, pir_driver, pir_devclass, 0, 0); Index: head/sys/kern/kern_ktr.c =================================================================== --- head/sys/kern/kern_ktr.c (revision 344854) +++ head/sys/kern/kern_ktr.c (revision 344855) @@ -1,476 +1,475 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2000 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * This module holds the global variables used by KTR and the ktr_tracepoint() * function that does the actual tracing. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_ktr.h" #include "opt_alq.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #include #endif #ifndef KTR_BOOT_ENTRIES #define KTR_BOOT_ENTRIES 1024 #endif #ifndef KTR_ENTRIES #define KTR_ENTRIES 1024 #endif /* Limit the allocations to something manageable. 
*/ #define KTR_ENTRIES_MAX (8 * 1024 * 1024) #ifndef KTR_MASK #define KTR_MASK (0) #endif #ifndef KTR_CPUMASK #define KTR_CPUMASK CPUSET_FSET #endif #ifndef KTR_TIME #define KTR_TIME get_cyclecount() #endif #ifndef KTR_CPU #define KTR_CPU PCPU_GET(cpuid) #endif static MALLOC_DEFINE(M_KTR, "KTR", "KTR"); FEATURE(ktr, "Kernel support for KTR kernel tracing facility"); volatile int ktr_idx = 0; uint64_t ktr_mask = KTR_MASK; uint64_t ktr_compile = KTR_COMPILE; int ktr_entries = KTR_BOOT_ENTRIES; int ktr_version = KTR_VERSION; struct ktr_entry ktr_buf_init[KTR_BOOT_ENTRIES]; struct ktr_entry *ktr_buf = ktr_buf_init; cpuset_t ktr_cpumask = CPUSET_T_INITIALIZER(KTR_CPUMASK); static SYSCTL_NODE(_debug, OID_AUTO, ktr, CTLFLAG_RD, 0, "KTR options"); SYSCTL_INT(_debug_ktr, OID_AUTO, version, CTLFLAG_RD, &ktr_version, 0, "Version of the KTR interface"); SYSCTL_UQUAD(_debug_ktr, OID_AUTO, compile, CTLFLAG_RD, &ktr_compile, 0, "Bitmask of KTR event classes compiled into the kernel"); static int sysctl_debug_ktr_cpumask(SYSCTL_HANDLER_ARGS) { char lktr_cpumask_str[CPUSETBUFSIZ]; cpuset_t imask; int error; cpusetobj_strprint(lktr_cpumask_str, &ktr_cpumask); error = sysctl_handle_string(oidp, lktr_cpumask_str, sizeof(lktr_cpumask_str), req); if (error != 0 || req->newptr == NULL) return (error); if (cpusetobj_strscan(&imask, lktr_cpumask_str) == -1) return (EINVAL); CPU_COPY(&imask, &ktr_cpumask); return (error); } SYSCTL_PROC(_debug_ktr, OID_AUTO, cpumask, CTLFLAG_RWTUN | CTLFLAG_MPSAFE | CTLTYPE_STRING, NULL, 0, sysctl_debug_ktr_cpumask, "S", "Bitmask of CPUs on which KTR logging is enabled"); static int sysctl_debug_ktr_clear(SYSCTL_HANDLER_ARGS) { int clear, error; clear = 0; error = sysctl_handle_int(oidp, &clear, 0, req); if (error || !req->newptr) return (error); if (clear) { bzero(ktr_buf, sizeof(*ktr_buf) * ktr_entries); ktr_idx = 0; } return (error); } SYSCTL_PROC(_debug_ktr, OID_AUTO, clear, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_debug_ktr_clear, "I", "Clear KTR Buffer"); /* * This is a sysctl proc so that it is serialized as !MPSAFE along with * the other ktr sysctl procs. */ static int sysctl_debug_ktr_mask(SYSCTL_HANDLER_ARGS) { uint64_t mask; int error; mask = ktr_mask; error = sysctl_handle_64(oidp, &mask, 0, req); if (error || !req->newptr) return (error); ktr_mask = mask; return (error); } SYSCTL_PROC(_debug_ktr, OID_AUTO, mask, CTLTYPE_U64 | CTLFLAG_RWTUN, 0, 0, sysctl_debug_ktr_mask, "QU", "Bitmask of KTR event classes for which logging is enabled"); #if KTR_ENTRIES > KTR_BOOT_ENTRIES /* * A simplified version of sysctl_debug_ktr_entries. * No need to care about SMP, scheduling, etc. */ static void ktr_entries_initializer(void *dummy __unused) { uint64_t mask; /* Temporarily disable ktr in case malloc() is being traced. 
*/ mask = ktr_mask; ktr_mask = 0; ktr_buf = malloc(sizeof(*ktr_buf) * KTR_ENTRIES, M_KTR, M_WAITOK | M_ZERO); memcpy(ktr_buf, ktr_buf_init + ktr_idx, (KTR_BOOT_ENTRIES - ktr_idx) * sizeof(*ktr_buf)); if (ktr_idx != 0) { memcpy(ktr_buf + KTR_BOOT_ENTRIES - ktr_idx, ktr_buf_init, ktr_idx * sizeof(*ktr_buf)); ktr_idx = KTR_BOOT_ENTRIES; } ktr_entries = KTR_ENTRIES; ktr_mask = mask; } SYSINIT(ktr_entries_initializer, SI_SUB_KMEM, SI_ORDER_ANY, ktr_entries_initializer, NULL); #endif static int sysctl_debug_ktr_entries(SYSCTL_HANDLER_ARGS) { uint64_t mask; int entries, error; struct ktr_entry *buf, *oldbuf; entries = ktr_entries; error = sysctl_handle_int(oidp, &entries, 0, req); if (error || !req->newptr) return (error); if (entries > KTR_ENTRIES_MAX) return (ERANGE); /* Disable ktr temporarily. */ mask = ktr_mask; ktr_mask = 0; /* Wait for threads to go idle. */ if ((error = quiesce_all_cpus("ktrent", PCATCH)) != 0) { ktr_mask = mask; return (error); } if (ktr_buf != ktr_buf_init) oldbuf = ktr_buf; else oldbuf = NULL; /* Allocate a new buffer. */ buf = malloc(sizeof(*buf) * entries, M_KTR, M_WAITOK | M_ZERO); /* Install the new buffer and restart ktr. */ ktr_buf = buf; ktr_entries = entries; ktr_idx = 0; ktr_mask = mask; if (oldbuf != NULL) free(oldbuf, M_KTR); return (error); } SYSCTL_PROC(_debug_ktr, OID_AUTO, entries, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_debug_ktr_entries, "I", "Number of entries in the KTR buffer"); #ifdef KTR_VERBOSE int ktr_verbose = KTR_VERBOSE; TUNABLE_INT("debug.ktr.verbose", &ktr_verbose); SYSCTL_INT(_debug_ktr, OID_AUTO, verbose, CTLFLAG_RW, &ktr_verbose, 0, ""); #endif #ifdef KTR_ALQ struct alq *ktr_alq; char ktr_alq_file[MAXPATHLEN] = "/tmp/ktr.out"; int ktr_alq_cnt = 0; int ktr_alq_depth = KTR_ENTRIES; int ktr_alq_enabled = 0; int ktr_alq_failed = 0; int ktr_alq_max = 0; SYSCTL_INT(_debug_ktr, OID_AUTO, alq_max, CTLFLAG_RW, &ktr_alq_max, 0, "Maximum number of entries to write"); SYSCTL_INT(_debug_ktr, OID_AUTO, alq_cnt, CTLFLAG_RD, &ktr_alq_cnt, 0, "Current number of written entries"); SYSCTL_INT(_debug_ktr, OID_AUTO, alq_failed, CTLFLAG_RD, &ktr_alq_failed, 0, "Number of times we overran the buffer"); SYSCTL_INT(_debug_ktr, OID_AUTO, alq_depth, CTLFLAG_RW, &ktr_alq_depth, 0, "Number of items in the write buffer"); SYSCTL_STRING(_debug_ktr, OID_AUTO, alq_file, CTLFLAG_RW, ktr_alq_file, sizeof(ktr_alq_file), "KTR logging file"); static int sysctl_debug_ktr_alq_enable(SYSCTL_HANDLER_ARGS) { int error; int enable; enable = ktr_alq_enabled; error = sysctl_handle_int(oidp, &enable, 0, req); if (error || !req->newptr) return (error); if (enable) { if (ktr_alq_enabled) return (0); error = alq_open(&ktr_alq, (const char *)ktr_alq_file, req->td->td_ucred, ALQ_DEFAULT_CMODE, sizeof(struct ktr_entry), ktr_alq_depth); if (error == 0) { ktr_alq_cnt = 0; ktr_alq_failed = 0; ktr_alq_enabled = 1; } } else { if (ktr_alq_enabled == 0) return (0); ktr_alq_enabled = 0; alq_close(ktr_alq); ktr_alq = NULL; } return (error); } SYSCTL_PROC(_debug_ktr, OID_AUTO, alq_enable, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_debug_ktr_alq_enable, "I", "Enable KTR logging"); #endif void ktr_tracepoint(uint64_t mask, const char *file, int line, const char *format, u_long arg1, u_long arg2, u_long arg3, u_long arg4, u_long arg5, u_long arg6) { struct ktr_entry *entry; #ifdef KTR_ALQ struct ale *ale = NULL; #endif int newindex, saveindex; #if defined(KTR_VERBOSE) || defined(KTR_ALQ) struct thread *td; #endif int cpu; if (panicstr || kdb_active) return; if ((ktr_mask & mask) == 0 || ktr_buf == NULL) return; 
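The two memcpy() calls in ktr_entries_initializer() above linearize a possibly wrapped ring: the slots from the current index to the end come first, then the wrapped-around prefix. The same unwrap as a generic helper, assuming only a ring of n fixed-size slots whose next-write index is idx:

#include <stddef.h>
#include <string.h>

/* Copy all n ring slots into dst in oldest-to-newest order. */
static void
ring_unwrap(void *dst, const void *ring, size_t n, size_t idx, size_t elsize)
{
	const char *src = ring;
	char *d = dst;

	memcpy(d, src + idx * elsize, (n - idx) * elsize);
	if (idx != 0)
		memcpy(d + (n - idx) * elsize, src, idx * elsize);
}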
cpu = KTR_CPU; if (!CPU_ISSET(cpu, &ktr_cpumask)) return; #if defined(KTR_VERBOSE) || defined(KTR_ALQ) td = curthread; if (td->td_pflags & TDP_INKTR) return; td->td_pflags |= TDP_INKTR; #endif #ifdef KTR_ALQ if (ktr_alq_enabled) { if (td->td_critnest == 0 && (TD_IS_IDLETHREAD(td)) == 0 && td != ald_thread) { if (ktr_alq_max && ktr_alq_cnt > ktr_alq_max) goto done; if ((ale = alq_get(ktr_alq, ALQ_NOWAIT)) == NULL) { ktr_alq_failed++; goto done; } ktr_alq_cnt++; entry = (struct ktr_entry *)ale->ae_data; } else { goto done; } } else #endif { do { saveindex = ktr_idx; newindex = (saveindex + 1) % ktr_entries; } while (atomic_cmpset_rel_int(&ktr_idx, saveindex, newindex) == 0); entry = &ktr_buf[saveindex]; } entry->ktr_timestamp = KTR_TIME; entry->ktr_cpu = cpu; entry->ktr_thread = curthread; if (file != NULL) while (strncmp(file, "../", 3) == 0) file += 3; entry->ktr_file = file; entry->ktr_line = line; #ifdef KTR_VERBOSE if (ktr_verbose) { #ifdef SMP printf("cpu%d ", cpu); #endif if (ktr_verbose > 1) { printf("%s.%d\t", entry->ktr_file, entry->ktr_line); } printf(format, arg1, arg2, arg3, arg4, arg5, arg6); printf("\n"); } #endif entry->ktr_desc = format; entry->ktr_parms[0] = arg1; entry->ktr_parms[1] = arg2; entry->ktr_parms[2] = arg3; entry->ktr_parms[3] = arg4; entry->ktr_parms[4] = arg5; entry->ktr_parms[5] = arg6; #ifdef KTR_ALQ if (ktr_alq_enabled && ale) alq_post(ktr_alq, ale); done: #endif #if defined(KTR_VERBOSE) || defined(KTR_ALQ) td->td_pflags &= ~TDP_INKTR; #endif } #ifdef DDB struct tstate { int cur; int first; }; static struct tstate tstate; static int db_ktr_verbose; static int db_mach_vtrace(void); DB_SHOW_COMMAND(ktr, db_ktr_all) { tstate.cur = (ktr_idx - 1) % ktr_entries; tstate.first = -1; db_ktr_verbose = 0; db_ktr_verbose |= (strchr(modif, 'v') != NULL) ? 2 : 0; db_ktr_verbose |= (strchr(modif, 'V') != NULL) ? 1 : 0; /* just timestamp please */ if (strchr(modif, 'a') != NULL) { db_disable_pager(); while (cncheckc() == -1) if (db_mach_vtrace() == 0) break; } else { while (!db_pager_quit) if (db_mach_vtrace() == 0) break; } } static int db_mach_vtrace(void) { struct ktr_entry *kp; if (tstate.cur == tstate.first || ktr_buf == NULL) { db_printf("--- End of trace buffer ---\n"); return (0); } kp = &ktr_buf[tstate.cur]; /* Skip over unused entries. */ if (kp->ktr_desc == NULL) { db_printf("--- End of trace buffer ---\n"); return (0); } db_printf("%d (%p", tstate.cur, kp->ktr_thread); #ifdef SMP db_printf(":cpu%d", kp->ktr_cpu); #endif db_printf(")"); if (db_ktr_verbose >= 1) { db_printf(" %10.10lld", (long long)kp->ktr_timestamp); } if (db_ktr_verbose >= 2) { db_printf(" %s.%d", kp->ktr_file, kp->ktr_line); } db_printf(": "); db_printf(kp->ktr_desc, kp->ktr_parms[0], kp->ktr_parms[1], kp->ktr_parms[2], kp->ktr_parms[3], kp->ktr_parms[4], kp->ktr_parms[5]); db_printf("\n"); if (tstate.first == -1) tstate.first = tstate.cur; if (--tstate.cur < 0) tstate.cur = ktr_entries - 1; return (1); } #endif /* DDB */ Index: head/sys/kern/kern_rwlock.c =================================================================== --- head/sys/kern/kern_rwlock.c (revision 344854) +++ head/sys/kern/kern_rwlock.c (revision 344855) @@ -1,1557 +1,1556 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2006 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Machine independent bits of reader/writer lock implementation. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_hwpmc_hooks.h" #include "opt_no_adaptive_rwlocks.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS) #define ADAPTIVE_RWLOCKS #endif #ifdef HWPMC_HOOKS #include PMC_SOFT_DECLARE( , , lock, failed); #endif /* * Return the rwlock address when the lock cookie address is provided. * This functionality assumes that struct rwlock* have a member named rw_lock. */ #define rwlock2rw(c) (__containerof(c, struct rwlock, rw_lock)) #ifdef DDB #include static void db_show_rwlock(const struct lock_object *lock); #endif static void assert_rw(const struct lock_object *lock, int what); static void lock_rw(struct lock_object *lock, uintptr_t how); #ifdef KDTRACE_HOOKS static int owner_rw(const struct lock_object *lock, struct thread **owner); #endif static uintptr_t unlock_rw(struct lock_object *lock); struct lock_class lock_class_rw = { .lc_name = "rw", .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE, .lc_assert = assert_rw, #ifdef DDB .lc_ddb_show = db_show_rwlock, #endif .lc_lock = lock_rw, .lc_unlock = unlock_rw, #ifdef KDTRACE_HOOKS .lc_owner = owner_rw, #endif }; #ifdef ADAPTIVE_RWLOCKS static int __read_frequently rowner_retries; static int __read_frequently rowner_loops; static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL, "rwlock debugging"); SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, ""); SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, ""); static struct lock_delay_config __read_frequently rw_delay; SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_base, CTLFLAG_RW, &rw_delay.base, 0, ""); SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_max, CTLFLAG_RW, &rw_delay.max, 0, ""); static void rw_lock_delay_init(void *arg __unused) { lock_delay_default_init(&rw_delay); rowner_retries = 10; rowner_loops = max(10000, rw_delay.max); } LOCK_DELAY_SYSINIT(rw_lock_delay_init); #endif /* * Return a pointer to the owning thread if the lock is write-locked or * NULL if the lock is unlocked or read-locked. */ #define lv_rw_wowner(v) \ ((v) & RW_LOCK_READ ? NULL : \ (struct thread *)RW_OWNER((v))) #define rw_wowner(rw) lv_rw_wowner(RW_READ_VALUE(rw)) /* * Returns if a write owner is recursed. 
Write ownership is not assured * here and should be previously checked. */ #define rw_recursed(rw) ((rw)->rw_recurse != 0) /* * Return true if curthread helds the lock. */ #define rw_wlocked(rw) (rw_wowner((rw)) == curthread) /* * Return a pointer to the owning thread for this lock who should receive * any priority lent by threads that block on this lock. Currently this * is identical to rw_wowner(). */ #define rw_owner(rw) rw_wowner(rw) #ifndef INVARIANTS #define __rw_assert(c, what, file, line) #endif void assert_rw(const struct lock_object *lock, int what) { rw_assert((const struct rwlock *)lock, what); } void lock_rw(struct lock_object *lock, uintptr_t how) { struct rwlock *rw; rw = (struct rwlock *)lock; if (how) rw_rlock(rw); else rw_wlock(rw); } uintptr_t unlock_rw(struct lock_object *lock) { struct rwlock *rw; rw = (struct rwlock *)lock; rw_assert(rw, RA_LOCKED | LA_NOTRECURSED); if (rw->rw_lock & RW_LOCK_READ) { rw_runlock(rw); return (1); } else { rw_wunlock(rw); return (0); } } #ifdef KDTRACE_HOOKS int owner_rw(const struct lock_object *lock, struct thread **owner) { const struct rwlock *rw = (const struct rwlock *)lock; uintptr_t x = rw->rw_lock; *owner = rw_wowner(rw); return ((x & RW_LOCK_READ) != 0 ? (RW_READERS(x) != 0) : (*owner != NULL)); } #endif void _rw_init_flags(volatile uintptr_t *c, const char *name, int opts) { struct rwlock *rw; int flags; rw = rwlock2rw(c); MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET | RW_RECURSE | RW_NEW)) == 0); ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock, ("%s: rw_lock not aligned for %s: %p", __func__, name, &rw->rw_lock)); flags = LO_UPGRADABLE; if (opts & RW_DUPOK) flags |= LO_DUPOK; if (opts & RW_NOPROFILE) flags |= LO_NOPROFILE; if (!(opts & RW_NOWITNESS)) flags |= LO_WITNESS; if (opts & RW_RECURSE) flags |= LO_RECURSABLE; if (opts & RW_QUIET) flags |= LO_QUIET; if (opts & RW_NEW) flags |= LO_NEW; lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags); rw->rw_lock = RW_UNLOCKED; rw->rw_recurse = 0; } void _rw_destroy(volatile uintptr_t *c) { struct rwlock *rw; rw = rwlock2rw(c); KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw)); KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw)); rw->rw_lock = RW_DESTROYED; lock_destroy(&rw->lock_object); } void rw_sysinit(void *arg) { struct rw_args *args; args = arg; rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc, args->ra_flags); } int _rw_wowned(const volatile uintptr_t *c) { return (rw_wowner(rwlock2rw(c)) == curthread); } void _rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; uintptr_t tid, v; rw = rwlock2rw(c); KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() || !TD_IS_IDLETHREAD(curthread), ("rw_wlock() by idle thread %p on rwlock %s @ %s:%d", curthread, rw->lock_object.lo_name, file, line)); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_wlock() of destroyed rwlock @ %s:%d", file, line)); WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); tid = (uintptr_t)curthread; v = RW_UNLOCKED; if (!_rw_write_lock_fetch(rw, &v, tid)) _rw_wlock_hard(rw, v, file, line); else LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, 0, 0, file, line, LOCKSTAT_WRITER); LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line); WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); TD_LOCKS_INC(curthread); } int __rw_try_wlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF) { struct thread *td; uintptr_t tid, v; int rval; bool recursed; td = curthread; tid = 
(uintptr_t)td; if (SCHEDULER_STOPPED_TD(td)) return (1); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(td), ("rw_try_wlock() by idle thread %p on rwlock %s @ %s:%d", curthread, rw->lock_object.lo_name, file, line)); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line)); rval = 1; recursed = false; v = RW_UNLOCKED; for (;;) { if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid)) break; if (v == RW_UNLOCKED) continue; if (v == tid && (rw->lock_object.lo_flags & LO_RECURSABLE)) { rw->rw_recurse++; atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED); break; } rval = 0; break; } LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line); if (rval) { WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); if (!recursed) LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, 0, 0, file, line, LOCKSTAT_WRITER); TD_LOCKS_INC(curthread); } return (rval); } int __rw_try_wlock(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; rw = rwlock2rw(c); return (__rw_try_wlock_int(rw LOCK_FILE_LINE_ARG)); } void _rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; rw = rwlock2rw(c); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(c, RA_WLOCKED, file, line); WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line); #ifdef LOCK_PROFILING _rw_wunlock_hard(rw, (uintptr_t)curthread, file, line); #else __rw_wunlock(rw, curthread, file, line); #endif TD_LOCKS_DEC(curthread); } /* * Determines whether a new reader can acquire a lock. Succeeds if the * reader already owns a read lock and the lock is locked for read to * prevent deadlock from reader recursion. Also succeeds if the lock * is unlocked and has no writer waiters or spinners. Failing otherwise * prioritizes writers before readers. */ static bool __always_inline __rw_can_read(struct thread *td, uintptr_t v, bool fp) { if ((v & (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER)) == RW_LOCK_READ) return (true); if (!fp && td->td_rw_rlocks && (v & RW_LOCK_READ)) return (true); return (false); } static bool __always_inline __rw_rlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp, bool fp LOCK_FILE_LINE_ARG_DEF) { /* * Handle the easy case. If no other thread has a write * lock, then try to bump up the count of read locks. Note * that we have to preserve the current state of the * RW_LOCK_WRITE_WAITERS flag. If we fail to acquire a * read lock, then rw_lock must have changed, so restart * the loop. Note that this handles the case of a * completely unlocked rwlock since such a lock is encoded * as a read lock with no waiters. 
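That encoding is also why lv_rw_wowner() earlier can branch on a single bit: with RW_LOCK_READ set, the word holds flags plus a reader count; otherwise it is the owner's thread pointer with the low flag bits masked off. A toy decoder under an assumed layout (the real masks live in sys/rwlock.h; the bit values here are illustrative only and are reused by the later sketches):

#include <stdint.h>
#include <stdio.h>

#define LOCK_READ	0x01u	/* assumed flag bit: word is in read mode */
#define FLAG_MASK	0x1fu	/* assumed: low five bits reserved for flags */
#define READER_SHIFT	5	/* assumed position of the reader count */

static void
decode_lock_word(uintptr_t v)
{
	if (v & LOCK_READ)
		printf("read-locked: %ju readers, flags %#jx\n",
		    (uintmax_t)(v >> READER_SHIFT), (uintmax_t)(v & FLAG_MASK));
	else
		printf("write-locked: owner %p, flags %#jx\n",
		    (void *)(v & ~(uintptr_t)FLAG_MASK),
		    (uintmax_t)(v & FLAG_MASK));
}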
*/ while (__rw_can_read(td, *vp, fp)) { if (atomic_fcmpset_acq_ptr(&rw->rw_lock, vp, *vp + RW_ONE_READER)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeed %p -> %p", __func__, rw, (void *)*vp, (void *)(*vp + RW_ONE_READER)); td->td_rw_rlocks++; return (true); } } return (false); } static void __noinline __rw_rlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v LOCK_FILE_LINE_ARG_DEF) { struct turnstile *ts; struct thread *owner; #ifdef ADAPTIVE_RWLOCKS int spintries = 0; int i, n; #endif #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif #if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS) struct lock_delay_arg lda; #endif #ifdef KDTRACE_HOOKS u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) uintptr_t state = 0; int doing_lockprof = 0; #endif #ifdef KDTRACE_HOOKS if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) { if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG)) goto out_lockstat; doing_lockprof = 1; all_time -= lockstat_nsecs(&rw->lock_object); state = v; } #endif #ifdef LOCK_PROFILING doing_lockprof = 1; state = v; #endif if (SCHEDULER_STOPPED()) return; #if defined(ADAPTIVE_RWLOCKS) lock_delay_arg_init(&lda, &rw_delay); #elif defined(KDTRACE_HOOKS) lock_delay_arg_init(&lda, NULL); #endif #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&rw->lock_object, &contested, &waittime); for (;;) { if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG)) break; #ifdef KDTRACE_HOOKS lda.spin_cnt++; #endif #ifdef ADAPTIVE_RWLOCKS /* * If the owner is running on another CPU, spin until * the owner stops running or the state of the lock * changes. */ if ((v & RW_LOCK_READ) == 0) { owner = (struct thread *)RW_OWNER(v); if (TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, rw, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); do { lock_delay(&lda); v = RW_READ_VALUE(rw); owner = lv_rw_wowner(v); } while (owner != NULL && TD_IS_RUNNING(owner)); KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; } } else { if ((v & RW_LOCK_WRITE_SPINNER) && RW_READERS(v) == 0) { MPASS(!__rw_can_read(td, v, false)); lock_delay_spin(2); v = RW_READ_VALUE(rw); continue; } if (spintries < rowner_retries) { spintries++; KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); n = RW_READERS(v); for (i = 0; i < rowner_loops; i += n) { lock_delay_spin(n); v = RW_READ_VALUE(rw); if (!(v & RW_LOCK_READ)) break; n = RW_READERS(v); if (n == 0) break; if (__rw_can_read(td, v, false)) break; } #ifdef KDTRACE_HOOKS lda.spin_cnt += rowner_loops - i; #endif KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); if (i < rowner_loops) continue; } } #endif /* * Okay, now it's the hard case. Some other thread already * has a write lock or there are write waiters present, * acquire the turnstile lock so we can begin the process * of blocking. */ ts = turnstile_trywait(&rw->lock_object); /* * The lock might have been released while we spun, so * recheck its state and restart the loop if needed. 
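The loop in __rw_rlock_try() above is a fetch-compare-swap read acquire: while the word stays in read mode, try to bump the reader count; a failed CAS reloads the observed value and the loop revalidates. A userland rendering with C11 atomics in the toy encoding of the previous sketch (the kernel version also refuses when writer-waiter or spinner bits are set; this sketch checks only the read bit for brevity):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define LOCK_READ	0x01u	/* toy encoding from the sketch above */
#define ONE_READER	0x20u	/* first bit past the assumed flag field */

static bool
rlock_try(_Atomic uintptr_t *lockp)
{
	uintptr_t v = atomic_load_explicit(lockp, memory_order_relaxed);

	/* Readers may pile on only while the word stays in read mode. */
	while (v & LOCK_READ) {
		if (atomic_compare_exchange_weak_explicit(lockp, &v,
		    v + ONE_READER, memory_order_acquire,
		    memory_order_relaxed))
			return (true);
		/* The failed CAS reloaded v; reevaluate and retry. */
	}
	return (false);
}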
*/ v = RW_READ_VALUE(rw); retry_ts: if (((v & RW_LOCK_WRITE_SPINNER) && RW_READERS(v) == 0) || __rw_can_read(td, v, false)) { turnstile_cancel(ts); continue; } owner = lv_rw_wowner(v); #ifdef ADAPTIVE_RWLOCKS /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the turnstile * chain lock. If so, drop the turnstile lock and try * again. */ if (owner != NULL) { if (TD_IS_RUNNING(owner)) { turnstile_cancel(ts); continue; } } #endif /* * The lock is held in write mode or it already has waiters. */ MPASS(!__rw_can_read(td, v, false)); /* * If the RW_LOCK_READ_WAITERS flag is already set, then * we can go ahead and block. If it is not set then try * to set it. If we fail to set it drop the turnstile * lock and restart the loop. */ if (!(v & RW_LOCK_READ_WAITERS)) { if (!atomic_fcmpset_ptr(&rw->rw_lock, &v, v | RW_LOCK_READ_WAITERS)) goto retry_ts; if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set read waiters flag", __func__, rw); } /* * We were unable to acquire the lock and the read waiters * flag is set, so we must block on the turnstile. */ if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, rw); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&rw->lock_object); #endif MPASS(owner == rw_owner(rw)); turnstile_wait(ts, owner, TS_SHARED_QUEUE); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&rw->lock_object); sleep_cnt++; #endif if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from turnstile", __func__, rw); v = RW_READ_VALUE(rw); } #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) if (__predict_true(!doing_lockprof)) return; #endif #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&rw->lock_object); if (sleep_time) LOCKSTAT_RECORD4(rw__block, rw, sleep_time, LOCKSTAT_READER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); /* Record only the loops spinning and not sleeping. */ if (lda.spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time, LOCKSTAT_READER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); out_lockstat: #endif /* * TODO: acquire "owner of record" here. Here be turnstile dragons * however. turnstiles don't like owners changing between calls to * turnstile_wait() currently. 
*/ LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested, waittime, file, line, LOCKSTAT_READER); } void __rw_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF) { struct thread *td; uintptr_t v; td = curthread; KASSERT(kdb_active != 0 || SCHEDULER_STOPPED_TD(td) || !TD_IS_IDLETHREAD(td), ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d", td, rw->lock_object.lo_name, file, line)); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_rlock() of destroyed rwlock @ %s:%d", file, line)); KASSERT(rw_wowner(rw) != td, ("rw_rlock: wlock already held for %s @ %s:%d", rw->lock_object.lo_name, file, line)); WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL); v = RW_READ_VALUE(rw); if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__acquire) || !__rw_rlock_try(rw, td, &v, true LOCK_FILE_LINE_ARG))) __rw_rlock_hard(rw, td, v LOCK_FILE_LINE_ARG); else lock_profile_obtain_lock_success(&rw->lock_object, 0, 0, file, line); LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line); WITNESS_LOCK(&rw->lock_object, 0, file, line); TD_LOCKS_INC(curthread); } void __rw_rlock(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; rw = rwlock2rw(c); __rw_rlock_int(rw LOCK_FILE_LINE_ARG); } int __rw_try_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF) { uintptr_t x; if (SCHEDULER_STOPPED()) return (1); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("rw_try_rlock() by idle thread %p on rwlock %s @ %s:%d", curthread, rw->lock_object.lo_name, file, line)); x = rw->rw_lock; for (;;) { KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line)); if (!(x & RW_LOCK_READ)) break; if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &x, x + RW_ONE_READER)) { LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file, line); WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line); LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, 0, 0, file, line, LOCKSTAT_READER); TD_LOCKS_INC(curthread); curthread->td_rw_rlocks++; return (1); } } LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line); return (0); } int __rw_try_rlock(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; rw = rwlock2rw(c); return (__rw_try_rlock_int(rw LOCK_FILE_LINE_ARG)); } static bool __always_inline __rw_runlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp) { for (;;) { if (RW_READERS(*vp) > 1 || !(*vp & RW_LOCK_WAITERS)) { if (atomic_fcmpset_rel_ptr(&rw->rw_lock, vp, *vp - RW_ONE_READER)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeeded %p -> %p", __func__, rw, (void *)*vp, (void *)(*vp - RW_ONE_READER)); td->td_rw_rlocks--; return (true); } continue; } break; } return (false); } static void __noinline __rw_runlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v LOCK_FILE_LINE_ARG_DEF) { struct turnstile *ts; uintptr_t setv, queue; if (SCHEDULER_STOPPED()) return; if (__rw_runlock_try(rw, td, &v)) goto out_lockstat; /* * Ok, we know we have waiters and we think we are the * last reader, so grab the turnstile lock. */ turnstile_chain_lock(&rw->lock_object); v = RW_READ_VALUE(rw); for (;;) { if (__rw_runlock_try(rw, td, &v)) break; MPASS(v & RW_LOCK_WAITERS); /* * Try to drop our lock leaving the lock in a unlocked * state. * * If you wanted to do explicit lock handoff you'd have to * do it here. You'd also want to use turnstile_signal() * and you'd have to handle the race where a higher * priority thread blocks on the write lock before the * thread you wakeup actually runs and have the new thread * "steal" the lock. 
For now it's a lot simpler to just * wakeup all of the waiters. * * As above, if we fail, then another thread might have * acquired a read lock, so drop the turnstile lock and * restart. */ setv = RW_UNLOCKED; queue = TS_SHARED_QUEUE; if (v & RW_LOCK_WRITE_WAITERS) { queue = TS_EXCLUSIVE_QUEUE; setv |= (v & RW_LOCK_READ_WAITERS); } setv |= (v & RW_LOCK_WRITE_SPINNER); if (!atomic_fcmpset_rel_ptr(&rw->rw_lock, &v, setv)) continue; if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p last succeeded with waiters", __func__, rw); /* * Ok. The lock is released and all that's left is to * wake up the waiters. Note that the lock might not be * free anymore, but in that case the writers will just * block again if they run before the new lock holder(s) * release the lock. */ ts = turnstile_lookup(&rw->lock_object); MPASS(ts != NULL); turnstile_broadcast(ts, queue); turnstile_unpend(ts); td->td_rw_rlocks--; break; } turnstile_chain_unlock(&rw->lock_object); out_lockstat: LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_READER); } void _rw_runlock_cookie_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF) { struct thread *td; uintptr_t v; KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_runlock() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(&rw->rw_lock, RA_RLOCKED, file, line); WITNESS_UNLOCK(&rw->lock_object, 0, file, line); LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line); td = curthread; v = RW_READ_VALUE(rw); if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__release) || !__rw_runlock_try(rw, td, &v))) __rw_runlock_hard(rw, td, v LOCK_FILE_LINE_ARG); else lock_profile_release_lock(&rw->lock_object); TD_LOCKS_DEC(curthread); } void _rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; rw = rwlock2rw(c); _rw_runlock_cookie_int(rw LOCK_FILE_LINE_ARG); } #ifdef ADAPTIVE_RWLOCKS static inline void rw_drop_critical(uintptr_t v, bool *in_critical, int *extra_work) { if (v & RW_LOCK_WRITE_SPINNER) return; if (*in_critical) { critical_exit(); *in_critical = false; (*extra_work)--; } } #else #define rw_drop_critical(v, in_critical, extra_work) do { } while (0) #endif /* * This function is called when we are unable to obtain a write lock on the * first try. This means that at least one other thread holds either a * read or write lock. 
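The fast path that must have failed to get here is a single CAS from the unlocked word to the caller's thread pointer, as done via _rw_write_lock_fetch() in _rw_wlock_cookie() earlier. A C11 sketch in the same toy encoding (tid must have the low flag bits clear, as an aligned thread pointer does):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define UNLOCKED	0x01u	/* toy encoding: read mode, zero readers */

static bool
wlock_try(_Atomic uintptr_t *lockp, uintptr_t tid)
{
	uintptr_t v = UNLOCKED;

	/* On failure v holds the observed word, which a hard path such as
	 * __rw_wlock_hard() below would then inspect. */
	return (atomic_compare_exchange_strong_explicit(lockp, &v, tid,
	    memory_order_acquire, memory_order_relaxed));
}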
*/ void __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF) { uintptr_t tid; struct rwlock *rw; struct turnstile *ts; struct thread *owner; #ifdef ADAPTIVE_RWLOCKS int spintries = 0; int i, n; enum { READERS, WRITER } sleep_reason = READERS; bool in_critical = false; #endif uintptr_t setv; #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif #if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS) struct lock_delay_arg lda; #endif #ifdef KDTRACE_HOOKS u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) uintptr_t state = 0; int doing_lockprof = 0; #endif int extra_work = 0; tid = (uintptr_t)curthread; rw = rwlock2rw(c); #ifdef KDTRACE_HOOKS if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) { while (v == RW_UNLOCKED) { if (_rw_write_lock_fetch(rw, &v, tid)) goto out_lockstat; } extra_work = 1; doing_lockprof = 1; all_time -= lockstat_nsecs(&rw->lock_object); state = v; } #endif #ifdef LOCK_PROFILING extra_work = 1; doing_lockprof = 1; state = v; #endif if (SCHEDULER_STOPPED()) return; #if defined(ADAPTIVE_RWLOCKS) lock_delay_arg_init(&lda, &rw_delay); #elif defined(KDTRACE_HOOKS) lock_delay_arg_init(&lda, NULL); #endif if (__predict_false(v == RW_UNLOCKED)) v = RW_READ_VALUE(rw); if (__predict_false(lv_rw_wowner(v) == (struct thread *)tid)) { KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE, ("%s: recursing but non-recursive rw %s @ %s:%d\n", __func__, rw->lock_object.lo_name, file, line)); rw->rw_recurse++; atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED); if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw); return; } if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__, rw->lock_object.lo_name, (void *)rw->rw_lock, file, line); #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&rw->lock_object, &contested, &waittime); for (;;) { if (v == RW_UNLOCKED) { if (_rw_write_lock_fetch(rw, &v, tid)) break; continue; } #ifdef KDTRACE_HOOKS lda.spin_cnt++; #endif #ifdef ADAPTIVE_RWLOCKS if (v == (RW_LOCK_READ | RW_LOCK_WRITE_SPINNER)) { if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid)) break; continue; } /* * If the lock is write locked and the owner is * running on another CPU, spin until the owner stops * running or the state of the lock changes. 
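Both adaptive paths lean on lock_delay(), which burns a growing number of spins between polls of the lock word, bounded by the rw_delay.base and rw_delay.max knobs earlier in the file. A minimal userland model of that backoff, with a compiler barrier standing in for cpu_spinwait() (the exact growth policy is an assumption; this sketch simply doubles up to the cap, and callers start with delay = base):

static inline void
spin_once(void)
{
	__asm__ volatile("" ::: "memory");	/* stand-in for cpu_spinwait() */
}

struct delay_state {
	unsigned delay;		/* spins to burn on the next call */
	unsigned max;		/* cap, cf. rw_delay.max */
};

static void
delay_step(struct delay_state *s)
{
	unsigned i;

	for (i = 0; i < s->delay; i++)
		spin_once();
	/* Double the next wait, capped at the configured maximum. */
	if (s->delay < s->max) {
		s->delay <<= 1;
		if (s->delay > s->max)
			s->delay = s->max;
	}
}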
*/ if (!(v & RW_LOCK_READ)) { rw_drop_critical(v, &in_critical, &extra_work); sleep_reason = WRITER; owner = lv_rw_wowner(v); if (!TD_IS_RUNNING(owner)) goto ts; if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, rw, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); do { lock_delay(&lda); v = RW_READ_VALUE(rw); owner = lv_rw_wowner(v); } while (owner != NULL && TD_IS_RUNNING(owner)); KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; } else if (RW_READERS(v) > 0) { sleep_reason = READERS; if (spintries == rowner_retries) goto ts; if (!(v & RW_LOCK_WRITE_SPINNER)) { if (!in_critical) { critical_enter(); in_critical = true; extra_work++; } if (!atomic_fcmpset_ptr(&rw->rw_lock, &v, v | RW_LOCK_WRITE_SPINNER)) { critical_exit(); in_critical = false; extra_work--; continue; } } spintries++; KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); n = RW_READERS(v); for (i = 0; i < rowner_loops; i += n) { lock_delay_spin(n); v = RW_READ_VALUE(rw); if (!(v & RW_LOCK_WRITE_SPINNER)) break; if (!(v & RW_LOCK_READ)) break; n = RW_READERS(v); if (n == 0) break; } #ifdef KDTRACE_HOOKS lda.spin_cnt += i; #endif KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); if (i < rowner_loops) continue; } ts: #endif ts = turnstile_trywait(&rw->lock_object); v = RW_READ_VALUE(rw); retry_ts: owner = lv_rw_wowner(v); #ifdef ADAPTIVE_RWLOCKS /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the turnstile * chain lock. If so, drop the turnstile lock and try * again. */ if (owner != NULL) { if (TD_IS_RUNNING(owner)) { turnstile_cancel(ts); rw_drop_critical(v, &in_critical, &extra_work); continue; } } else if (RW_READERS(v) > 0 && sleep_reason == WRITER) { turnstile_cancel(ts); rw_drop_critical(v, &in_critical, &extra_work); continue; } #endif /* * Check for the waiters flags about this rwlock. * If the lock was released, without maintain any pending * waiters queue, simply try to acquire it. * If a pending waiters queue is present, claim the lock * ownership and maintain the pending queue. */ setv = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER); if ((v & ~setv) == RW_UNLOCKED) { setv &= ~RW_LOCK_WRITE_SPINNER; if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid | setv)) { if (setv) turnstile_claim(ts); else turnstile_cancel(ts); break; } goto retry_ts; } #ifdef ADAPTIVE_RWLOCKS if (in_critical) { if ((v & RW_LOCK_WRITE_SPINNER) || !((v & RW_LOCK_WRITE_WAITERS))) { setv = v & ~RW_LOCK_WRITE_SPINNER; setv |= RW_LOCK_WRITE_WAITERS; if (!atomic_fcmpset_ptr(&rw->rw_lock, &v, setv)) goto retry_ts; } critical_exit(); in_critical = false; extra_work--; } else { #endif /* * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to * set it. If we fail to set it, then loop back and try * again. */ if (!(v & RW_LOCK_WRITE_WAITERS)) { if (!atomic_fcmpset_ptr(&rw->rw_lock, &v, v | RW_LOCK_WRITE_WAITERS)) goto retry_ts; if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set write waiters flag", __func__, rw); } #ifdef ADAPTIVE_RWLOCKS } #endif /* * We were unable to acquire the lock and the write waiters * flag is set, so we must block on the turnstile. 
*/ if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, rw); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&rw->lock_object); #endif MPASS(owner == rw_owner(rw)); turnstile_wait(ts, owner, TS_EXCLUSIVE_QUEUE); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&rw->lock_object); sleep_cnt++; #endif if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from turnstile", __func__, rw); #ifdef ADAPTIVE_RWLOCKS spintries = 0; #endif v = RW_READ_VALUE(rw); } if (__predict_true(!extra_work)) return; #ifdef ADAPTIVE_RWLOCKS if (in_critical) critical_exit(); #endif #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) if (__predict_true(!doing_lockprof)) return; #endif #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&rw->lock_object); if (sleep_time) LOCKSTAT_RECORD4(rw__block, rw, sleep_time, LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); /* Record only the loops spinning and not sleeping. */ if (lda.spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time, LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); out_lockstat: #endif LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested, waittime, file, line, LOCKSTAT_WRITER); } /* * This function is called if lockstat is active or the first try at releasing * a write lock failed. The latter means that the lock is recursed or one of * the 2 waiter bits must be set indicating that at least one thread is waiting * on this lock. */ void __rw_wunlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF) { struct rwlock *rw; struct turnstile *ts; uintptr_t tid, setv; int queue; tid = (uintptr_t)curthread; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); if (__predict_false(v == tid)) v = RW_READ_VALUE(rw); if (v & RW_LOCK_WRITER_RECURSED) { if (--(rw->rw_recurse) == 0) atomic_clear_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED); if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw); return; } LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_WRITER); if (v == tid && _rw_write_unlock(rw, tid)) return; KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS), ("%s: neither of the waiter flags are set", __func__)); if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p contested", __func__, rw); turnstile_chain_lock(&rw->lock_object); /* * Use the same algo as sx locks for now. Prefer waking up shared * waiters if we have any over writers. This is probably not ideal. * * 'v' is the value we are going to write back to rw_lock. If we * have waiters on both queues, we need to preserve the state of * the waiter flag for the queue we don't wake up. For now this is * hardcoded for the algorithm mentioned above. * * In the case of both readers and writers waiting we wakeup the * readers but leave the RW_LOCK_WRITE_WAITERS flag set. If a * new writer comes in before a reader it will claim the lock up * above. There is probably a potential priority inversion in * there that could be worked around either by waking both queues * of waiters or doing some complicated lock handoff gymnastics. */ setv = RW_UNLOCKED; v = RW_READ_VALUE(rw); queue = TS_SHARED_QUEUE; if (v & RW_LOCK_WRITE_WAITERS) { queue = TS_EXCLUSIVE_QUEUE; setv |= (v & RW_LOCK_READ_WAITERS); } atomic_store_rel_ptr(&rw->rw_lock, setv); /* Wake up all waiters for the specific queue. 
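The queue selection just made in __rw_wunlock_hard() is a pure function of the waiter bits as the code reads: the exclusive queue is woken whenever writers are waiting, and in that case the read-waiters bit is preserved in the value written back. A side-effect-free restatement in the toy encoding used above:

#include <stdbool.h>
#include <stdint.h>

#define UNLOCKED	0x01u	/* stand-in for RW_UNLOCKED */
#define READ_WAITERS	0x02u	/* stand-in for RW_LOCK_READ_WAITERS */
#define WRITE_WAITERS	0x04u	/* stand-in for RW_LOCK_WRITE_WAITERS */

/* Returns true to wake the exclusive (writer) queue; *setv is the new word. */
static bool
wunlock_pick_queue(uintptr_t v, uintptr_t *setv)
{
	*setv = UNLOCKED;
	if (v & WRITE_WAITERS) {
		*setv |= (v & READ_WAITERS);	/* readers keep waiting */
		return (true);
	}
	return (false);
}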
*/ if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw, queue == TS_SHARED_QUEUE ? "read" : "write"); ts = turnstile_lookup(&rw->lock_object); MPASS(ts != NULL); turnstile_broadcast(ts, queue); turnstile_unpend(ts); turnstile_chain_unlock(&rw->lock_object); } /* * Attempt to do a non-blocking upgrade from a read lock to a write * lock. This will only succeed if this thread holds a single read * lock. Returns true if the upgrade succeeded and false otherwise. */ int __rw_try_upgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF) { uintptr_t v, setv, tid; struct turnstile *ts; int success; if (SCHEDULER_STOPPED()) return (1); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(&rw->rw_lock, RA_RLOCKED, file, line); /* * Attempt to switch from one reader to a writer. If there * are any write waiters, then we will have to lock the * turnstile first to prevent races with another writer * calling turnstile_wait() before we have claimed this * turnstile. So, do the simple case of no waiters first. */ tid = (uintptr_t)curthread; success = 0; v = RW_READ_VALUE(rw); for (;;) { if (RW_READERS(v) > 1) break; if (!(v & RW_LOCK_WAITERS)) { success = atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid); if (!success) continue; break; } /* * Ok, we think we have waiters, so lock the turnstile. */ ts = turnstile_trywait(&rw->lock_object); v = RW_READ_VALUE(rw); retry_ts: if (RW_READERS(v) > 1) { turnstile_cancel(ts); break; } /* * Try to switch from one reader to a writer again. This time * we honor the current state of the waiters flags. * If we obtain the lock with the flags set, then claim * ownership of the turnstile. */ setv = tid | (v & RW_LOCK_WAITERS); success = atomic_fcmpset_ptr(&rw->rw_lock, &v, setv); if (success) { if (v & RW_LOCK_WAITERS) turnstile_claim(ts); else turnstile_cancel(ts); break; } goto retry_ts; } LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line); if (success) { curthread->td_rw_rlocks--; WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); LOCKSTAT_RECORD0(rw__upgrade, rw); } return (success); } int __rw_try_upgrade(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; rw = rwlock2rw(c); return (__rw_try_upgrade_int(rw LOCK_FILE_LINE_ARG)); } /* * Downgrade a write lock into a single read lock. */ void __rw_downgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF) { struct turnstile *ts; uintptr_t tid, v; int rwait, wwait; if (SCHEDULER_STOPPED()) return; KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(&rw->rw_lock, RA_WLOCKED | RA_NOTRECURSED, file, line); #ifndef INVARIANTS if (rw_recursed(rw)) panic("downgrade of a recursed lock"); #endif WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line); /* * Convert from a writer to a single reader. First we handle * the easy case with no waiters. If there are any waiters, we * lock the turnstile and "disown" the lock. */ tid = (uintptr_t)curthread; if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1))) goto out; /* * Ok, we think we have waiters, so lock the turnstile so we can * read the waiter flags without any races. */ turnstile_chain_lock(&rw->lock_object); v = rw->rw_lock & RW_LOCK_WAITERS; rwait = v & RW_LOCK_READ_WAITERS; wwait = v & RW_LOCK_WRITE_WAITERS; MPASS(rwait | wwait); /* * Downgrade from a write lock while preserving waiters flag * and give up ownership of the turnstile. 
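The no-waiters fast case of __rw_try_upgrade_int() above is a single CAS from "exactly one reader, nothing else" to the caller's thread pointer. In the toy encoding of the earlier sketches (read bit 0x01, one reader 0x20), that is:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define LOCK_READ	0x01u	/* toy encoding, as above */
#define ONE_READER	0x20u

static bool
try_upgrade_fast(_Atomic uintptr_t *lockp, uintptr_t tid)
{
	/* Exactly one reader and no waiter or flag bits beyond LOCK_READ. */
	uintptr_t expected = LOCK_READ | ONE_READER;

	return (atomic_compare_exchange_strong_explicit(lockp, &expected,
	    tid, memory_order_acquire, memory_order_relaxed));
}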
*/ ts = turnstile_lookup(&rw->lock_object); MPASS(ts != NULL); if (!wwait) v &= ~RW_LOCK_READ_WAITERS; atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v); /* * Wake other readers if there are no writers pending. Otherwise they * won't be able to acquire the lock anyway. */ if (rwait && !wwait) { turnstile_broadcast(ts, TS_SHARED_QUEUE); turnstile_unpend(ts); } else turnstile_disown(ts); turnstile_chain_unlock(&rw->lock_object); out: curthread->td_rw_rlocks++; LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line); LOCKSTAT_RECORD0(rw__downgrade, rw); } void __rw_downgrade(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; rw = rwlock2rw(c); __rw_downgrade_int(rw LOCK_FILE_LINE_ARG); } #ifdef INVARIANT_SUPPORT #ifndef INVARIANTS #undef __rw_assert #endif /* * In the non-WITNESS case, rw_assert() can only detect that at least * *some* thread owns an rlock, but it cannot guarantee that *this* * thread owns an rlock. */ void __rw_assert(const volatile uintptr_t *c, int what, const char *file, int line) { const struct rwlock *rw; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); switch (what) { case RA_LOCKED: case RA_LOCKED | RA_RECURSED: case RA_LOCKED | RA_NOTRECURSED: case RA_RLOCKED: case RA_RLOCKED | RA_RECURSED: case RA_RLOCKED | RA_NOTRECURSED: #ifdef WITNESS witness_assert(&rw->lock_object, what, file, line); #else /* * If some other thread has a write lock or we have one * and are asserting a read lock, fail. Also, if no one * has a lock at all, fail. */ if (rw->rw_lock == RW_UNLOCKED || (!(rw->rw_lock & RW_LOCK_READ) && (what & RA_RLOCKED || rw_wowner(rw) != curthread))) panic("Lock %s not %slocked @ %s:%d\n", rw->lock_object.lo_name, (what & RA_RLOCKED) ? "read " : "", file, line); if (!(rw->rw_lock & RW_LOCK_READ) && !(what & RA_RLOCKED)) { if (rw_recursed(rw)) { if (what & RA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); } else if (what & RA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); } #endif break; case RA_WLOCKED: case RA_WLOCKED | RA_RECURSED: case RA_WLOCKED | RA_NOTRECURSED: if (rw_wowner(rw) != curthread) panic("Lock %s not exclusively locked @ %s:%d\n", rw->lock_object.lo_name, file, line); if (rw_recursed(rw)) { if (what & RA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); } else if (what & RA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); break; case RA_UNLOCKED: #ifdef WITNESS witness_assert(&rw->lock_object, what, file, line); #else /* * If we hold a write lock fail. We can't reliably check * to see if we hold a read lock or not. 
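
Downgrading hinges on which waiter bits may be dropped: the read-waiters bit can go only when the readers are actually about to be broadcast, which requires that no writer is queued ahead of them. A small model of the word computation and the wake decision, again with stand-in flag values rather than the kernel's:

#include <assert.h>
#include <stdint.h>

#define	M_READ_WAITERS	((uintptr_t)0x1)
#define	M_WRITE_WAITERS	((uintptr_t)0x2)
#define	M_ONE_READER	((uintptr_t)0x10)	/* stands in for RW_READERS_LOCK(1) */

static uintptr_t
downgrade_word(uintptr_t waiters, int *wake_readers)
{
	int rwait = (waiters & M_READ_WAITERS) != 0;
	int wwait = (waiters & M_WRITE_WAITERS) != 0;

	*wake_readers = rwait && !wwait;
	if (!wwait)
		waiters &= ~M_READ_WAITERS;	/* woken readers no longer wait */
	return (M_ONE_READER | waiters);
}

int
main(void)
{
	int wake;

	/* A queued writer keeps everyone waiting and both bits intact. */
	assert(downgrade_word(M_READ_WAITERS | M_WRITE_WAITERS, &wake) ==
	    (M_ONE_READER | M_READ_WAITERS | M_WRITE_WAITERS) && !wake);
	/* Readers only: broadcast them and clear their bit. */
	assert(downgrade_word(M_READ_WAITERS, &wake) ==
	    M_ONE_READER && wake);
	return (0);
}
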
*/ if (rw_wowner(rw) == curthread) panic("Lock %s exclusively locked @ %s:%d\n", rw->lock_object.lo_name, file, line); #endif break; default: panic("Unknown rw lock assertion: %d @ %s:%d", what, file, line); } } #endif /* INVARIANT_SUPPORT */ #ifdef DDB void db_show_rwlock(const struct lock_object *lock) { const struct rwlock *rw; struct thread *td; rw = (const struct rwlock *)lock; db_printf(" state: "); if (rw->rw_lock == RW_UNLOCKED) db_printf("UNLOCKED\n"); else if (rw->rw_lock == RW_DESTROYED) { db_printf("DESTROYED\n"); return; } else if (rw->rw_lock & RW_LOCK_READ) db_printf("RLOCK: %ju locks\n", (uintmax_t)(RW_READERS(rw->rw_lock))); else { td = rw_wowner(rw); db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td, td->td_tid, td->td_proc->p_pid, td->td_name); if (rw_recursed(rw)) db_printf(" recursed: %u\n", rw->rw_recurse); } db_printf(" waiters: "); switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) { case RW_LOCK_READ_WAITERS: db_printf("readers\n"); break; case RW_LOCK_WRITE_WAITERS: db_printf("writers\n"); break; case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS: db_printf("readers and writers\n"); break; default: db_printf("none\n"); break; } } #endif Index: head/sys/kern/subr_lock.c =================================================================== --- head/sys/kern/subr_lock.c (revision 344854) +++ head/sys/kern/subr_lock.c (revision 344855) @@ -1,704 +1,703 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2006 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * This module holds the global variables and functions used to maintain * lock_object structures. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_mprof.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #include SDT_PROVIDER_DEFINE(lock); SDT_PROBE_DEFINE1(lock, , , starvation, "u_int"); CTASSERT(LOCK_CLASS_MAX == 15); struct lock_class *lock_classes[LOCK_CLASS_MAX + 1] = { &lock_class_mtx_spin, &lock_class_mtx_sleep, &lock_class_sx, &lock_class_rm, &lock_class_rm_sleepable, &lock_class_rw, &lock_class_lockmgr, }; void lock_init(struct lock_object *lock, struct lock_class *class, const char *name, const char *type, int flags) { int i; /* Check for double-init and zero object. */ KASSERT(flags & LO_NEW || !lock_initialized(lock), ("lock \"%s\" %p already initialized", name, lock)); /* Look up lock class to find its index. */ for (i = 0; i < LOCK_CLASS_MAX; i++) if (lock_classes[i] == class) { lock->lo_flags = i << LO_CLASSSHIFT; break; } KASSERT(i < LOCK_CLASS_MAX, ("unknown lock class %p", class)); /* Initialize the lock object. */ lock->lo_name = name; lock->lo_flags |= flags | LO_INITIALIZED; LOCK_LOG_INIT(lock, 0); WITNESS_INIT(lock, (type != NULL) ? type : name); } void lock_destroy(struct lock_object *lock) { KASSERT(lock_initialized(lock), ("lock %p is not initialized", lock)); WITNESS_DESTROY(lock); LOCK_LOG_DESTROY(lock, 0); lock->lo_flags &= ~LO_INITIALIZED; } static SYSCTL_NODE(_debug, OID_AUTO, lock, CTLFLAG_RD, NULL, "lock debugging"); static SYSCTL_NODE(_debug_lock, OID_AUTO, delay, CTLFLAG_RD, NULL, "lock delay"); static u_int __read_mostly starvation_limit = 131072; SYSCTL_INT(_debug_lock_delay, OID_AUTO, starvation_limit, CTLFLAG_RW, &starvation_limit, 0, ""); static u_int __read_mostly restrict_starvation = 0; SYSCTL_INT(_debug_lock_delay, OID_AUTO, restrict_starvation, CTLFLAG_RW, &restrict_starvation, 0, ""); void lock_delay(struct lock_delay_arg *la) { struct lock_delay_config *lc = la->config; u_int i; la->delay <<= 1; if (__predict_false(la->delay > lc->max)) la->delay = lc->max; for (i = la->delay; i > 0; i--) cpu_spinwait(); la->spin_cnt += la->delay; if (__predict_false(la->spin_cnt > starvation_limit)) { SDT_PROBE1(lock, , , starvation, la->delay); if (restrict_starvation) la->delay = lc->base; } } static u_int lock_roundup_2(u_int val) { u_int res; for (res = 1; res <= val; res <<= 1) continue; return (res); } void lock_delay_default_init(struct lock_delay_config *lc) { lc->base = 1; lc->max = lock_roundup_2(mp_ncpus) * 256; if (lc->max > 32678) lc->max = 32678; } #ifdef DDB DB_SHOW_COMMAND(lock, db_show_lock) { struct lock_object *lock; struct lock_class *class; if (!have_addr) return; lock = (struct lock_object *)addr; if (LO_CLASSINDEX(lock) > LOCK_CLASS_MAX) { db_printf("Unknown lock class: %d\n", LO_CLASSINDEX(lock)); return; } class = LOCK_CLASS(lock); db_printf(" class: %s\n", class->lc_name); db_printf(" name: %s\n", lock->lo_name); class->lc_ddb_show(lock); } #endif #ifdef LOCK_PROFILING /* * One object per-thread for each lock the thread owns. Tracks individual * lock instances. */ struct lock_profile_object { LIST_ENTRY(lock_profile_object) lpo_link; struct lock_object *lpo_obj; const char *lpo_file; int lpo_line; uint16_t lpo_ref; uint16_t lpo_cnt; uint64_t lpo_acqtime; uint64_t lpo_waittime; u_int lpo_contest_locking; }; /* * One lock_prof for each (file, line, lock object) triple. 
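
lock_delay() implements capped exponential backoff: the spin window doubles on every invocation, is clamped to the class maximum that lock_delay_default_init() derives from the CPU count, and the accumulated spin count can trip the starvation limit. A userspace sketch of one delay step; cpu_pause() stands in for cpu_spinwait(), and the reset is unconditional here where the kernel also requires restrict_starvation and fires an SDT probe:

struct delay_cfg { unsigned base, max; };
struct delay_arg { struct delay_cfg *cfg; unsigned delay, spin_cnt; };

static void
cpu_pause(void)
{
	/* stand-in for cpu_spinwait(), e.g. the PAUSE instruction on x86 */
}

static void
delay_once(struct delay_arg *la, unsigned starvation_limit)
{
	unsigned i;

	la->delay <<= 1;			/* exponential growth */
	if (la->delay > la->cfg->max)
		la->delay = la->cfg->max;	/* clamp to the class max */
	for (i = la->delay; i > 0; i--)
		cpu_pause();
	la->spin_cnt += la->delay;
	if (la->spin_cnt > starvation_limit)
		la->delay = la->cfg->base;	/* back off to let others in */
}

int
main(void)
{
	struct delay_cfg cfg = { 1, 1024 };
	struct delay_arg la = { &cfg, cfg.base, 0 };
	int i;

	for (i = 0; i < 1000; i++)
		delay_once(&la, 131072);	/* the default limit above */
	return (0);
}
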
*/ struct lock_prof { SLIST_ENTRY(lock_prof) link; struct lock_class *class; const char *file; const char *name; int line; int ticks; uintmax_t cnt_wait_max; uintmax_t cnt_max; uintmax_t cnt_tot; uintmax_t cnt_wait; uintmax_t cnt_cur; uintmax_t cnt_contest_locking; }; SLIST_HEAD(lphead, lock_prof); #define LPROF_HASH_SIZE 4096 #define LPROF_HASH_MASK (LPROF_HASH_SIZE - 1) #define LPROF_CACHE_SIZE 4096 /* * Array of objects and profs for each type of object for each cpu. Spinlocks * are handled separately because a thread may be preempted and acquire a * spinlock while in the lock profiling code of a non-spinlock. In this way * we only need a critical section to protect the per-cpu lists. */ struct lock_prof_type { struct lphead lpt_lpalloc; struct lpohead lpt_lpoalloc; struct lphead lpt_hash[LPROF_HASH_SIZE]; struct lock_prof lpt_prof[LPROF_CACHE_SIZE]; struct lock_profile_object lpt_objs[LPROF_CACHE_SIZE]; }; struct lock_prof_cpu { struct lock_prof_type lpc_types[2]; /* One for spin one for other. */ }; struct lock_prof_cpu *lp_cpu[MAXCPU]; volatile int __read_mostly lock_prof_enable; static volatile int lock_prof_resetting; #define LPROF_SBUF_SIZE 256 static int lock_prof_rejected; static int lock_prof_skipspin; static int lock_prof_skipcount; #ifndef USE_CPU_NANOSECONDS uint64_t nanoseconds(void) { struct bintime bt; uint64_t ns; binuptime(&bt); /* From bintime2timespec */ ns = bt.sec * (uint64_t)1000000000; ns += ((uint64_t)1000000000 * (uint32_t)(bt.frac >> 32)) >> 32; return (ns); } #endif static void lock_prof_init_type(struct lock_prof_type *type) { int i; SLIST_INIT(&type->lpt_lpalloc); LIST_INIT(&type->lpt_lpoalloc); for (i = 0; i < LPROF_CACHE_SIZE; i++) { SLIST_INSERT_HEAD(&type->lpt_lpalloc, &type->lpt_prof[i], link); LIST_INSERT_HEAD(&type->lpt_lpoalloc, &type->lpt_objs[i], lpo_link); } } static void lock_prof_init(void *arg) { int cpu; for (cpu = 0; cpu <= mp_maxid; cpu++) { lp_cpu[cpu] = malloc(sizeof(*lp_cpu[cpu]), M_DEVBUF, M_WAITOK | M_ZERO); lock_prof_init_type(&lp_cpu[cpu]->lpc_types[0]); lock_prof_init_type(&lp_cpu[cpu]->lpc_types[1]); } } SYSINIT(lockprof, SI_SUB_SMP, SI_ORDER_ANY, lock_prof_init, NULL); static void lock_prof_reset_wait(void) { /* * Spin relinquishing our cpu so that quiesce_all_cpus may * complete. */ while (lock_prof_resetting) sched_relinquish(curthread); } static void lock_prof_reset(void) { struct lock_prof_cpu *lpc; int enabled, i, cpu; /* * We not only race with acquiring and releasing locks but also * thread exit. To be certain that threads exit without valid head * pointers they must see resetting set before enabled is cleared. * Otherwise a lock may not be removed from a per-thread list due * to disabled being set but not wait for reset() to remove it below. */ atomic_store_rel_int(&lock_prof_resetting, 1); enabled = lock_prof_enable; lock_prof_enable = 0; quiesce_all_cpus("profreset", 0); /* * Some objects may have migrated between CPUs. Clear all links * before we zero the structures. Some items may still be linked * into per-thread lists as well. 
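
nanoseconds() converts a struct bintime, whose fraction is a 64-bit binary fixed-point value in units of 2^-64 seconds, by scaling the top 32 fraction bits: ns = frac_hi * 10^9 / 2^32. A quick standalone check of that identity with an exact half second:

#include <assert.h>
#include <stdint.h>

int
main(void)
{
	uint64_t frac = UINT64_C(1) << 63;	/* exactly 0.5 s in 2^-64 units */
	uint64_t ns;

	ns = (UINT64_C(1000000000) * (uint32_t)(frac >> 32)) >> 32;
	assert(ns == 500000000);		/* 0.5e9 ns, no rounding error */
	return (0);
}
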
*/ for (cpu = 0; cpu <= mp_maxid; cpu++) { lpc = lp_cpu[cpu]; for (i = 0; i < LPROF_CACHE_SIZE; i++) { LIST_REMOVE(&lpc->lpc_types[0].lpt_objs[i], lpo_link); LIST_REMOVE(&lpc->lpc_types[1].lpt_objs[i], lpo_link); } } for (cpu = 0; cpu <= mp_maxid; cpu++) { lpc = lp_cpu[cpu]; bzero(lpc, sizeof(*lpc)); lock_prof_init_type(&lpc->lpc_types[0]); lock_prof_init_type(&lpc->lpc_types[1]); } atomic_store_rel_int(&lock_prof_resetting, 0); lock_prof_enable = enabled; } static void lock_prof_output(struct lock_prof *lp, struct sbuf *sb) { const char *p; for (p = lp->file; p != NULL && strncmp(p, "../", 3) == 0; p += 3); sbuf_printf(sb, "%8ju %9ju %11ju %11ju %11ju %6ju %6ju %2ju %6ju %s:%d (%s:%s)\n", lp->cnt_max / 1000, lp->cnt_wait_max / 1000, lp->cnt_tot / 1000, lp->cnt_wait / 1000, lp->cnt_cur, lp->cnt_cur == 0 ? (uintmax_t)0 : lp->cnt_tot / (lp->cnt_cur * 1000), lp->cnt_cur == 0 ? (uintmax_t)0 : lp->cnt_wait / (lp->cnt_cur * 1000), (uintmax_t)0, lp->cnt_contest_locking, p, lp->line, lp->class->lc_name, lp->name); } static void lock_prof_sum(struct lock_prof *match, struct lock_prof *dst, int hash, int spin, int t) { struct lock_prof_type *type; struct lock_prof *l; int cpu; dst->file = match->file; dst->line = match->line; dst->class = match->class; dst->name = match->name; for (cpu = 0; cpu <= mp_maxid; cpu++) { if (lp_cpu[cpu] == NULL) continue; type = &lp_cpu[cpu]->lpc_types[spin]; SLIST_FOREACH(l, &type->lpt_hash[hash], link) { if (l->ticks == t) continue; if (l->file != match->file || l->line != match->line || l->name != match->name) continue; l->ticks = t; if (l->cnt_max > dst->cnt_max) dst->cnt_max = l->cnt_max; if (l->cnt_wait_max > dst->cnt_wait_max) dst->cnt_wait_max = l->cnt_wait_max; dst->cnt_tot += l->cnt_tot; dst->cnt_wait += l->cnt_wait; dst->cnt_cur += l->cnt_cur; dst->cnt_contest_locking += l->cnt_contest_locking; } } } static void lock_prof_type_stats(struct lock_prof_type *type, struct sbuf *sb, int spin, int t) { struct lock_prof *l; int i; for (i = 0; i < LPROF_HASH_SIZE; ++i) { SLIST_FOREACH(l, &type->lpt_hash[i], link) { struct lock_prof lp = {}; if (l->ticks == t) continue; lock_prof_sum(l, &lp, i, spin, t); lock_prof_output(&lp, sb); } } } static int dump_lock_prof_stats(SYSCTL_HANDLER_ARGS) { struct sbuf *sb; int error, cpu, t; int enabled; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sb = sbuf_new_for_sysctl(NULL, NULL, LPROF_SBUF_SIZE, req); sbuf_printf(sb, "\n%8s %9s %11s %11s %11s %6s %6s %2s %6s %s\n", "max", "wait_max", "total", "wait_total", "count", "avg", "wait_avg", "cnt_hold", "cnt_lock", "name"); enabled = lock_prof_enable; lock_prof_enable = 0; quiesce_all_cpus("profstat", 0); t = ticks; for (cpu = 0; cpu <= mp_maxid; cpu++) { if (lp_cpu[cpu] == NULL) continue; lock_prof_type_stats(&lp_cpu[cpu]->lpc_types[0], sb, 0, t); lock_prof_type_stats(&lp_cpu[cpu]->lpc_types[1], sb, 1, t); } lock_prof_enable = enabled; error = sbuf_finish(sb); /* Output a trailing NUL. 
*/ if (error == 0) error = SYSCTL_OUT(req, "", 1); sbuf_delete(sb); return (error); } static int enable_lock_prof(SYSCTL_HANDLER_ARGS) { int error, v; v = lock_prof_enable; error = sysctl_handle_int(oidp, &v, v, req); if (error) return (error); if (req->newptr == NULL) return (error); if (v == lock_prof_enable) return (0); if (v == 1) lock_prof_reset(); lock_prof_enable = !!v; return (0); } static int reset_lock_prof_stats(SYSCTL_HANDLER_ARGS) { int error, v; v = 0; error = sysctl_handle_int(oidp, &v, 0, req); if (error) return (error); if (req->newptr == NULL) return (error); if (v == 0) return (0); lock_prof_reset(); return (0); } static struct lock_prof * lock_profile_lookup(struct lock_object *lo, int spin, const char *file, int line) { const char *unknown = "(unknown)"; struct lock_prof_type *type; struct lock_prof *lp; struct lphead *head; const char *p; u_int hash; p = file; if (p == NULL || *p == '\0') p = unknown; hash = (uintptr_t)lo->lo_name * 31 + (uintptr_t)p * 31 + line; hash &= LPROF_HASH_MASK; type = &lp_cpu[PCPU_GET(cpuid)]->lpc_types[spin]; head = &type->lpt_hash[hash]; SLIST_FOREACH(lp, head, link) { if (lp->line == line && lp->file == p && lp->name == lo->lo_name) return (lp); } lp = SLIST_FIRST(&type->lpt_lpalloc); if (lp == NULL) { lock_prof_rejected++; return (lp); } SLIST_REMOVE_HEAD(&type->lpt_lpalloc, link); lp->file = p; lp->line = line; lp->class = LOCK_CLASS(lo); lp->name = lo->lo_name; SLIST_INSERT_HEAD(&type->lpt_hash[hash], lp, link); return (lp); } static struct lock_profile_object * lock_profile_object_lookup(struct lock_object *lo, int spin, const char *file, int line) { struct lock_profile_object *l; struct lock_prof_type *type; struct lpohead *head; head = &curthread->td_lprof[spin]; LIST_FOREACH(l, head, lpo_link) if (l->lpo_obj == lo && l->lpo_file == file && l->lpo_line == line) return (l); type = &lp_cpu[PCPU_GET(cpuid)]->lpc_types[spin]; l = LIST_FIRST(&type->lpt_lpoalloc); if (l == NULL) { lock_prof_rejected++; return (NULL); } LIST_REMOVE(l, lpo_link); l->lpo_obj = lo; l->lpo_file = file; l->lpo_line = line; l->lpo_cnt = 0; LIST_INSERT_HEAD(head, l, lpo_link); return (l); } void lock_profile_obtain_lock_success(struct lock_object *lo, int contested, uint64_t waittime, const char *file, int line) { static int lock_prof_count; struct lock_profile_object *l; int spin; if (SCHEDULER_STOPPED()) return; /* don't reset the timer when/if recursing */ if (!lock_prof_enable || (lo->lo_flags & LO_NOPROFILE)) return; if (lock_prof_skipcount && (++lock_prof_count % lock_prof_skipcount) != 0) return; spin = (LOCK_CLASS(lo)->lc_flags & LC_SPINLOCK) ? 1 : 0; if (spin && lock_prof_skipspin == 1) return; critical_enter(); /* Recheck enabled now that we're in a critical section. */ if (lock_prof_enable == 0) goto out; l = lock_profile_object_lookup(lo, spin, file, line); if (l == NULL) goto out; l->lpo_cnt++; if (++l->lpo_ref > 1) goto out; l->lpo_contest_locking = contested; l->lpo_acqtime = nanoseconds(); if (waittime && (l->lpo_acqtime > waittime)) l->lpo_waittime = l->lpo_acqtime - waittime; else l->lpo_waittime = 0; out: critical_exit(); } void lock_profile_thread_exit(struct thread *td) { #ifdef INVARIANTS struct lock_profile_object *l; MPASS(curthread->td_critnest == 0); #endif /* * If lock profiling was disabled we have to wait for reset to * clear our pointers before we can exit safely. 
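
Site lookup in lock_profile_lookup() keys on the (lock name, file, line) triple; because the name and file strings are compiler-interned, their addresses identify the site and both hashing and the equality test work on pointers without any string comparison. The hash extracted into a standalone function, where HASH_SIZE mirrors LPROF_HASH_SIZE and must stay a power of two for the mask to work:

#include <stdint.h>

#define	HASH_SIZE	4096
#define	HASH_MASK	(HASH_SIZE - 1)

static unsigned
prof_hash(const char *name, const char *file, int line)
{
	uintptr_t h;

	/* Pointers are mixed as integers; they are never dereferenced. */
	h = (uintptr_t)name * 31 + (uintptr_t)file * 31 + (uintptr_t)line;
	return ((unsigned)(h & HASH_MASK));
}

int
main(void)
{
	/* Masked result always fits the table. */
	return (prof_hash("a lock", __FILE__, 42) & ~HASH_MASK);	/* 0 */
}
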
*/ lock_prof_reset_wait(); #ifdef INVARIANTS LIST_FOREACH(l, &td->td_lprof[0], lpo_link) printf("thread still holds lock acquired at %s:%d\n", l->lpo_file, l->lpo_line); LIST_FOREACH(l, &td->td_lprof[1], lpo_link) printf("thread still holds lock acquired at %s:%d\n", l->lpo_file, l->lpo_line); #endif MPASS(LIST_FIRST(&td->td_lprof[0]) == NULL); MPASS(LIST_FIRST(&td->td_lprof[1]) == NULL); } void lock_profile_release_lock(struct lock_object *lo) { struct lock_profile_object *l; struct lock_prof_type *type; struct lock_prof *lp; uint64_t curtime, holdtime; struct lpohead *head; int spin; if (SCHEDULER_STOPPED()) return; if (lo->lo_flags & LO_NOPROFILE) return; spin = (LOCK_CLASS(lo)->lc_flags & LC_SPINLOCK) ? 1 : 0; head = &curthread->td_lprof[spin]; if (LIST_FIRST(head) == NULL) return; critical_enter(); /* Recheck enabled now that we're in a critical section. */ if (lock_prof_enable == 0 && lock_prof_resetting == 1) goto out; /* * If lock profiling is not enabled we still want to remove the * lpo from our queue. */ LIST_FOREACH(l, head, lpo_link) if (l->lpo_obj == lo) break; if (l == NULL) goto out; if (--l->lpo_ref > 0) goto out; lp = lock_profile_lookup(lo, spin, l->lpo_file, l->lpo_line); if (lp == NULL) goto release; curtime = nanoseconds(); if (curtime < l->lpo_acqtime) goto release; holdtime = curtime - l->lpo_acqtime; /* * Record if the lock has been held longer now than ever * before. */ if (holdtime > lp->cnt_max) lp->cnt_max = holdtime; if (l->lpo_waittime > lp->cnt_wait_max) lp->cnt_wait_max = l->lpo_waittime; lp->cnt_tot += holdtime; lp->cnt_wait += l->lpo_waittime; lp->cnt_contest_locking += l->lpo_contest_locking; lp->cnt_cur += l->lpo_cnt; release: LIST_REMOVE(l, lpo_link); type = &lp_cpu[PCPU_GET(cpuid)]->lpc_types[spin]; LIST_INSERT_HEAD(&type->lpt_lpoalloc, l, lpo_link); out: critical_exit(); } static SYSCTL_NODE(_debug_lock, OID_AUTO, prof, CTLFLAG_RD, NULL, "lock profiling"); SYSCTL_INT(_debug_lock_prof, OID_AUTO, skipspin, CTLFLAG_RW, &lock_prof_skipspin, 0, "Skip profiling on spinlocks."); SYSCTL_INT(_debug_lock_prof, OID_AUTO, skipcount, CTLFLAG_RW, &lock_prof_skipcount, 0, "Sample approximately every N lock acquisitions."); SYSCTL_INT(_debug_lock_prof, OID_AUTO, rejected, CTLFLAG_RD, &lock_prof_rejected, 0, "Number of rejected profiling records"); SYSCTL_PROC(_debug_lock_prof, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0, dump_lock_prof_stats, "A", "Lock profiling statistics"); SYSCTL_PROC(_debug_lock_prof, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW, NULL, 0, reset_lock_prof_stats, "I", "Reset lock profiling statistics"); SYSCTL_PROC(_debug_lock_prof, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW, NULL, 0, enable_lock_prof, "I", "Enable lock profiling"); #endif Index: head/sys/kern/subr_sleepqueue.c =================================================================== --- head/sys/kern/subr_sleepqueue.c (revision 344854) +++ head/sys/kern/subr_sleepqueue.c (revision 344855) @@ -1,1454 +1,1453 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Implementation of sleep queues used to hold queue of threads blocked on * a wait channel. Sleep queues are different from turnstiles in that wait * channels are not owned by anyone, so there is no priority propagation. * Sleep queues can also provide a timeout and can also be interrupted by * signals. That said, there are several similarities between the turnstile * and sleep queue implementations. (Note: turnstiles were implemented * first.) For example, both use a hash table of the same size where each * bucket is referred to as a "chain" that contains both a spin lock and * a linked list of queues. An individual queue is located by using a hash * to pick a chain, locking the chain, and then walking the chain searching * for the queue. This means that a wait channel object does not need to * embed its queue head just as locks do not embed their turnstile queue * head. Threads also carry around a sleep queue that they lend to the * wait channel when blocking. Just as in turnstiles, the queue includes * a free list of the sleep queues of other threads blocked on the same * wait channel in the case of multiple waiters. * * Some additional functionality provided by sleep queues include the * ability to set a timeout. The timeout is managed using a per-thread * callout that resumes a thread if it is asleep. A thread may also * catch signals while it is asleep (aka an interruptible sleep). The * signal code uses sleepq_abort() to interrupt a sleeping thread. Finally, * sleep queues also provide some extra assertions. One is not allowed to * mix the sleep/wakeup and cv APIs for a given wait channel. Also, one * must consistently use the same lock to synchronize with a wait channel, * though this check is currently only a warning for sleep/wakeup due to * pre-existing abuse of that API. The same lock must also be held when * awakening threads, though that is currently only enforced for condition * variables. */ #include __FBSDID("$FreeBSD$"); #include "opt_sleepqueue_profiling.h" #include "opt_ddb.h" #include "opt_sched.h" #include "opt_stack.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif /* * Constants for the hash table of sleep queue chains. * SC_TABLESIZE must be a power of two for SC_MASK to work properly. 
*/ #ifndef SC_TABLESIZE #define SC_TABLESIZE 256 #endif CTASSERT(powerof2(SC_TABLESIZE)); #define SC_MASK (SC_TABLESIZE - 1) #define SC_SHIFT 8 #define SC_HASH(wc) ((((uintptr_t)(wc) >> SC_SHIFT) ^ (uintptr_t)(wc)) & \ SC_MASK) #define SC_LOOKUP(wc) &sleepq_chains[SC_HASH(wc)] #define NR_SLEEPQS 2 /* * There are two different lists of sleep queues. Both lists are connected * via the sq_hash entries. The first list is the sleep queue chain list * that a sleep queue is on when it is attached to a wait channel. The * second list is the free list hung off of a sleep queue that is attached * to a wait channel. * * Each sleep queue also contains the wait channel it is attached to, the * list of threads blocked on that wait channel, flags specific to the * wait channel, and the lock used to synchronize with a wait channel. * The flags are used to catch mismatches between the various consumers * of the sleep queue API (e.g. sleep/wakeup and condition variables). * The lock pointer is only used when invariants are enabled for various * debugging checks. * * Locking key: * c - sleep queue chain lock */ struct sleepqueue { TAILQ_HEAD(, thread) sq_blocked[NR_SLEEPQS]; /* (c) Blocked threads. */ u_int sq_blockedcnt[NR_SLEEPQS]; /* (c) N. of blocked threads. */ LIST_ENTRY(sleepqueue) sq_hash; /* (c) Chain and free list. */ LIST_HEAD(, sleepqueue) sq_free; /* (c) Free queues. */ void *sq_wchan; /* (c) Wait channel. */ int sq_type; /* (c) Queue type. */ #ifdef INVARIANTS struct lock_object *sq_lock; /* (c) Associated lock. */ #endif }; struct sleepqueue_chain { LIST_HEAD(, sleepqueue) sc_queues; /* List of sleep queues. */ struct mtx sc_lock; /* Spin lock for this chain. */ #ifdef SLEEPQUEUE_PROFILING u_int sc_depth; /* Length of sc_queues. */ u_int sc_max_depth; /* Max length of sc_queues. */ #endif } __aligned(CACHE_LINE_SIZE); #ifdef SLEEPQUEUE_PROFILING u_int sleepq_max_depth; static SYSCTL_NODE(_debug, OID_AUTO, sleepq, CTLFLAG_RD, 0, "sleepq profiling"); static SYSCTL_NODE(_debug_sleepq, OID_AUTO, chains, CTLFLAG_RD, 0, "sleepq chain stats"); SYSCTL_UINT(_debug_sleepq, OID_AUTO, max_depth, CTLFLAG_RD, &sleepq_max_depth, 0, "maxmimum depth achieved of a single chain"); static void sleepq_profile(const char *wmesg); static int prof_enabled; #endif static struct sleepqueue_chain sleepq_chains[SC_TABLESIZE]; static uma_zone_t sleepq_zone; /* * Prototypes for non-exported routines. */ static int sleepq_catch_signals(void *wchan, int pri); static int sleepq_check_signals(void); static int sleepq_check_timeout(void); #ifdef INVARIANTS static void sleepq_dtor(void *mem, int size, void *arg); #endif static int sleepq_init(void *mem, int size, int flags); static int sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri); static void sleepq_switch(void *wchan, int pri); static void sleepq_timeout(void *arg); SDT_PROBE_DECLARE(sched, , , sleep); SDT_PROBE_DECLARE(sched, , , wakeup); /* * Initialize SLEEPQUEUE_PROFILING specific sysctl nodes. * Note that it must happen after sleepinit() has been fully executed, so * it must happen after SI_SUB_KMEM SYSINIT() subsystem setup. 
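
SC_HASH() is why SC_TABLESIZE must be a power of two: the channel address is folded (its shifted upper bits XORed back into it) and then masked, which only distributes evenly when the mask is all ones. The macro in standalone form, with TBLSIZE and SHIFT mirroring SC_TABLESIZE and SC_SHIFT:

#include <stdint.h>
#include <stdio.h>

#define	TBLSIZE	256			/* must stay a power of two */
#define	MASK	(TBLSIZE - 1)
#define	SHIFT	8

static unsigned
chain_hash(const void *wchan)
{
	uintptr_t a = (uintptr_t)wchan;

	/* Fold upper bits in so nearby channels spread across chains. */
	return ((unsigned)(((a >> SHIFT) ^ a) & MASK));
}

int
main(void)
{
	int x, y;

	printf("%u %u\n", chain_hash(&x), chain_hash(&y));
	return (0);
}
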
*/ #ifdef SLEEPQUEUE_PROFILING static void init_sleepqueue_profiling(void) { char chain_name[10]; struct sysctl_oid *chain_oid; u_int i; for (i = 0; i < SC_TABLESIZE; i++) { snprintf(chain_name, sizeof(chain_name), "%u", i); chain_oid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_debug_sleepq_chains), OID_AUTO, chain_name, CTLFLAG_RD, NULL, "sleepq chain stats"); SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, "depth", CTLFLAG_RD, &sleepq_chains[i].sc_depth, 0, NULL); SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, "max_depth", CTLFLAG_RD, &sleepq_chains[i].sc_max_depth, 0, NULL); } } SYSINIT(sleepqueue_profiling, SI_SUB_LOCK, SI_ORDER_ANY, init_sleepqueue_profiling, NULL); #endif /* * Early initialization of sleep queues that is called from the sleepinit() * SYSINIT. */ void init_sleepqueues(void) { int i; for (i = 0; i < SC_TABLESIZE; i++) { LIST_INIT(&sleepq_chains[i].sc_queues); mtx_init(&sleepq_chains[i].sc_lock, "sleepq chain", NULL, MTX_SPIN | MTX_RECURSE); } sleepq_zone = uma_zcreate("SLEEPQUEUE", sizeof(struct sleepqueue), #ifdef INVARIANTS NULL, sleepq_dtor, sleepq_init, NULL, UMA_ALIGN_CACHE, 0); #else NULL, NULL, sleepq_init, NULL, UMA_ALIGN_CACHE, 0); #endif thread0.td_sleepqueue = sleepq_alloc(); } /* * Get a sleep queue for a new thread. */ struct sleepqueue * sleepq_alloc(void) { return (uma_zalloc(sleepq_zone, M_WAITOK)); } /* * Free a sleep queue when a thread is destroyed. */ void sleepq_free(struct sleepqueue *sq) { uma_zfree(sleepq_zone, sq); } /* * Lock the sleep queue chain associated with the specified wait channel. */ void sleepq_lock(void *wchan) { struct sleepqueue_chain *sc; sc = SC_LOOKUP(wchan); mtx_lock_spin(&sc->sc_lock); } /* * Look up the sleep queue associated with a given wait channel in the hash * table locking the associated sleep queue chain. If no queue is found in * the table, NULL is returned. */ struct sleepqueue * sleepq_lookup(void *wchan) { struct sleepqueue_chain *sc; struct sleepqueue *sq; KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__)); sc = SC_LOOKUP(wchan); mtx_assert(&sc->sc_lock, MA_OWNED); LIST_FOREACH(sq, &sc->sc_queues, sq_hash) if (sq->sq_wchan == wchan) return (sq); return (NULL); } /* * Unlock the sleep queue chain associated with a given wait channel. */ void sleepq_release(void *wchan) { struct sleepqueue_chain *sc; sc = SC_LOOKUP(wchan); mtx_unlock_spin(&sc->sc_lock); } /* * Places the current thread on the sleep queue for the specified wait * channel. If INVARIANTS is enabled, then it associates the passed in * lock with the sleepq to make sure it is held when that sleep queue is * woken up. */ void sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags, int queue) { struct sleepqueue_chain *sc; struct sleepqueue *sq; struct thread *td; td = curthread; sc = SC_LOOKUP(wchan); mtx_assert(&sc->sc_lock, MA_OWNED); MPASS(td->td_sleepqueue != NULL); MPASS(wchan != NULL); MPASS((queue >= 0) && (queue < NR_SLEEPQS)); /* If this thread is not allowed to sleep, die a horrible death. */ KASSERT(td->td_no_sleeping == 0, ("%s: td %p to sleep on wchan %p with sleeping prohibited", __func__, td, wchan)); /* Look up the sleep queue associated with the wait channel 'wchan'. */ sq = sleepq_lookup(wchan); /* * If the wait channel does not already have a sleep queue, use * this thread's sleep queue. Otherwise, insert the current thread * into the sleep queue already in use by this wait channel. 
*/ if (sq == NULL) { #ifdef INVARIANTS int i; sq = td->td_sleepqueue; for (i = 0; i < NR_SLEEPQS; i++) { KASSERT(TAILQ_EMPTY(&sq->sq_blocked[i]), ("thread's sleep queue %d is not empty", i)); KASSERT(sq->sq_blockedcnt[i] == 0, ("thread's sleep queue %d count mismatches", i)); } KASSERT(LIST_EMPTY(&sq->sq_free), ("thread's sleep queue has a non-empty free list")); KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer")); sq->sq_lock = lock; #endif #ifdef SLEEPQUEUE_PROFILING sc->sc_depth++; if (sc->sc_depth > sc->sc_max_depth) { sc->sc_max_depth = sc->sc_depth; if (sc->sc_max_depth > sleepq_max_depth) sleepq_max_depth = sc->sc_max_depth; } #endif sq = td->td_sleepqueue; LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash); sq->sq_wchan = wchan; sq->sq_type = flags & SLEEPQ_TYPE; } else { MPASS(wchan == sq->sq_wchan); MPASS(lock == sq->sq_lock); MPASS((flags & SLEEPQ_TYPE) == sq->sq_type); LIST_INSERT_HEAD(&sq->sq_free, td->td_sleepqueue, sq_hash); } thread_lock(td); TAILQ_INSERT_TAIL(&sq->sq_blocked[queue], td, td_slpq); sq->sq_blockedcnt[queue]++; td->td_sleepqueue = NULL; td->td_sqqueue = queue; td->td_wchan = wchan; td->td_wmesg = wmesg; if (flags & SLEEPQ_INTERRUPTIBLE) { td->td_flags |= TDF_SINTR; td->td_flags &= ~TDF_SLEEPABORT; } thread_unlock(td); } /* * Sets a timeout that will remove the current thread from the specified * sleep queue after timo ticks if the thread has not already been awakened. */ void sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr, int flags) { struct sleepqueue_chain *sc __unused; struct thread *td; sbintime_t pr1; td = curthread; sc = SC_LOOKUP(wchan); mtx_assert(&sc->sc_lock, MA_OWNED); MPASS(TD_ON_SLEEPQ(td)); MPASS(td->td_sleepqueue == NULL); MPASS(wchan != NULL); if (cold && td == &thread0) panic("timed sleep before timers are working"); KASSERT(td->td_sleeptimo == 0, ("td %d %p td_sleeptimo %jx", td->td_tid, td, (uintmax_t)td->td_sleeptimo)); thread_lock(td); callout_when(sbt, pr, flags, &td->td_sleeptimo, &pr1); thread_unlock(td); callout_reset_sbt_on(&td->td_slpcallout, td->td_sleeptimo, pr1, sleepq_timeout, td, PCPU_GET(cpuid), flags | C_PRECALC | C_DIRECT_EXEC); } /* * Return the number of actual sleepers for the specified queue. */ u_int sleepq_sleepcnt(void *wchan, int queue) { struct sleepqueue *sq; KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__)); MPASS((queue >= 0) && (queue < NR_SLEEPQS)); sq = sleepq_lookup(wchan); if (sq == NULL) return (0); return (sq->sq_blockedcnt[queue]); } /* * Marks the pending sleep of the current thread as interruptible and * makes an initial check for pending signals before putting a thread * to sleep. Enters and exits with the thread lock held. Thread lock * may have transitioned from the sleepq lock to a run lock. */ static int sleepq_catch_signals(void *wchan, int pri) { struct sleepqueue_chain *sc; struct sleepqueue *sq; struct thread *td; struct proc *p; struct sigacts *ps; int sig, ret; ret = 0; td = curthread; p = curproc; sc = SC_LOOKUP(wchan); mtx_assert(&sc->sc_lock, MA_OWNED); MPASS(wchan != NULL); if ((td->td_pflags & TDP_WAKEUP) != 0) { td->td_pflags &= ~TDP_WAKEUP; ret = EINTR; thread_lock(td); goto out; } /* * See if there are any pending signals or suspension requests for this * thread. If not, we can switch immediately. 
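
The invariant behind that branch is that every thread owns exactly one sleepqueue at all times except while blocked: the first sleeper donates its queue to the wait channel, later sleepers push theirs onto that queue's free list, and each wakeup hands one back, the last waiter reclaiming the channel's queue itself. A toy model of that accounting with deliberately simplified structures (the kernel uses LIST/TAILQ macros and per-queue arrays):

#include <assert.h>
#include <stddef.h>

struct squeue {
	struct squeue *free;		/* stack of donated spares */
};

struct sthread {
	struct squeue *sq;		/* owned queue, NULL while asleep */
};

static struct squeue *channel_q;	/* queue attached to the channel */

static void
model_sleep(struct sthread *td)
{
	if (channel_q == NULL) {
		channel_q = td->sq;	/* first sleeper donates its queue */
	} else {
		td->sq->free = channel_q->free;	/* later sleepers park theirs */
		channel_q->free = td->sq;
	}
	td->sq = NULL;
}

static void
model_wake(struct sthread *td)
{
	if (channel_q->free != NULL) {
		td->sq = channel_q->free;	/* take any spare */
		channel_q->free = td->sq->free;
	} else {
		td->sq = channel_q;	/* last waiter reclaims the queue */
		channel_q = NULL;
	}
}

int
main(void)
{
	struct squeue q1 = { NULL }, q2 = { NULL };
	struct sthread t1 = { &q1 }, t2 = { &q2 };

	model_sleep(&t1);
	model_sleep(&t2);
	model_wake(&t1);		/* gets a queue, not necessarily q1 */
	model_wake(&t2);
	assert(t1.sq != NULL && t2.sq != NULL && channel_q == NULL);
	return (0);
}

A woken thread gets back a queue, not necessarily the one it arrived with; since the queues are interchangeable this is harmless, and it is why the conservation argument works.
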
*/ thread_lock(td); if ((td->td_flags & (TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK)) != 0) { thread_unlock(td); mtx_unlock_spin(&sc->sc_lock); CTR3(KTR_PROC, "sleepq catching signals: thread %p (pid %ld, %s)", (void *)td, (long)p->p_pid, td->td_name); PROC_LOCK(p); /* * Check for suspension first. Checking for signals and then * suspending could result in a missed signal, since a signal * can be delivered while this thread is suspended. */ if ((td->td_flags & TDF_NEEDSUSPCHK) != 0) { ret = thread_suspend_check(1); MPASS(ret == 0 || ret == EINTR || ret == ERESTART); if (ret != 0) { PROC_UNLOCK(p); mtx_lock_spin(&sc->sc_lock); thread_lock(td); goto out; } } if ((td->td_flags & TDF_NEEDSIGCHK) != 0) { ps = p->p_sigacts; mtx_lock(&ps->ps_mtx); sig = cursig(td); if (sig == -1) { mtx_unlock(&ps->ps_mtx); KASSERT((td->td_flags & TDF_SBDRY) != 0, ("lost TDF_SBDRY")); KASSERT(TD_SBDRY_INTR(td), ("lost TDF_SERESTART of TDF_SEINTR")); KASSERT((td->td_flags & (TDF_SEINTR | TDF_SERESTART)) != (TDF_SEINTR | TDF_SERESTART), ("both TDF_SEINTR and TDF_SERESTART")); ret = TD_SBDRY_ERRNO(td); } else if (sig != 0) { ret = SIGISMEMBER(ps->ps_sigintr, sig) ? EINTR : ERESTART; mtx_unlock(&ps->ps_mtx); } else { mtx_unlock(&ps->ps_mtx); } } /* * Lock the per-process spinlock prior to dropping the PROC_LOCK * to avoid a signal delivery race. PROC_LOCK, PROC_SLOCK, and * thread_lock() are currently held in tdsendsignal(). */ PROC_SLOCK(p); mtx_lock_spin(&sc->sc_lock); PROC_UNLOCK(p); thread_lock(td); PROC_SUNLOCK(p); } if (ret == 0) { sleepq_switch(wchan, pri); return (0); } out: /* * There were pending signals and this thread is still * on the sleep queue, remove it from the sleep queue. */ if (TD_ON_SLEEPQ(td)) { sq = sleepq_lookup(wchan); if (sleepq_resume_thread(sq, td, 0)) { #ifdef INVARIANTS /* * This thread hasn't gone to sleep yet, so it * should not be swapped out. */ panic("not waking up swapper"); #endif } } mtx_unlock_spin(&sc->sc_lock); MPASS(td->td_lock != &sc->sc_lock); return (ret); } /* * Switches to another thread if we are still asleep on a sleep queue. * Returns with thread lock. */ static void sleepq_switch(void *wchan, int pri) { struct sleepqueue_chain *sc; struct sleepqueue *sq; struct thread *td; bool rtc_changed; td = curthread; sc = SC_LOOKUP(wchan); mtx_assert(&sc->sc_lock, MA_OWNED); THREAD_LOCK_ASSERT(td, MA_OWNED); /* * If we have a sleep queue, then we've already been woken up, so * just return. */ if (td->td_sleepqueue != NULL) { mtx_unlock_spin(&sc->sc_lock); return; } /* * If TDF_TIMEOUT is set, then our sleep has been timed out * already but we are still on the sleep queue, so dequeue the * thread and return. * * Do the same if the real-time clock has been adjusted since this * thread calculated its timeout based on that clock. This handles * the following race: * - The Ts thread needs to sleep until an absolute real-clock time. * It copies the global rtc_generation into curthread->td_rtcgen, * reads the RTC, and calculates a sleep duration based on that time. * See umtxq_sleep() for an example. * - The Tc thread adjusts the RTC, bumps rtc_generation, and wakes * threads that are sleeping until an absolute real-clock time. * See tc_setclock() and the POSIX specification of clock_settime(). * - Ts reaches the code below. It holds the sleepqueue chain lock, * so Tc has finished waking, so this thread must test td_rtcgen. * (The declaration of td_rtcgen refers to this comment.) 
*/ rtc_changed = td->td_rtcgen != 0 && td->td_rtcgen != rtc_generation; if ((td->td_flags & TDF_TIMEOUT) || rtc_changed) { if (rtc_changed) { td->td_rtcgen = 0; } MPASS(TD_ON_SLEEPQ(td)); sq = sleepq_lookup(wchan); if (sleepq_resume_thread(sq, td, 0)) { #ifdef INVARIANTS /* * This thread hasn't gone to sleep yet, so it * should not be swapped out. */ panic("not waking up swapper"); #endif } mtx_unlock_spin(&sc->sc_lock); return; } #ifdef SLEEPQUEUE_PROFILING if (prof_enabled) sleepq_profile(td->td_wmesg); #endif MPASS(td->td_sleepqueue == NULL); sched_sleep(td, pri); thread_lock_set(td, &sc->sc_lock); SDT_PROBE0(sched, , , sleep); TD_SET_SLEEPING(td); mi_switch(SW_VOL | SWT_SLEEPQ, NULL); KASSERT(TD_IS_RUNNING(td), ("running but not TDS_RUNNING")); CTR3(KTR_PROC, "sleepq resume: thread %p (pid %ld, %s)", (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name); } /* * Check to see if we timed out. */ static int sleepq_check_timeout(void) { struct thread *td; int res; td = curthread; THREAD_LOCK_ASSERT(td, MA_OWNED); /* * If TDF_TIMEOUT is set, we timed out. But recheck * td_sleeptimo anyway. */ res = 0; if (td->td_sleeptimo != 0) { if (td->td_sleeptimo <= sbinuptime()) res = EWOULDBLOCK; td->td_sleeptimo = 0; } if (td->td_flags & TDF_TIMEOUT) td->td_flags &= ~TDF_TIMEOUT; else /* * We ignore the situation where timeout subsystem was * unable to stop our callout. The struct thread is * type-stable, the callout will use the correct * memory when running. The checks of the * td_sleeptimo value in this function and in * sleepq_timeout() ensure that the thread does not * get spurious wakeups, even if the callout was reset * or thread reused. */ callout_stop(&td->td_slpcallout); return (res); } /* * Check to see if we were awoken by a signal. */ static int sleepq_check_signals(void) { struct thread *td; td = curthread; THREAD_LOCK_ASSERT(td, MA_OWNED); /* We are no longer in an interruptible sleep. */ if (td->td_flags & TDF_SINTR) td->td_flags &= ~TDF_SINTR; if (td->td_flags & TDF_SLEEPABORT) { td->td_flags &= ~TDF_SLEEPABORT; return (td->td_intrval); } return (0); } /* * Block the current thread until it is awakened from its sleep queue. */ void sleepq_wait(void *wchan, int pri) { struct thread *td; td = curthread; MPASS(!(td->td_flags & TDF_SINTR)); thread_lock(td); sleepq_switch(wchan, pri); thread_unlock(td); } /* * Block the current thread until it is awakened from its sleep queue * or it is interrupted by a signal. */ int sleepq_wait_sig(void *wchan, int pri) { int rcatch; int rval; rcatch = sleepq_catch_signals(wchan, pri); rval = sleepq_check_signals(); thread_unlock(curthread); if (rcatch) return (rcatch); return (rval); } /* * Block the current thread until it is awakened from its sleep queue * or it times out while waiting. */ int sleepq_timedwait(void *wchan, int pri) { struct thread *td; int rval; td = curthread; MPASS(!(td->td_flags & TDF_SINTR)); thread_lock(td); sleepq_switch(wchan, pri); rval = sleepq_check_timeout(); thread_unlock(td); return (rval); } /* * Block the current thread until it is awakened from its sleep queue, * it is interrupted by a signal, or it times out waiting to be awakened. */ int sleepq_timedwait_sig(void *wchan, int pri) { int rcatch, rvalt, rvals; rcatch = sleepq_catch_signals(wchan, pri); rvalt = sleepq_check_timeout(); rvals = sleepq_check_signals(); thread_unlock(curthread); if (rcatch) return (rcatch); if (rvals) return (rvals); return (rvalt); } /* * Returns the type of sleepqueue given a waitchannel. 
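
sleepq_check_timeout() treats td_sleeptimo as a consume-once deadline: nonzero and already past means EWOULDBLOCK, and it is zeroed either way so a callout that could not be stopped cannot signal a second, spurious timeout. The decision reduced to a few lines, with the deadline as a plain counter standing in for td_sleeptimo/sbinuptime():

#include <assert.h>
#include <errno.h>
#include <stdint.h>

static int
check_timeout(int64_t *deadline, int64_t now)
{
	int res = 0;

	if (*deadline != 0) {
		if (*deadline <= now)
			res = EWOULDBLOCK;
		*deadline = 0;		/* consumed exactly once */
	}
	return (res);
}

int
main(void)
{
	int64_t dl;

	dl = 100;
	assert(check_timeout(&dl, 50) == 0);	/* woken before the deadline */
	dl = 100;
	assert(check_timeout(&dl, 100) == EWOULDBLOCK);
	assert(check_timeout(&dl, 200) == 0);	/* already consumed */
	return (0);
}
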
*/ int sleepq_type(void *wchan) { struct sleepqueue *sq; int type; MPASS(wchan != NULL); sleepq_lock(wchan); sq = sleepq_lookup(wchan); if (sq == NULL) { sleepq_release(wchan); return (-1); } type = sq->sq_type; sleepq_release(wchan); return (type); } /* * Removes a thread from a sleep queue and makes it * runnable. */ static int sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri) { struct sleepqueue_chain *sc __unused; MPASS(td != NULL); MPASS(sq->sq_wchan != NULL); MPASS(td->td_wchan == sq->sq_wchan); MPASS(td->td_sqqueue < NR_SLEEPQS && td->td_sqqueue >= 0); THREAD_LOCK_ASSERT(td, MA_OWNED); sc = SC_LOOKUP(sq->sq_wchan); mtx_assert(&sc->sc_lock, MA_OWNED); SDT_PROBE2(sched, , , wakeup, td, td->td_proc); /* Remove the thread from the queue. */ sq->sq_blockedcnt[td->td_sqqueue]--; TAILQ_REMOVE(&sq->sq_blocked[td->td_sqqueue], td, td_slpq); /* * Get a sleep queue for this thread. If this is the last waiter, * use the queue itself and take it out of the chain, otherwise, * remove a queue from the free list. */ if (LIST_EMPTY(&sq->sq_free)) { td->td_sleepqueue = sq; #ifdef INVARIANTS sq->sq_wchan = NULL; #endif #ifdef SLEEPQUEUE_PROFILING sc->sc_depth--; #endif } else td->td_sleepqueue = LIST_FIRST(&sq->sq_free); LIST_REMOVE(td->td_sleepqueue, sq_hash); td->td_wmesg = NULL; td->td_wchan = NULL; td->td_flags &= ~TDF_SINTR; CTR3(KTR_PROC, "sleepq_wakeup: thread %p (pid %ld, %s)", (void *)td, (long)td->td_proc->p_pid, td->td_name); /* Adjust priority if requested. */ MPASS(pri == 0 || (pri >= PRI_MIN && pri <= PRI_MAX)); if (pri != 0 && td->td_priority > pri && PRI_BASE(td->td_pri_class) == PRI_TIMESHARE) sched_prio(td, pri); /* * Note that thread td might not be sleeping if it is running * sleepq_catch_signals() on another CPU or is blocked on its * proc lock to check signals. There's no need to mark the * thread runnable in that case. */ if (TD_IS_SLEEPING(td)) { TD_CLR_SLEEPING(td); return (setrunnable(td)); } return (0); } #ifdef INVARIANTS /* * UMA zone item deallocator. */ static void sleepq_dtor(void *mem, int size, void *arg) { struct sleepqueue *sq; int i; sq = mem; for (i = 0; i < NR_SLEEPQS; i++) { MPASS(TAILQ_EMPTY(&sq->sq_blocked[i])); MPASS(sq->sq_blockedcnt[i] == 0); } } #endif /* * UMA zone item initializer. */ static int sleepq_init(void *mem, int size, int flags) { struct sleepqueue *sq; int i; bzero(mem, size); sq = mem; for (i = 0; i < NR_SLEEPQS; i++) { TAILQ_INIT(&sq->sq_blocked[i]); sq->sq_blockedcnt[i] = 0; } LIST_INIT(&sq->sq_free); return (0); } /* * Find the highest priority thread sleeping on a wait channel and resume it. */ int sleepq_signal(void *wchan, int flags, int pri, int queue) { struct sleepqueue *sq; struct thread *td, *besttd; int wakeup_swapper; CTR2(KTR_PROC, "sleepq_signal(%p, %d)", wchan, flags); KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__)); MPASS((queue >= 0) && (queue < NR_SLEEPQS)); sq = sleepq_lookup(wchan); if (sq == NULL) return (0); KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE), ("%s: mismatch between sleep/wakeup and cv_*", __func__)); /* * Find the highest priority thread on the queue. If there is a * tie, use the thread that first appears in the queue as it has * been sleeping the longest since threads are always added to * the tail of sleep queues. 
*/ besttd = TAILQ_FIRST(&sq->sq_blocked[queue]); TAILQ_FOREACH(td, &sq->sq_blocked[queue], td_slpq) { if (td->td_priority < besttd->td_priority) besttd = td; } MPASS(besttd != NULL); thread_lock(besttd); wakeup_swapper = sleepq_resume_thread(sq, besttd, pri); thread_unlock(besttd); return (wakeup_swapper); } static bool match_any(struct thread *td __unused) { return (true); } /* * Resume all threads sleeping on a specified wait channel. */ int sleepq_broadcast(void *wchan, int flags, int pri, int queue) { struct sleepqueue *sq; CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags); KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__)); MPASS((queue >= 0) && (queue < NR_SLEEPQS)); sq = sleepq_lookup(wchan); if (sq == NULL) return (0); KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE), ("%s: mismatch between sleep/wakeup and cv_*", __func__)); return (sleepq_remove_matching(sq, queue, match_any, pri)); } /* * Resume threads on the sleep queue that match the given predicate. */ int sleepq_remove_matching(struct sleepqueue *sq, int queue, bool (*matches)(struct thread *), int pri) { struct thread *td, *tdn; int wakeup_swapper; /* * The last thread will be given ownership of sq and may * re-enqueue itself before sleepq_resume_thread() returns, * so we must cache the "next" queue item at the beginning * of the final iteration. */ wakeup_swapper = 0; TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, tdn) { thread_lock(td); if (matches(td)) wakeup_swapper |= sleepq_resume_thread(sq, td, pri); thread_unlock(td); } return (wakeup_swapper); } /* * Time sleeping threads out. When the timeout expires, the thread is * removed from the sleep queue and made runnable if it is still asleep. */ static void sleepq_timeout(void *arg) { struct sleepqueue_chain *sc __unused; struct sleepqueue *sq; struct thread *td; void *wchan; int wakeup_swapper; td = arg; wakeup_swapper = 0; CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)", (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name); thread_lock(td); if (td->td_sleeptimo > sbinuptime() || td->td_sleeptimo == 0) { /* * The thread does not want a timeout (yet). */ } else if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) { /* * See if the thread is asleep and get the wait * channel if it is. */ wchan = td->td_wchan; sc = SC_LOOKUP(wchan); THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock); sq = sleepq_lookup(wchan); MPASS(sq != NULL); td->td_flags |= TDF_TIMEOUT; wakeup_swapper = sleepq_resume_thread(sq, td, 0); } else if (TD_ON_SLEEPQ(td)) { /* * If the thread is on the SLEEPQ but isn't sleeping * yet, it can either be on another CPU in between * sleepq_add() and one of the sleepq_*wait*() * routines or it can be in sleepq_catch_signals(). */ td->td_flags |= TDF_TIMEOUT; } thread_unlock(td); if (wakeup_swapper) kick_proc0(); } /* * Resumes a specific thread from the sleep queue associated with a specific * wait channel if it is on that queue. */ void sleepq_remove(struct thread *td, void *wchan) { struct sleepqueue *sq; int wakeup_swapper; /* * Look up the sleep queue for this wait channel, then re-check * that the thread is asleep on that channel, if it is not, then * bail. */ MPASS(wchan != NULL); sleepq_lock(wchan); sq = sleepq_lookup(wchan); /* * We can not lock the thread here as it may be sleeping on a * different sleepq. However, holding the sleepq lock for this * wchan can guarantee that we do not miss a wakeup for this * channel. The asserts below will catch any false positives. 
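
The strict less-than in that scan does quiet work: combined with tail insertion in sleepq_add(), equal-priority threads are woken in FIFO order, so the longest sleeper wins ties. The selection extracted into a standalone list walk (lower numeric value means higher priority, as in the kernel; the struct is a stand-in):

#include <stddef.h>

struct mthread {
	int pri;			/* lower value = higher priority */
	struct mthread *next;
};

static struct mthread *
pick_best(struct mthread *head)
{
	struct mthread *td, *best = head;

	for (td = head; td != NULL; td = td->next)
		if (td->pri < best->pri)	/* ties keep the earlier entry */
			best = td;
	return (best);
}

int
main(void)
{
	struct mthread c = { 80, NULL }, b = { 100, &c }, a = { 80, &b };

	return (pick_best(&a) == &a ? 0 : 1);	/* head wins the 80/80 tie */
}
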
*/ if (!TD_ON_SLEEPQ(td) || td->td_wchan != wchan) { sleepq_release(wchan); return; } /* Thread is asleep on sleep queue sq, so wake it up. */ thread_lock(td); MPASS(sq != NULL); MPASS(td->td_wchan == wchan); wakeup_swapper = sleepq_resume_thread(sq, td, 0); thread_unlock(td); sleepq_release(wchan); if (wakeup_swapper) kick_proc0(); } /* * Abort a thread as if an interrupt had occurred. Only abort * interruptible waits (unfortunately it isn't safe to abort others). */ int sleepq_abort(struct thread *td, int intrval) { struct sleepqueue *sq; void *wchan; THREAD_LOCK_ASSERT(td, MA_OWNED); MPASS(TD_ON_SLEEPQ(td)); MPASS(td->td_flags & TDF_SINTR); MPASS(intrval == EINTR || intrval == ERESTART); /* * If the TDF_TIMEOUT flag is set, just leave. A * timeout is scheduled anyhow. */ if (td->td_flags & TDF_TIMEOUT) return (0); CTR3(KTR_PROC, "sleepq_abort: thread %p (pid %ld, %s)", (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name); td->td_intrval = intrval; td->td_flags |= TDF_SLEEPABORT; /* * If the thread has not slept yet it will find the signal in * sleepq_catch_signals() and call sleepq_resume_thread. Otherwise * we have to do it here. */ if (!TD_IS_SLEEPING(td)) return (0); wchan = td->td_wchan; MPASS(wchan != NULL); sq = sleepq_lookup(wchan); MPASS(sq != NULL); /* Thread is asleep on sleep queue sq, so wake it up. */ return (sleepq_resume_thread(sq, td, 0)); } void sleepq_chains_remove_matching(bool (*matches)(struct thread *)) { struct sleepqueue_chain *sc; struct sleepqueue *sq, *sq1; int i, wakeup_swapper; wakeup_swapper = 0; for (sc = &sleepq_chains[0]; sc < sleepq_chains + SC_TABLESIZE; ++sc) { if (LIST_EMPTY(&sc->sc_queues)) { continue; } mtx_lock_spin(&sc->sc_lock); LIST_FOREACH_SAFE(sq, &sc->sc_queues, sq_hash, sq1) { for (i = 0; i < NR_SLEEPQS; ++i) { wakeup_swapper |= sleepq_remove_matching(sq, i, matches, 0); } } mtx_unlock_spin(&sc->sc_lock); } if (wakeup_swapper) { kick_proc0(); } } /* * Prints the stacks of all threads presently sleeping on wchan/queue to * the sbuf sb. Sets count_stacks_printed to the number of stacks actually * printed. Typically, this will equal the number of threads sleeping on the * queue, but may be less if sb overflowed before all stacks were printed. */ #ifdef STACK int sleepq_sbuf_print_stacks(struct sbuf *sb, void *wchan, int queue, int *count_stacks_printed) { struct thread *td, *td_next; struct sleepqueue *sq; struct stack **st; struct sbuf **td_infos; int i, stack_idx, error, stacks_to_allocate; bool finished; error = 0; finished = false; KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__)); MPASS((queue >= 0) && (queue < NR_SLEEPQS)); stacks_to_allocate = 10; for (i = 0; i < 3 && !finished ; i++) { /* We cannot malloc while holding the queue's spinlock, so * we do our mallocs now, and hope it is enough. If it * isn't, we will free these, drop the lock, malloc more, * and try again, up to a point. After that point we will * give up and report ENOMEM. We also cannot write to sb * during this time since the client may have set the * SBUF_AUTOEXTEND flag on their sbuf, which could cause a * malloc as we print to it. So we defer actually printing * to sb until after we drop the spinlock. */ /* Where we will store the stacks. */ st = malloc(sizeof(struct stack *) * stacks_to_allocate, M_TEMP, M_WAITOK); for (stack_idx = 0; stack_idx < stacks_to_allocate; stack_idx++) st[stack_idx] = stack_create(M_WAITOK); /* Where we will store the td name, tid, etc. 
*/ td_infos = malloc(sizeof(struct sbuf *) * stacks_to_allocate, M_TEMP, M_WAITOK); for (stack_idx = 0; stack_idx < stacks_to_allocate; stack_idx++) td_infos[stack_idx] = sbuf_new(NULL, NULL, MAXCOMLEN + sizeof(struct thread *) * 2 + 40, SBUF_FIXEDLEN); sleepq_lock(wchan); sq = sleepq_lookup(wchan); if (sq == NULL) { /* This sleepq does not exist; exit and return ENOENT. */ error = ENOENT; finished = true; sleepq_release(wchan); goto loop_end; } stack_idx = 0; /* Save thread info */ TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, td_next) { if (stack_idx >= stacks_to_allocate) goto loop_end; /* Note the td_lock is equal to the sleepq_lock here. */ stack_save_td(st[stack_idx], td); sbuf_printf(td_infos[stack_idx], "%d: %s %p", td->td_tid, td->td_name, td); ++stack_idx; } finished = true; sleepq_release(wchan); /* Print the stacks */ for (i = 0; i < stack_idx; i++) { sbuf_finish(td_infos[i]); sbuf_printf(sb, "--- thread %s: ---\n", sbuf_data(td_infos[i])); stack_sbuf_print(sb, st[i]); sbuf_printf(sb, "\n"); error = sbuf_error(sb); if (error == 0) *count_stacks_printed = stack_idx; } loop_end: if (!finished) sleepq_release(wchan); for (stack_idx = 0; stack_idx < stacks_to_allocate; stack_idx++) stack_destroy(st[stack_idx]); for (stack_idx = 0; stack_idx < stacks_to_allocate; stack_idx++) sbuf_delete(td_infos[stack_idx]); free(st, M_TEMP); free(td_infos, M_TEMP); stacks_to_allocate *= 10; } if (!finished && error == 0) error = ENOMEM; return (error); } #endif #ifdef SLEEPQUEUE_PROFILING #define SLEEPQ_PROF_LOCATIONS 1024 #define SLEEPQ_SBUFSIZE 512 struct sleepq_prof { LIST_ENTRY(sleepq_prof) sp_link; const char *sp_wmesg; long sp_count; }; LIST_HEAD(sqphead, sleepq_prof); struct sqphead sleepq_prof_free; struct sqphead sleepq_hash[SC_TABLESIZE]; static struct sleepq_prof sleepq_profent[SLEEPQ_PROF_LOCATIONS]; static struct mtx sleepq_prof_lock; MTX_SYSINIT(sleepq_prof_lock, &sleepq_prof_lock, "sleepq_prof", MTX_SPIN); static void sleepq_profile(const char *wmesg) { struct sleepq_prof *sp; mtx_lock_spin(&sleepq_prof_lock); if (prof_enabled == 0) goto unlock; LIST_FOREACH(sp, &sleepq_hash[SC_HASH(wmesg)], sp_link) if (sp->sp_wmesg == wmesg) goto done; sp = LIST_FIRST(&sleepq_prof_free); if (sp == NULL) goto unlock; sp->sp_wmesg = wmesg; LIST_REMOVE(sp, sp_link); LIST_INSERT_HEAD(&sleepq_hash[SC_HASH(wmesg)], sp, sp_link); done: sp->sp_count++; unlock: mtx_unlock_spin(&sleepq_prof_lock); return; } static void sleepq_prof_reset(void) { struct sleepq_prof *sp; int enabled; int i; mtx_lock_spin(&sleepq_prof_lock); enabled = prof_enabled; prof_enabled = 0; for (i = 0; i < SC_TABLESIZE; i++) LIST_INIT(&sleepq_hash[i]); LIST_INIT(&sleepq_prof_free); for (i = 0; i < SLEEPQ_PROF_LOCATIONS; i++) { sp = &sleepq_profent[i]; sp->sp_wmesg = NULL; sp->sp_count = 0; LIST_INSERT_HEAD(&sleepq_prof_free, sp, sp_link); } prof_enabled = enabled; mtx_unlock_spin(&sleepq_prof_lock); } static int enable_sleepq_prof(SYSCTL_HANDLER_ARGS) { int error, v; v = prof_enabled; error = sysctl_handle_int(oidp, &v, v, req); if (error) return (error); if (req->newptr == NULL) return (error); if (v == prof_enabled) return (0); if (v == 1) sleepq_prof_reset(); mtx_lock_spin(&sleepq_prof_lock); prof_enabled = !!v; mtx_unlock_spin(&sleepq_prof_lock); return (0); } static int reset_sleepq_prof_stats(SYSCTL_HANDLER_ARGS) { int error, v; v = 0; error = sysctl_handle_int(oidp, &v, 0, req); if (error) return (error); if (req->newptr == NULL) return (error); if (v == 0) return (0); sleepq_prof_reset(); return (0); } static int 
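
The loop above is a general pattern for snapshotting data guarded by a spin lock when the snapshot needs allocation: guess a capacity, allocate unlocked, take the lock, and on a bad guess drop everything, grow the guess tenfold, and retry a bounded number of times before giving up with ENOMEM. The pattern in miniature; lock(), unlock(), and need_items() are stand-ins for the sleepqueue chain lock and the walk over blocked threads:

#include <errno.h>
#include <stdlib.h>

static void lock(void) { }
static void unlock(void) { }
static size_t need_items(void) { return (42); }	/* only valid when locked */

static int
snapshot(int **out, size_t *nitems)
{
	size_t cap = 10;
	int attempt, *buf;

	for (attempt = 0; attempt < 3; attempt++) {
		buf = malloc(cap * sizeof(*buf));	/* never under the lock */
		if (buf == NULL)
			return (ENOMEM);
		lock();
		if (need_items() <= cap) {
			*nitems = need_items();
			/* ...fill buf from the protected structure... */
			unlock();
			*out = buf;
			return (0);
		}
		unlock();			/* guessed too small: retry */
		free(buf);
		cap *= 10;			/* same growth factor as above */
	}
	return (ENOMEM);
}

int
main(void)
{
	int *buf;
	size_t n;

	if (snapshot(&buf, &n) == 0)
		free(buf);
	return (0);
}
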
dump_sleepq_prof_stats(SYSCTL_HANDLER_ARGS) { struct sleepq_prof *sp; struct sbuf *sb; int enabled; int error; int i; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sb = sbuf_new_for_sysctl(NULL, NULL, SLEEPQ_SBUFSIZE, req); sbuf_printf(sb, "\nwmesg\tcount\n"); enabled = prof_enabled; mtx_lock_spin(&sleepq_prof_lock); prof_enabled = 0; mtx_unlock_spin(&sleepq_prof_lock); for (i = 0; i < SC_TABLESIZE; i++) { LIST_FOREACH(sp, &sleepq_hash[i], sp_link) { sbuf_printf(sb, "%s\t%ld\n", sp->sp_wmesg, sp->sp_count); } } mtx_lock_spin(&sleepq_prof_lock); prof_enabled = enabled; mtx_unlock_spin(&sleepq_prof_lock); error = sbuf_finish(sb); sbuf_delete(sb); return (error); } SYSCTL_PROC(_debug_sleepq, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0, dump_sleepq_prof_stats, "A", "Sleepqueue profiling statistics"); SYSCTL_PROC(_debug_sleepq, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW, NULL, 0, reset_sleepq_prof_stats, "I", "Reset sleepqueue profiling statistics"); SYSCTL_PROC(_debug_sleepq, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW, NULL, 0, enable_sleepq_prof, "I", "Enable sleepqueue profiling"); #endif #ifdef DDB DB_SHOW_COMMAND(sleepq, db_show_sleepqueue) { struct sleepqueue_chain *sc; struct sleepqueue *sq; #ifdef INVARIANTS struct lock_object *lock; #endif struct thread *td; void *wchan; int i; if (!have_addr) return; /* * First, see if there is an active sleep queue for the wait channel * indicated by the address. */ wchan = (void *)addr; sc = SC_LOOKUP(wchan); LIST_FOREACH(sq, &sc->sc_queues, sq_hash) if (sq->sq_wchan == wchan) goto found; /* * Second, see if there is an active sleep queue at the address * indicated. */ for (i = 0; i < SC_TABLESIZE; i++) LIST_FOREACH(sq, &sleepq_chains[i].sc_queues, sq_hash) { if (sq == (struct sleepqueue *)addr) goto found; } db_printf("Unable to locate a sleep queue via %p\n", (void *)addr); return; found: db_printf("Wait channel: %p\n", sq->sq_wchan); db_printf("Queue type: %d\n", sq->sq_type); #ifdef INVARIANTS if (sq->sq_lock) { lock = sq->sq_lock; db_printf("Associated Interlock: %p - (%s) %s\n", lock, LOCK_CLASS(lock)->lc_name, lock->lo_name); } #endif db_printf("Blocked threads:\n"); for (i = 0; i < NR_SLEEPQS; i++) { db_printf("\nQueue[%d]:\n", i); if (TAILQ_EMPTY(&sq->sq_blocked[i])) db_printf("\tempty\n"); else TAILQ_FOREACH(td, &sq->sq_blocked[i], td_slpq) { db_printf("\t%p (tid %d, pid %d, \"%s\")\n", td, td->td_tid, td->td_proc->p_pid, td->td_name); } db_printf("(expected: %u)\n", sq->sq_blockedcnt[i]); } } /* Alias 'show sleepqueue' to 'show sleepq'. */ DB_SHOW_ALIAS(sleepqueue, db_show_sleepqueue); #endif Index: head/sys/kern/subr_smp.c =================================================================== --- head/sys/kern/subr_smp.c (revision 344854) +++ head/sys/kern/subr_smp.c (revision 344855) @@ -1,1174 +1,1173 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2001, John Baldwin . - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * This module holds the global variables and machine independent functions * used for the kernel SMP support. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_sched.h" #ifdef SMP MALLOC_DEFINE(M_TOPO, "toponodes", "SMP topology data"); volatile cpuset_t stopped_cpus; volatile cpuset_t started_cpus; volatile cpuset_t suspended_cpus; cpuset_t hlt_cpus_mask; cpuset_t logical_cpus_mask; void (*cpustop_restartfunc)(void); #endif static int sysctl_kern_smp_active(SYSCTL_HANDLER_ARGS); /* This is used in modules that need to work in both SMP and UP. */ cpuset_t all_cpus; int mp_ncpus; /* export this for libkvm consumers. */ int mp_maxcpus = MAXCPU; volatile int smp_started; u_int mp_maxid; static SYSCTL_NODE(_kern, OID_AUTO, smp, CTLFLAG_RD|CTLFLAG_CAPRD, NULL, "Kernel SMP"); SYSCTL_INT(_kern_smp, OID_AUTO, maxid, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxid, 0, "Max CPU ID."); SYSCTL_INT(_kern_smp, OID_AUTO, maxcpus, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxcpus, 0, "Max number of CPUs that the system was compiled for."); SYSCTL_PROC(_kern_smp, OID_AUTO, active, CTLFLAG_RD|CTLTYPE_INT|CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_smp_active, "I", "Indicates system is running in SMP mode"); int smp_disabled = 0; /* has smp been disabled? */ SYSCTL_INT(_kern_smp, OID_AUTO, disabled, CTLFLAG_RDTUN|CTLFLAG_CAPRD, &smp_disabled, 0, "SMP has been disabled from the loader"); int smp_cpus = 1; /* how many CPUs are running */ SYSCTL_INT(_kern_smp, OID_AUTO, cpus, CTLFLAG_RD|CTLFLAG_CAPRD, &smp_cpus, 0, "Number of CPUs online"); int smp_threads_per_core = 1; /* how many SMT threads are running per core */ SYSCTL_INT(_kern_smp, OID_AUTO, threads_per_core, CTLFLAG_RD|CTLFLAG_CAPRD, &smp_threads_per_core, 0, "Number of SMT threads online per core"); int mp_ncores = -1; /* how many physical cores running */ SYSCTL_INT(_kern_smp, OID_AUTO, cores, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_ncores, 0, "Number of physical cores online"); int smp_topology = 0; /* Which topology we're using. */ SYSCTL_INT(_kern_smp, OID_AUTO, topology, CTLFLAG_RDTUN, &smp_topology, 0, "Topology override setting; 0 is default provided by hardware."); #ifdef SMP /* Enable forwarding of a signal to a process running on a different CPU */ static int forward_signal_enabled = 1; SYSCTL_INT(_kern_smp, OID_AUTO, forward_signal_enabled, CTLFLAG_RW, &forward_signal_enabled, 0, "Forwarding of a signal to a process on a different CPU"); /* Variables needed for SMP rendezvous.
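 *
 * As a reading aid (this is only a summary of the code below, not extra
 * synchronization), the smp_rv_waiters[] slots act as barrier phases:
 *
 *	smp_rv_waiters[0]	all CPUs have entered the handler and can
 *				safely read the smp_rv_* parameters
 *	smp_rv_waiters[1]	all CPUs have completed the setup function
 *	smp_rv_waiters[2]	all CPUs have completed the action function
 *	smp_rv_waiters[3]	this CPU is completely done touching the
 *				smp_rv_* variables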
*/ static volatile int smp_rv_ncpus; static void (*volatile smp_rv_setup_func)(void *arg); static void (*volatile smp_rv_action_func)(void *arg); static void (*volatile smp_rv_teardown_func)(void *arg); static void *volatile smp_rv_func_arg; static volatile int smp_rv_waiters[4]; /* * Shared mutex to restrict busywaits between smp_rendezvous() and * smp(_targeted)_tlb_shootdown(). A deadlock occurs if both of these * functions trigger at once and cause multiple CPUs to busywait with * interrupts disabled. */ struct mtx smp_ipi_mtx; /* * Let the MD SMP code initialize mp_maxid very early if it can. */ static void mp_setmaxid(void *dummy) { cpu_mp_setmaxid(); KASSERT(mp_ncpus >= 1, ("%s: CPU count < 1", __func__)); KASSERT(mp_ncpus > 1 || mp_maxid == 0, ("%s: one CPU but mp_maxid is not zero", __func__)); KASSERT(mp_maxid >= mp_ncpus - 1, ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid, mp_ncpus)); } SYSINIT(cpu_mp_setmaxid, SI_SUB_TUNABLES, SI_ORDER_FIRST, mp_setmaxid, NULL); /* * Call the MD SMP initialization code. */ static void mp_start(void *dummy) { mtx_init(&smp_ipi_mtx, "smp rendezvous", NULL, MTX_SPIN); /* Probe for MP hardware. */ if (smp_disabled != 0 || cpu_mp_probe() == 0) { mp_ncores = 1; mp_ncpus = 1; CPU_SETOF(PCPU_GET(cpuid), &all_cpus); return; } cpu_mp_start(); printf("FreeBSD/SMP: Multiprocessor System Detected: %d CPUs\n", mp_ncpus); /* Provide a default for most architectures that don't have SMT/HTT. */ if (mp_ncores < 0) mp_ncores = mp_ncpus; cpu_mp_announce(); } SYSINIT(cpu_mp, SI_SUB_CPU, SI_ORDER_THIRD, mp_start, NULL); void forward_signal(struct thread *td) { int id; /* * signotify() has already set TDF_ASTPENDING and TDF_NEEDSIGCHECK on * this thread, so all we need to do is poke it if it is currently * executing so that it executes ast(). */ THREAD_LOCK_ASSERT(td, MA_OWNED); KASSERT(TD_IS_RUNNING(td), ("forward_signal: thread is not TDS_RUNNING")); CTR1(KTR_SMP, "forward_signal(%p)", td->td_proc); if (!smp_started || cold || panicstr) return; if (!forward_signal_enabled) return; /* No need to IPI ourself. */ if (td == curthread) return; id = td->td_oncpu; if (id == NOCPU) return; ipi_cpu(id, IPI_AST); } /* * When called, the executing CPU will send an IPI to all other CPUs * requesting that they halt execution. * * Usually (but not necessarily) called with 'other_cpus' as its arg. * * - Signals all CPUs in map to stop. * - Waits for each to stop. * * Returns: * -1: error * 0: NA * 1: ok * */ #if defined(__amd64__) || defined(__i386__) #define X86 1 #else #define X86 0 #endif static int generic_stop_cpus(cpuset_t map, u_int type) { #ifdef KTR char cpusetbuf[CPUSETBUFSIZ]; #endif static volatile u_int stopping_cpu = NOCPU; int i; volatile cpuset_t *cpus; KASSERT( type == IPI_STOP || type == IPI_STOP_HARD #if X86 || type == IPI_SUSPEND #endif , ("%s: invalid stop type", __func__)); if (!smp_started) return (0); CTR2(KTR_SMP, "stop_cpus(%s) with %u type", cpusetobj_strprint(cpusetbuf, &map), type); #if X86 /* * When suspending, ensure there are no IPIs in progress. * IPIs that have been issued, but not yet delivered (e.g. * not pending on a vCPU when running under virtualization) * will be lost, violating FreeBSD's assumption of reliable * IPI delivery.
*/ if (type == IPI_SUSPEND) mtx_lock_spin(&smp_ipi_mtx); #endif #if X86 if (!nmi_is_broadcast || nmi_kdb_lock == 0) { #endif if (stopping_cpu != PCPU_GET(cpuid)) while (atomic_cmpset_int(&stopping_cpu, NOCPU, PCPU_GET(cpuid)) == 0) while (stopping_cpu != NOCPU) cpu_spinwait(); /* spin */ /* send the stop IPI to all CPUs in map */ ipi_selected(map, type); #if X86 } #endif #if X86 if (type == IPI_SUSPEND) cpus = &suspended_cpus; else #endif cpus = &stopped_cpus; i = 0; while (!CPU_SUBSET(cpus, &map)) { /* spin */ cpu_spinwait(); i++; if (i == 100000000) { printf("timeout stopping cpus\n"); break; } } #if X86 if (type == IPI_SUSPEND) mtx_unlock_spin(&smp_ipi_mtx); #endif stopping_cpu = NOCPU; return (1); } int stop_cpus(cpuset_t map) { return (generic_stop_cpus(map, IPI_STOP)); } int stop_cpus_hard(cpuset_t map) { return (generic_stop_cpus(map, IPI_STOP_HARD)); } #if X86 int suspend_cpus(cpuset_t map) { return (generic_stop_cpus(map, IPI_SUSPEND)); } #endif /* * Called by a CPU to restart stopped CPUs. * * Usually (but not necessarily) called with 'stopped_cpus' as its arg. * * - Signals all CPUs in map to restart. * - Waits for each to restart. * * Returns: * -1: error * 0: NA * 1: ok */ static int generic_restart_cpus(cpuset_t map, u_int type) { #ifdef KTR char cpusetbuf[CPUSETBUFSIZ]; #endif volatile cpuset_t *cpus; KASSERT(type == IPI_STOP || type == IPI_STOP_HARD #if X86 || type == IPI_SUSPEND #endif , ("%s: invalid stop type", __func__)); if (!smp_started) return (0); CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map)); #if X86 if (type == IPI_SUSPEND) cpus = &resuming_cpus; else #endif cpus = &stopped_cpus; /* signal other cpus to restart */ #if X86 if (type == IPI_SUSPEND) CPU_COPY_STORE_REL(&map, &toresume_cpus); else #endif CPU_COPY_STORE_REL(&map, &started_cpus); #if X86 if (!nmi_is_broadcast || nmi_kdb_lock == 0) { #endif /* wait for each to clear its bit */ while (CPU_OVERLAP(cpus, &map)) cpu_spinwait(); #if X86 } #endif return (1); } int restart_cpus(cpuset_t map) { return (generic_restart_cpus(map, IPI_STOP)); } #if X86 int resume_cpus(cpuset_t map) { return (generic_restart_cpus(map, IPI_SUSPEND)); } #endif #undef X86 /* * All-CPU rendezvous. CPUs are signalled, all execute the setup function * (if specified), rendezvous, execute the action function (if specified), * rendezvous again, execute the teardown function (if specified), and then * resume. * * Note that the supplied external functions _must_ be reentrant and aware * that they are running in parallel and in an unknown lock context. */ void smp_rendezvous_action(void) { struct thread *td; void *local_func_arg; void (*local_setup_func)(void*); void (*local_action_func)(void*); void (*local_teardown_func)(void*); #ifdef INVARIANTS int owepreempt; #endif /* Ensure we have up-to-date values. */ atomic_add_acq_int(&smp_rv_waiters[0], 1); while (smp_rv_waiters[0] < smp_rv_ncpus) cpu_spinwait(); /* Fetch rendezvous parameters after acquire barrier. */ local_func_arg = smp_rv_func_arg; local_setup_func = smp_rv_setup_func; local_action_func = smp_rv_action_func; local_teardown_func = smp_rv_teardown_func; /* * Use a nested critical section to prevent any preemptions * from occurring during a rendezvous action routine. * Specifically, if a rendezvous handler is invoked via an IPI * and the interrupted thread was in the critical_exit() * function after setting td_critnest to 0 but before * performing a deferred preemption, this routine can be * invoked with td_critnest set to 0 and td_owepreempt true. 
* In that case, a critical_exit() during the rendezvous * action would trigger a preemption which is not permitted in * a rendezvous action. To fix this, wrap all of the * rendezvous action handlers in a critical section. We * cannot use a regular critical section however as having * critical_exit() preempt from this routine would also be * problematic (the preemption must not occur before the IPI * has been acknowledged via an EOI). Instead, we * intentionally ignore td_owepreempt when leaving the * critical section. This should be harmless because we do * not permit rendezvous action routines to schedule threads, * and thus td_owepreempt should never transition from 0 to 1 * during this routine. */ td = curthread; td->td_critnest++; #ifdef INVARIANTS owepreempt = td->td_owepreempt; #endif /* * If requested, run a setup function before the main action * function. Ensure all CPUs have completed the setup * function before moving on to the action function. */ if (local_setup_func != smp_no_rendezvous_barrier) { if (smp_rv_setup_func != NULL) smp_rv_setup_func(smp_rv_func_arg); atomic_add_int(&smp_rv_waiters[1], 1); while (smp_rv_waiters[1] < smp_rv_ncpus) cpu_spinwait(); } if (local_action_func != NULL) local_action_func(local_func_arg); if (local_teardown_func != smp_no_rendezvous_barrier) { /* * Signal that the main action has been completed. If a * full exit rendezvous is requested, then all CPUs will * wait here until all CPUs have finished the main action. */ atomic_add_int(&smp_rv_waiters[2], 1); while (smp_rv_waiters[2] < smp_rv_ncpus) cpu_spinwait(); if (local_teardown_func != NULL) local_teardown_func(local_func_arg); } /* * Signal that the rendezvous is fully completed by this CPU. * This means that no member of smp_rv_* pseudo-structure will be * accessed by this target CPU after this point; in particular, * memory pointed to by smp_rv_func_arg. * * The release semantic ensures that all accesses performed by * the current CPU are visible when smp_rendezvous_cpus() * returns, by synchronizing with the * atomic_load_acq_int(&smp_rv_waiters[3]). */ atomic_add_rel_int(&smp_rv_waiters[3], 1); td->td_critnest--; KASSERT(owepreempt == td->td_owepreempt, ("rendezvous action changed td_owepreempt")); } void smp_rendezvous_cpus(cpuset_t map, void (* setup_func)(void *), void (* action_func)(void *), void (* teardown_func)(void *), void *arg) { int curcpumap, i, ncpus = 0; /* See the comments in the !SMP case. */ if (!smp_started) { spinlock_enter(); if (setup_func != NULL) setup_func(arg); if (action_func != NULL) action_func(arg); if (teardown_func != NULL) teardown_func(arg); spinlock_exit(); return; } CPU_FOREACH(i) { if (CPU_ISSET(i, &map)) ncpus++; } if (ncpus == 0) panic("ncpus is 0 with non-zero map"); mtx_lock_spin(&smp_ipi_mtx); /* Pass rendezvous parameters via global variables. */ smp_rv_ncpus = ncpus; smp_rv_setup_func = setup_func; smp_rv_action_func = action_func; smp_rv_teardown_func = teardown_func; smp_rv_func_arg = arg; smp_rv_waiters[1] = 0; smp_rv_waiters[2] = 0; smp_rv_waiters[3] = 0; atomic_store_rel_int(&smp_rv_waiters[0], 0); /* * Signal other processors, which will enter the IPI with * interrupts off. */ curcpumap = CPU_ISSET(curcpu, &map); CPU_CLR(curcpu, &map); ipi_selected(map, IPI_RENDEZVOUS); /* Check if the current CPU is in the map */ if (curcpumap != 0) smp_rendezvous_action(); /* * Ensure that the master CPU waits for all the other * CPUs to finish the rendezvous, so that smp_rv_* * pseudo-structure and the arg are guaranteed to not * be in use.
* * Load acquire synchronizes with the release add in * smp_rendezvous_action(), which ensures that our caller sees * all memory actions done by the called functions on other * CPUs. */ while (atomic_load_acq_int(&smp_rv_waiters[3]) < ncpus) cpu_spinwait(); mtx_unlock_spin(&smp_ipi_mtx); } void smp_rendezvous(void (* setup_func)(void *), void (* action_func)(void *), void (* teardown_func)(void *), void *arg) { smp_rendezvous_cpus(all_cpus, setup_func, action_func, teardown_func, arg); } static struct cpu_group group[MAXCPU * MAX_CACHE_LEVELS + 1]; struct cpu_group * smp_topo(void) { char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ]; struct cpu_group *top; /* * Check for a fake topology request for debugging purposes. */ switch (smp_topology) { case 1: /* Dual core with no sharing. */ top = smp_topo_1level(CG_SHARE_NONE, 2, 0); break; case 2: /* No topology, all cpus are equal. */ top = smp_topo_none(); break; case 3: /* Dual core with shared L2. */ top = smp_topo_1level(CG_SHARE_L2, 2, 0); break; case 4: /* quad core, shared l3 among each package, private l2. */ top = smp_topo_1level(CG_SHARE_L3, 4, 0); break; case 5: /* quad core, 2 dualcore parts on each package share l2. */ top = smp_topo_2level(CG_SHARE_NONE, 2, CG_SHARE_L2, 2, 0); break; case 6: /* Single-core 2xHTT */ top = smp_topo_1level(CG_SHARE_L1, 2, CG_FLAG_HTT); break; case 7: /* quad core with a shared l3, 8 threads sharing L2. */ top = smp_topo_2level(CG_SHARE_L3, 4, CG_SHARE_L2, 8, CG_FLAG_SMT); break; default: /* Default, ask the system what it wants. */ top = cpu_topo(); break; } /* * Verify the returned topology. */ if (top->cg_count != mp_ncpus) panic("Built bad topology at %p. CPU count %d != %d", top, top->cg_count, mp_ncpus); if (CPU_CMP(&top->cg_mask, &all_cpus)) panic("Built bad topology at %p. CPU mask (%s) != (%s)", top, cpusetobj_strprint(cpusetbuf, &top->cg_mask), cpusetobj_strprint(cpusetbuf2, &all_cpus)); /* * Collapse nonsense levels that may be created out of convenience by * the MD layers. They cause extra work in the search functions. */ while (top->cg_children == 1) { top = &top->cg_child[0]; top->cg_parent = NULL; } return (top); } struct cpu_group * smp_topo_alloc(u_int count) { static u_int index; u_int curr; curr = index; index += count; return (&group[curr]); } struct cpu_group * smp_topo_none(void) { struct cpu_group *top; top = &group[0]; top->cg_parent = NULL; top->cg_child = NULL; top->cg_mask = all_cpus; top->cg_count = mp_ncpus; top->cg_children = 0; top->cg_level = CG_SHARE_NONE; top->cg_flags = 0; return (top); } static int smp_topo_addleaf(struct cpu_group *parent, struct cpu_group *child, int share, int count, int flags, int start) { char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ]; cpuset_t mask; int i; CPU_ZERO(&mask); for (i = 0; i < count; i++, start++) CPU_SET(start, &mask); child->cg_parent = parent; child->cg_child = NULL; child->cg_children = 0; child->cg_level = share; child->cg_count = count; child->cg_flags = flags; child->cg_mask = mask; parent->cg_children++; for (; parent != NULL; parent = parent->cg_parent) { if (CPU_OVERLAP(&parent->cg_mask, &child->cg_mask)) panic("Duplicate children in %p. 
mask (%s) child (%s)", parent, cpusetobj_strprint(cpusetbuf, &parent->cg_mask), cpusetobj_strprint(cpusetbuf2, &child->cg_mask)); CPU_OR(&parent->cg_mask, &child->cg_mask); parent->cg_count += child->cg_count; } return (start); } struct cpu_group * smp_topo_1level(int share, int count, int flags) { struct cpu_group *child; struct cpu_group *top; int packages; int cpu; int i; cpu = 0; top = &group[0]; packages = mp_ncpus / count; top->cg_child = child = &group[1]; top->cg_level = CG_SHARE_NONE; for (i = 0; i < packages; i++, child++) cpu = smp_topo_addleaf(top, child, share, count, flags, cpu); return (top); } struct cpu_group * smp_topo_2level(int l2share, int l2count, int l1share, int l1count, int l1flags) { struct cpu_group *top; struct cpu_group *l1g; struct cpu_group *l2g; int cpu; int i; int j; cpu = 0; top = &group[0]; l2g = &group[1]; top->cg_child = l2g; top->cg_level = CG_SHARE_NONE; top->cg_children = mp_ncpus / (l2count * l1count); l1g = l2g + top->cg_children; for (i = 0; i < top->cg_children; i++, l2g++) { l2g->cg_parent = top; l2g->cg_child = l1g; l2g->cg_level = l2share; for (j = 0; j < l2count; j++, l1g++) cpu = smp_topo_addleaf(l2g, l1g, l1share, l1count, l1flags, cpu); } return (top); } struct cpu_group * smp_topo_find(struct cpu_group *top, int cpu) { struct cpu_group *cg; cpuset_t mask; int children; int i; CPU_SETOF(cpu, &mask); cg = top; for (;;) { if (!CPU_OVERLAP(&cg->cg_mask, &mask)) return (NULL); if (cg->cg_children == 0) return (cg); children = cg->cg_children; for (i = 0, cg = cg->cg_child; i < children; cg++, i++) if (CPU_OVERLAP(&cg->cg_mask, &mask)) break; } return (NULL); } #else /* !SMP */ void smp_rendezvous_cpus(cpuset_t map, void (*setup_func)(void *), void (*action_func)(void *), void (*teardown_func)(void *), void *arg) { /* * In the !SMP case we just need to ensure the same initial conditions * as the SMP case. */ spinlock_enter(); if (setup_func != NULL) setup_func(arg); if (action_func != NULL) action_func(arg); if (teardown_func != NULL) teardown_func(arg); spinlock_exit(); } void smp_rendezvous(void (*setup_func)(void *), void (*action_func)(void *), void (*teardown_func)(void *), void *arg) { smp_rendezvous_cpus(all_cpus, setup_func, action_func, teardown_func, arg); } /* * Provide dummy SMP support for UP kernels. Modules that need to use SMP * APIs will still work using this dummy support. */ static void mp_setvariables_for_up(void *dummy) { mp_ncpus = 1; mp_ncores = 1; mp_maxid = PCPU_GET(cpuid); CPU_SETOF(mp_maxid, &all_cpus); KASSERT(PCPU_GET(cpuid) == 0, ("UP must have a CPU ID of zero")); } SYSINIT(cpu_mp_setvariables, SI_SUB_TUNABLES, SI_ORDER_FIRST, mp_setvariables_for_up, NULL); #endif /* SMP */ void smp_no_rendezvous_barrier(void *dummy) { #ifdef SMP KASSERT((!smp_started),("smp_no_rendezvous called and smp is started")); #endif } /* * Wait for specified idle threads to switch once. This ensures that even * preempted threads have cycled through the switch function once, * exiting their codepaths. This allows us to change global pointers * with no other synchronization. 
*/ int quiesce_cpus(cpuset_t map, const char *wmesg, int prio) { struct pcpu *pcpu; u_int gen[MAXCPU]; int error; int cpu; error = 0; for (cpu = 0; cpu <= mp_maxid; cpu++) { if (!CPU_ISSET(cpu, &map) || CPU_ABSENT(cpu)) continue; pcpu = pcpu_find(cpu); gen[cpu] = pcpu->pc_idlethread->td_generation; } for (cpu = 0; cpu <= mp_maxid; cpu++) { if (!CPU_ISSET(cpu, &map) || CPU_ABSENT(cpu)) continue; pcpu = pcpu_find(cpu); thread_lock(curthread); sched_bind(curthread, cpu); thread_unlock(curthread); while (gen[cpu] == pcpu->pc_idlethread->td_generation) { error = tsleep(quiesce_cpus, prio, wmesg, 1); if (error != EWOULDBLOCK) goto out; error = 0; } } out: thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); return (error); } int quiesce_all_cpus(const char *wmesg, int prio) { return quiesce_cpus(all_cpus, wmesg, prio); } /* Extra care is taken with this sysctl because the data type is volatile */ static int sysctl_kern_smp_active(SYSCTL_HANDLER_ARGS) { int error, active; active = smp_started; error = SYSCTL_OUT(req, &active, sizeof(active)); return (error); } #ifdef SMP void topo_init_node(struct topo_node *node) { bzero(node, sizeof(*node)); TAILQ_INIT(&node->children); } void topo_init_root(struct topo_node *root) { topo_init_node(root); root->type = TOPO_TYPE_SYSTEM; } /* * Add a child node with the given ID under the given parent. * Do nothing if there is already a child with that ID. */ struct topo_node * topo_add_node_by_hwid(struct topo_node *parent, int hwid, topo_node_type type, uintptr_t subtype) { struct topo_node *node; TAILQ_FOREACH_REVERSE(node, &parent->children, topo_children, siblings) { if (node->hwid == hwid && node->type == type && node->subtype == subtype) { return (node); } } node = malloc(sizeof(*node), M_TOPO, M_WAITOK); topo_init_node(node); node->parent = parent; node->hwid = hwid; node->type = type; node->subtype = subtype; TAILQ_INSERT_TAIL(&parent->children, node, siblings); parent->nchildren++; return (node); } /* * Find a child node with the given ID under the given parent. */ struct topo_node * topo_find_node_by_hwid(struct topo_node *parent, int hwid, topo_node_type type, uintptr_t subtype) { struct topo_node *node; TAILQ_FOREACH(node, &parent->children, siblings) { if (node->hwid == hwid && node->type == type && node->subtype == subtype) { return (node); } } return (NULL); } /* * Given a node, change the order of its parent's child nodes such * that the node becomes the first child while preserving the cyclic * order of the children. In other words, the given node is promoted * by rotation. */ void topo_promote_child(struct topo_node *child) { struct topo_node *next; struct topo_node *node; struct topo_node *parent; parent = child->parent; next = TAILQ_NEXT(child, siblings); TAILQ_REMOVE(&parent->children, child, siblings); TAILQ_INSERT_HEAD(&parent->children, child, siblings); while (next != NULL) { node = next; next = TAILQ_NEXT(node, siblings); TAILQ_REMOVE(&parent->children, node, siblings); TAILQ_INSERT_AFTER(&parent->children, child, node, siblings); child = node; } } /* * Iterate to the next node in the depth-first search (traversal) of * the topology tree.
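 *
 * For illustration, a full pre-order walk that counts the PU (logical
 * processor) nodes under a given root might look like this sketch:
 *
 *	count = 0;
 *	for (node = topo_next_node(root, root); node != NULL;
 *	    node = topo_next_node(root, node)) {
 *		if (node->type == TOPO_TYPE_PU)
 *			count++;
 *	}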
*/ struct topo_node * topo_next_node(struct topo_node *top, struct topo_node *node) { struct topo_node *next; if ((next = TAILQ_FIRST(&node->children)) != NULL) return (next); if ((next = TAILQ_NEXT(node, siblings)) != NULL) return (next); while (node != top && (node = node->parent) != top) if ((next = TAILQ_NEXT(node, siblings)) != NULL) return (next); return (NULL); } /* * Iterate to the next node in the depth-first search of the topology tree, * but without descending below the current node. */ struct topo_node * topo_next_nonchild_node(struct topo_node *top, struct topo_node *node) { struct topo_node *next; if ((next = TAILQ_NEXT(node, siblings)) != NULL) return (next); while (node != top && (node = node->parent) != top) if ((next = TAILQ_NEXT(node, siblings)) != NULL) return (next); return (NULL); } /* * Assign the given ID to the given topology node that represents a logical * processor. */ void topo_set_pu_id(struct topo_node *node, cpuid_t id) { KASSERT(node->type == TOPO_TYPE_PU, ("topo_set_pu_id: wrong node type: %u", node->type)); KASSERT(CPU_EMPTY(&node->cpuset) && node->cpu_count == 0, ("topo_set_pu_id: cpuset already not empty")); node->id = id; CPU_SET(id, &node->cpuset); node->cpu_count = 1; node->subtype = 1; while ((node = node->parent) != NULL) { KASSERT(!CPU_ISSET(id, &node->cpuset), ("logical ID %u is already set in node %p", id, node)); CPU_SET(id, &node->cpuset); node->cpu_count++; } } static struct topology_spec { topo_node_type type; bool match_subtype; uintptr_t subtype; } topology_level_table[TOPO_LEVEL_COUNT] = { [TOPO_LEVEL_PKG] = { .type = TOPO_TYPE_PKG, }, [TOPO_LEVEL_GROUP] = { .type = TOPO_TYPE_GROUP, }, [TOPO_LEVEL_CACHEGROUP] = { .type = TOPO_TYPE_CACHE, .match_subtype = true, .subtype = CG_SHARE_L3, }, [TOPO_LEVEL_CORE] = { .type = TOPO_TYPE_CORE, }, [TOPO_LEVEL_THREAD] = { .type = TOPO_TYPE_PU, }, }; static bool topo_analyze_table(struct topo_node *root, int all, enum topo_level level, struct topo_analysis *results) { struct topology_spec *spec; struct topo_node *node; int count; if (level >= TOPO_LEVEL_COUNT) return (true); spec = &topology_level_table[level]; count = 0; node = topo_next_node(root, root); while (node != NULL) { if (node->type != spec->type || (spec->match_subtype && node->subtype != spec->subtype)) { node = topo_next_node(root, node); continue; } if (!all && CPU_EMPTY(&node->cpuset)) { node = topo_next_nonchild_node(root, node); continue; } count++; if (!topo_analyze_table(node, all, level + 1, results)) return (false); node = topo_next_nonchild_node(root, node); } /* No explicit subgroups is essentially one subgroup. */ if (count == 0) { count = 1; if (!topo_analyze_table(root, all, level + 1, results)) return (false); } if (results->entities[level] == -1) results->entities[level] = count; else if (results->entities[level] != count) return (false); return (true); } /* * Check if the topology is uniform, that is, each package has the same number * of cores in it and each core has the same number of threads (logical * processors) in it. If so, calculate the number of packages, the number of * groups per package, the number of cachegroups per group, and the number of * logical processors per cachegroup. 'all' parameter tells whether to include * administratively disabled logical processors into the analysis. 
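 *
 * A minimal caller sketch (assuming a topology root built elsewhere):
 *
 *	struct topo_analysis ta;
 *
 *	if (topo_analyze(root, 1, &ta))
 *		printf("uniform topology, %d package(s)\n",
 *		    ta.entities[TOPO_LEVEL_PKG]);
 *	else
 *		printf("topology is not uniform\n");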
*/ int topo_analyze(struct topo_node *topo_root, int all, struct topo_analysis *results) { results->entities[TOPO_LEVEL_PKG] = -1; results->entities[TOPO_LEVEL_CORE] = -1; results->entities[TOPO_LEVEL_THREAD] = -1; results->entities[TOPO_LEVEL_GROUP] = -1; results->entities[TOPO_LEVEL_CACHEGROUP] = -1; if (!topo_analyze_table(topo_root, all, TOPO_LEVEL_PKG, results)) return (0); KASSERT(results->entities[TOPO_LEVEL_PKG] > 0, ("bug in topology or analysis")); return (1); } #endif /* SMP */ Index: head/sys/sys/_rwlock.h =================================================================== --- head/sys/sys/_rwlock.h (revision 344854) +++ head/sys/sys/_rwlock.h (revision 344855) @@ -1,64 +1,63 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2006 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS__RWLOCK_H_ #define _SYS__RWLOCK_H_ #include /* * Reader/writer lock. * * All reader/writer lock implementations must always have a member * called rw_lock. Other locking primitive structures are not allowed to * use this name for their members. * If this rule needs to change, the bits in the reader/writer lock * implementation must be modified appropriately. */ struct rwlock { struct lock_object lock_object; volatile uintptr_t rw_lock; }; /* * Members of struct rwlock_padalign must mirror members of struct rwlock. * rwlock_padalign rwlocks can use the rwlock(9) API transparently without * modification. * Pad-aligned rwlocks used within structures should generally be the * first member of the struct. Otherwise, the compiler can generate * additional padding for the struct to keep a correct alignment for * the rwlock. */ struct rwlock_padalign { struct lock_object lock_object; volatile uintptr_t rw_lock; } __aligned(CACHE_LINE_SIZE); #endif /* !_SYS__RWLOCK_H_ */ Index: head/sys/sys/refcount.h =================================================================== --- head/sys/sys/refcount.h (revision 344854) +++ head/sys/sys/refcount.h (revision 344855) @@ -1,113 +1,112 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __SYS_REFCOUNT_H__ #define __SYS_REFCOUNT_H__ #include #include #ifdef _KERNEL #include #else #define KASSERT(exp, msg) /* */ #endif static __inline void refcount_init(volatile u_int *count, u_int value) { *count = value; } static __inline void refcount_acquire(volatile u_int *count) { KASSERT(*count < UINT_MAX, ("refcount %p overflowed", count)); atomic_add_int(count, 1); } static __inline int refcount_release(volatile u_int *count) { u_int old; atomic_thread_fence_rel(); old = atomic_fetchadd_int(count, -1); KASSERT(old > 0, ("refcount %p is zero", count)); if (old > 1) return (0); /* * Last reference. Signal the user to call the destructor. * * Ensure that the destructor sees all updates. The fence_rel * at the start of the function synchronizes with this fence. */ atomic_thread_fence_acq(); return (1); } /* * This function returns non-zero if the refcount was * incremented. Otherwise zero is returned. */ static __inline __result_use_check int refcount_acquire_if_not_zero(volatile u_int *count) { u_int old; old = *count; for (;;) { KASSERT(old < UINT_MAX, ("refcount %p overflowed", count)); if (old == 0) return (0); if (atomic_fcmpset_int(count, &old, old + 1)) return (1); } } static __inline __result_use_check int refcount_release_if_not_last(volatile u_int *count) { u_int old; old = *count; for (;;) { KASSERT(old > 0, ("refcount %p is zero", count)); if (old == 1) return (0); if (atomic_fcmpset_int(count, &old, old - 1)) return (1); } } #endif /* ! __SYS_REFCOUNT_H__ */ Index: head/sys/sys/rwlock.h =================================================================== --- head/sys/sys/rwlock.h (revision 344854) +++ head/sys/sys/rwlock.h (revision 344855) @@ -1,300 +1,299 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2006 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution.
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_RWLOCK_H_ #define _SYS_RWLOCK_H_ #include #include #include #include #ifdef _KERNEL #include #include #endif /* * The rw_lock field consists of several fields. The low bit indicates * if the lock is locked with a read (shared) or write (exclusive) lock. * A value of 0 indicates a write lock, and a value of 1 indicates a read * lock. Bit 1 is a boolean indicating if there are any threads waiting * for a read lock. Bit 2 is a boolean indicating if there are any threads * waiting for a write lock. The rest of the variable's definition is * dependent on the value of the first bit. For a write lock, it is a * pointer to the thread holding the lock, similar to the mtx_lock field of * mutexes. For read locks, it is a count of read locks that are held. * * When the lock is not locked by any thread, it is encoded as a read lock * with zero waiters. */ #define RW_LOCK_READ 0x01 #define RW_LOCK_READ_WAITERS 0x02 #define RW_LOCK_WRITE_WAITERS 0x04 #define RW_LOCK_WRITE_SPINNER 0x08 #define RW_LOCK_WRITER_RECURSED 0x10 #define RW_LOCK_FLAGMASK \ (RW_LOCK_READ | RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS | \ RW_LOCK_WRITE_SPINNER | RW_LOCK_WRITER_RECURSED) #define RW_LOCK_WAITERS (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS) #define RW_OWNER(x) ((x) & ~RW_LOCK_FLAGMASK) #define RW_READERS_SHIFT 5 #define RW_READERS(x) (RW_OWNER((x)) >> RW_READERS_SHIFT) #define RW_READERS_LOCK(x) ((x) << RW_READERS_SHIFT | RW_LOCK_READ) #define RW_ONE_READER (1 << RW_READERS_SHIFT) #define RW_UNLOCKED RW_READERS_LOCK(0) #define RW_DESTROYED (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS) #ifdef _KERNEL #define rw_recurse lock_object.lo_data #define RW_READ_VALUE(x) ((x)->rw_lock) /* Very simple operations on rw_lock. */ /* Try to obtain a write lock once. */ #define _rw_write_lock(rw, tid) \ atomic_cmpset_acq_ptr(&(rw)->rw_lock, RW_UNLOCKED, (tid)) #define _rw_write_lock_fetch(rw, vp, tid) \ atomic_fcmpset_acq_ptr(&(rw)->rw_lock, vp, (tid)) /* Release a write lock quickly if there are no waiters. */ #define _rw_write_unlock(rw, tid) \ atomic_cmpset_rel_ptr(&(rw)->rw_lock, (tid), RW_UNLOCKED) #define _rw_write_unlock_fetch(rw, tid) \ atomic_fcmpset_rel_ptr(&(rw)->rw_lock, (tid), RW_UNLOCKED) /* * Full lock operations that are suitable to be inlined in non-debug * kernels. If the lock cannot be acquired or released trivially then * the work is deferred to another function. */ /* Acquire a write lock. */ #define __rw_wlock(rw, tid, file, line) do { \ uintptr_t _tid = (uintptr_t)(tid); \ uintptr_t _v = RW_UNLOCKED; \ \ if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__acquire) || \ !_rw_write_lock_fetch((rw), &_v, _tid))) \ _rw_wlock_hard((rw), _v, (file), (line)); \ } while (0) /* Release a write lock. 
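 * The inline below attempts a single atomic compare-and-set of rw_lock
 * from the owning thread pointer back to RW_UNLOCKED; if that fails
 * (e.g. because waiter bits are set) or lock profiling is enabled, it
 * defers to _rw_wunlock_hard().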
*/ #define __rw_wunlock(rw, tid, file, line) do { \ uintptr_t _v = (uintptr_t)(tid); \ \ if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__release) || \ !_rw_write_unlock_fetch((rw), &_v))) \ _rw_wunlock_hard((rw), _v, (file), (line)); \ } while (0) /* * Function prototypes. Routines that start with _ are not part of the * external API and should not be called directly. Wrapper macros should * be used instead. */ void _rw_init_flags(volatile uintptr_t *c, const char *name, int opts); void _rw_destroy(volatile uintptr_t *c); void rw_sysinit(void *arg); int _rw_wowned(const volatile uintptr_t *c); void _rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line); int __rw_try_wlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF); int __rw_try_wlock(volatile uintptr_t *c, const char *file, int line); void _rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line); void __rw_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF); void __rw_rlock(volatile uintptr_t *c, const char *file, int line); int __rw_try_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF); int __rw_try_rlock(volatile uintptr_t *c, const char *file, int line); void _rw_runlock_cookie_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF); void _rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line); void __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF); void __rw_wunlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF); int __rw_try_upgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF); int __rw_try_upgrade(volatile uintptr_t *c, const char *file, int line); void __rw_downgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF); void __rw_downgrade(volatile uintptr_t *c, const char *file, int line); #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) void __rw_assert(const volatile uintptr_t *c, int what, const char *file, int line); #endif /* * Top-level macros to provide lock cookie once the actual rwlock is passed. * They will also prevent passing a malformed object to the rwlock KPI by * failing compilation as the rw_lock reserved member will not be found. 
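 *
 * A minimal usage sketch (the lock and data names are illustrative):
 *
 *	struct rwlock data_lock;
 *
 *	rw_init(&data_lock, "data lock");
 *	rw_rlock(&data_lock);
 *	... read the shared data ...
 *	rw_runlock(&data_lock);
 *	rw_wlock(&data_lock);
 *	... modify the shared data ...
 *	rw_wunlock(&data_lock);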
*/ #define rw_init(rw, n) \ _rw_init_flags(&(rw)->rw_lock, n, 0) #define rw_init_flags(rw, n, o) \ _rw_init_flags(&(rw)->rw_lock, n, o) #define rw_destroy(rw) \ _rw_destroy(&(rw)->rw_lock) #define rw_wowned(rw) \ _rw_wowned(&(rw)->rw_lock) #define _rw_wlock(rw, f, l) \ _rw_wlock_cookie(&(rw)->rw_lock, f, l) #define _rw_try_wlock(rw, f, l) \ __rw_try_wlock(&(rw)->rw_lock, f, l) #define _rw_wunlock(rw, f, l) \ _rw_wunlock_cookie(&(rw)->rw_lock, f, l) #define _rw_try_rlock(rw, f, l) \ __rw_try_rlock(&(rw)->rw_lock, f, l) #if LOCK_DEBUG > 0 #define _rw_rlock(rw, f, l) \ __rw_rlock(&(rw)->rw_lock, f, l) #define _rw_runlock(rw, f, l) \ _rw_runlock_cookie(&(rw)->rw_lock, f, l) #else #define _rw_rlock(rw, f, l) \ __rw_rlock_int((struct rwlock *)rw) #define _rw_runlock(rw, f, l) \ _rw_runlock_cookie_int((struct rwlock *)rw) #endif #if LOCK_DEBUG > 0 #define _rw_wlock_hard(rw, v, f, l) \ __rw_wlock_hard(&(rw)->rw_lock, v, f, l) #define _rw_wunlock_hard(rw, v, f, l) \ __rw_wunlock_hard(&(rw)->rw_lock, v, f, l) #define _rw_try_upgrade(rw, f, l) \ __rw_try_upgrade(&(rw)->rw_lock, f, l) #define _rw_downgrade(rw, f, l) \ __rw_downgrade(&(rw)->rw_lock, f, l) #else #define _rw_wlock_hard(rw, v, f, l) \ __rw_wlock_hard(&(rw)->rw_lock, v) #define _rw_wunlock_hard(rw, v, f, l) \ __rw_wunlock_hard(&(rw)->rw_lock, v) #define _rw_try_upgrade(rw, f, l) \ __rw_try_upgrade_int(rw) #define _rw_downgrade(rw, f, l) \ __rw_downgrade_int(rw) #endif #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) #define _rw_assert(rw, w, f, l) \ __rw_assert(&(rw)->rw_lock, w, f, l) #endif /* * Public interface for lock operations. */ #ifndef LOCK_DEBUG #error LOCK_DEBUG not defined, include before #endif #if LOCK_DEBUG > 0 || defined(RWLOCK_NOINLINE) #define rw_wlock(rw) _rw_wlock((rw), LOCK_FILE, LOCK_LINE) #define rw_wunlock(rw) _rw_wunlock((rw), LOCK_FILE, LOCK_LINE) #else #define rw_wlock(rw) \ __rw_wlock((rw), curthread, LOCK_FILE, LOCK_LINE) #define rw_wunlock(rw) \ __rw_wunlock((rw), curthread, LOCK_FILE, LOCK_LINE) #endif #define rw_rlock(rw) _rw_rlock((rw), LOCK_FILE, LOCK_LINE) #define rw_runlock(rw) _rw_runlock((rw), LOCK_FILE, LOCK_LINE) #define rw_try_rlock(rw) _rw_try_rlock((rw), LOCK_FILE, LOCK_LINE) #define rw_try_upgrade(rw) _rw_try_upgrade((rw), LOCK_FILE, LOCK_LINE) #define rw_try_wlock(rw) _rw_try_wlock((rw), LOCK_FILE, LOCK_LINE) #define rw_downgrade(rw) _rw_downgrade((rw), LOCK_FILE, LOCK_LINE) #define rw_unlock(rw) do { \ if (rw_wowned(rw)) \ rw_wunlock(rw); \ else \ rw_runlock(rw); \ } while (0) #define rw_sleep(chan, rw, pri, wmesg, timo) \ _sleep((chan), &(rw)->lock_object, (pri), (wmesg), \ tick_sbt * (timo), 0, C_HARDCLOCK) #define rw_initialized(rw) lock_initialized(&(rw)->lock_object) struct rw_args { void *ra_rw; const char *ra_desc; int ra_flags; }; #define RW_SYSINIT_FLAGS(name, rw, desc, flags) \ static struct rw_args name##_args = { \ (rw), \ (desc), \ (flags), \ }; \ SYSINIT(name##_rw_sysinit, SI_SUB_LOCK, SI_ORDER_MIDDLE, \ rw_sysinit, &name##_args); \ SYSUNINIT(name##_rw_sysuninit, SI_SUB_LOCK, SI_ORDER_MIDDLE, \ _rw_destroy, __DEVOLATILE(void *, &(rw)->rw_lock)) #define RW_SYSINIT(name, rw, desc) RW_SYSINIT_FLAGS(name, rw, desc, 0) /* * Options passed to rw_init_flags(). */ #define RW_DUPOK 0x01 #define RW_NOPROFILE 0x02 #define RW_NOWITNESS 0x04 #define RW_QUIET 0x08 #define RW_RECURSE 0x10 #define RW_NEW 0x20 /* * The INVARIANTS-enabled rw_assert() functionality. 
* * The constants need to be defined for INVARIANT_SUPPORT infrastructure * support as _rw_assert() itself uses them and the latter implies that * _rw_assert() must build. */ #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) #define RA_LOCKED LA_LOCKED #define RA_RLOCKED LA_SLOCKED #define RA_WLOCKED LA_XLOCKED #define RA_UNLOCKED LA_UNLOCKED #define RA_RECURSED LA_RECURSED #define RA_NOTRECURSED LA_NOTRECURSED #endif #ifdef INVARIANTS #define rw_assert(rw, what) _rw_assert((rw), (what), LOCK_FILE, LOCK_LINE) #else #define rw_assert(rw, what) #endif #endif /* _KERNEL */ #endif /* !_SYS_RWLOCK_H_ */ Index: head/sys/sys/sleepqueue.h =================================================================== --- head/sys/sys/sleepqueue.h (revision 344854) +++ head/sys/sys/sleepqueue.h (revision 344855) @@ -1,122 +1,121 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_SLEEPQUEUE_H_ #define _SYS_SLEEPQUEUE_H_ /* * Sleep queue interface. Sleep/wakeup, condition variables, and sx * locks use a sleep queue for the queue of threads blocked on a sleep * channel. * * A thread calls sleepq_lock() to lock the sleep queue chain associated * with a given wait channel. A thread can then call sleepq_add() to * add itself onto a sleep queue and call one of the sleepq_wait() * functions to actually go to sleep. If a thread needs to abort a sleep * operation it should call sleepq_release() to unlock the associated sleep * queue chain lock. If the thread also needs to remove itself from a queue * it just enqueued itself on, it can use sleepq_remove() instead. * * If the thread only wishes to sleep for a limited amount of time, it can * call sleepq_set_timeout() after sleepq_add() to set up a timeout. It * should then use one of the sleepq_timedwait() functions to block. * * A thread is normally resumed from a sleep queue by either the * sleepq_signal() or sleepq_broadcast() functions. Sleepq_signal() wakes * the thread with the highest priority that is sleeping on the specified * wait channel. Sleepq_broadcast() wakes all threads that are sleeping * on the specified wait channel. A thread sleeping in an interruptible * sleep can be interrupted by calling sleepq_abort().
A thread can also * be removed from a specified sleep queue using the sleepq_remove() * function. Note that the sleep queue chain must first be locked via * sleepq_lock() before calling sleepq_abort(), sleepq_broadcast(), or * sleepq_signal(). These routines each return a boolean that will be true * if at least one swapped-out thread was resumed. In that case, the caller * is responsible for waking up the swapper by calling kick_proc0() after * releasing the sleep queue chain lock. * * Each thread allocates a sleep queue at thread creation via sleepq_alloc() * and releases it at thread destruction via sleepq_free(). Note that * a sleep queue is not tied to a specific thread and that the sleep queue * released at thread destruction may not be the same sleep queue that the * thread allocated when it was created. * * XXX: Some other parts of the kernel such as ithread sleeping may end up * using this interface as well (death to TDI_IWAIT!) */ struct lock_object; struct sleepqueue; struct thread; #ifdef _KERNEL #define SLEEPQ_TYPE 0x0ff /* Mask of sleep queue types. */ #define SLEEPQ_SLEEP 0x00 /* Used by sleep/wakeup. */ #define SLEEPQ_CONDVAR 0x01 /* Used for a cv. */ #define SLEEPQ_PAUSE 0x02 /* Used by pause. */ #define SLEEPQ_SX 0x03 /* Used by an sx lock. */ #define SLEEPQ_LK 0x04 /* Used by a lockmgr. */ #define SLEEPQ_INTERRUPTIBLE 0x100 /* Sleep is interruptible. */ void init_sleepqueues(void); int sleepq_abort(struct thread *td, int intrval); void sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags, int queue); struct sleepqueue *sleepq_alloc(void); int sleepq_broadcast(void *wchan, int flags, int pri, int queue); void sleepq_chains_remove_matching(bool (*matches)(struct thread *)); void sleepq_free(struct sleepqueue *sq); void sleepq_lock(void *wchan); struct sleepqueue *sleepq_lookup(void *wchan); void sleepq_release(void *wchan); void sleepq_remove(struct thread *td, void *wchan); int sleepq_remove_matching(struct sleepqueue *sq, int queue, bool (*matches)(struct thread *), int pri); int sleepq_signal(void *wchan, int flags, int pri, int queue); void sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr, int flags); #define sleepq_set_timeout(wchan, timo) \ sleepq_set_timeout_sbt((wchan), tick_sbt * (timo), 0, C_HARDCLOCK) u_int sleepq_sleepcnt(void *wchan, int queue); int sleepq_timedwait(void *wchan, int pri); int sleepq_timedwait_sig(void *wchan, int pri); int sleepq_type(void *wchan); void sleepq_wait(void *wchan, int pri); int sleepq_wait_sig(void *wchan, int pri); #ifdef STACK struct sbuf; int sleepq_sbuf_print_stacks(struct sbuf *sb, void *wchan, int queue, int *count_stacks_printed); #endif #endif /* _KERNEL */ #endif /* !_SYS_SLEEPQUEUE_H_ */ Index: head/sys/sys/turnstile.h =================================================================== --- head/sys/sys/turnstile.h (revision 344854) +++ head/sys/sys/turnstile.h (revision 344855) @@ -1,107 +1,106 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2002 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_TURNSTILE_H_ #define _SYS_TURNSTILE_H_ /* * Turnstile interface. Non-sleepable locks use a turnstile for the * queue of threads blocked on them when they are contested. Each * turnstile contains two sub-queues: one for threads waiting for a * shared, or read, lock, and one for threads waiting for an * exclusive, or write, lock. * * A thread calls turnstile_chain_lock() to lock the turnstile chain * associated with a given lock. A thread calls turnstile_wait() when * the lock is contested to be put on the queue and block. If a thread * calls turnstile_trywait() and decides to retry a lock operation instead * of blocking, it should call turnstile_cancel() to unlock the associated * turnstile chain lock. * * When a lock is released, the thread calls turnstile_lookup() to look * up the turnstile associated with the given lock in the hash table. Then * it calls either turnstile_signal() or turnstile_broadcast() to mark * blocked threads for a pending wakeup. turnstile_signal() marks the * highest priority blocked thread while turnstile_broadcast() marks all * blocked threads. The turnstile_signal() function returns true if the * turnstile became empty as a result. After the higher level code finishes * releasing the lock, turnstile_unpend() must be called to wake up the * pending thread(s) and give up ownership of the turnstile. * * Alternatively, if a thread wishes to relinquish ownership of a lock * without waking up any waiters, it may call turnstile_disown(). * * When a lock is acquired that already has at least one thread contested * on it, the new owner of the lock must claim ownership of the turnstile * via turnstile_claim(). * * Each thread allocates a turnstile at thread creation via turnstile_alloc() * and releases it at thread destruction via turnstile_free(). Note that * a turnstile is not tied to a specific thread and that the turnstile * released at thread destruction may not be the same turnstile that the * thread allocated when it was created. * * The highest priority thread blocked on a specified queue of a * turnstile can be obtained via turnstile_head(). A given queue can * also be queried to see if it is empty via turnstile_empty(). */ struct lock_object; struct thread; struct turnstile; #ifdef _KERNEL /* Which queue to block on or which queue to wakeup one or more threads from. 
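 *
 * For illustration, a sketch of a contested exclusive-lock release using
 * these queues (the "lock" variable is hypothetical):
 *
 *	turnstile_chain_lock(&lock->lock_object);
 *	ts = turnstile_lookup(&lock->lock_object);
 *	turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE);
 *	... clear the lock word itself ...
 *	turnstile_unpend(ts);
 *	turnstile_chain_unlock(&lock->lock_object);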
*/ #define TS_EXCLUSIVE_QUEUE 0 #define TS_SHARED_QUEUE 1 void init_turnstiles(void); void turnstile_adjust(struct thread *, u_char); struct turnstile *turnstile_alloc(void); void turnstile_broadcast(struct turnstile *, int); void turnstile_cancel(struct turnstile *); void turnstile_chain_lock(struct lock_object *); void turnstile_chain_unlock(struct lock_object *); void turnstile_claim(struct turnstile *); void turnstile_disown(struct turnstile *); int turnstile_empty(struct turnstile *ts, int queue); void turnstile_free(struct turnstile *); struct thread *turnstile_head(struct turnstile *, int); struct turnstile *turnstile_lookup(struct lock_object *); int turnstile_signal(struct turnstile *, int); struct turnstile *turnstile_trywait(struct lock_object *); void turnstile_unpend(struct turnstile *); void turnstile_wait(struct turnstile *, struct thread *, int); struct thread *turnstile_lock(struct turnstile *, struct lock_object **); void turnstile_unlock(struct turnstile *, struct lock_object *); void turnstile_assert(struct turnstile *); #endif /* _KERNEL */ #endif /* _SYS_TURNSTILE_H_ */ Index: head/sys/x86/acpica/madt.c =================================================================== --- head/sys/x86/acpica/madt.c (revision 344854) +++ head/sys/x86/acpica/madt.c (revision 344855) @@ -1,762 +1,761 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* These two arrays are indexed by APIC IDs. 
*/ static struct { void *io_apic; UINT32 io_vector; } *ioapics; static struct lapic_info { u_int la_enabled; u_int la_acpi_id; } *lapics; int madt_found_sci_override; static ACPI_TABLE_MADT *madt; static vm_paddr_t madt_physaddr; static vm_offset_t madt_length; static MALLOC_DEFINE(M_MADT, "madt_table", "ACPI MADT Table Items"); static enum intr_polarity interrupt_polarity(UINT16 IntiFlags, UINT8 Source); static enum intr_trigger interrupt_trigger(UINT16 IntiFlags, UINT8 Source); static int madt_find_cpu(u_int acpi_id, u_int *apic_id); static int madt_find_interrupt(int intr, void **apic, u_int *pin); static void madt_parse_apics(ACPI_SUBTABLE_HEADER *entry, void *arg); static void madt_parse_interrupt_override( ACPI_MADT_INTERRUPT_OVERRIDE *intr); static void madt_parse_ints(ACPI_SUBTABLE_HEADER *entry, void *arg __unused); static void madt_parse_local_nmi(ACPI_MADT_LOCAL_APIC_NMI *nmi); static void madt_parse_nmi(ACPI_MADT_NMI_SOURCE *nmi); static int madt_probe(void); static int madt_probe_cpus(void); static void madt_probe_cpus_handler(ACPI_SUBTABLE_HEADER *entry, void *arg __unused); static void madt_setup_cpus_handler(ACPI_SUBTABLE_HEADER *entry, void *arg __unused); static void madt_register(void *dummy); static int madt_setup_local(void); static int madt_setup_io(void); static void madt_walk_table(acpi_subtable_handler *handler, void *arg); static struct apic_enumerator madt_enumerator = { .apic_name = "MADT", .apic_probe = madt_probe, .apic_probe_cpus = madt_probe_cpus, .apic_setup_local = madt_setup_local, .apic_setup_io = madt_setup_io }; /* * Look for an ACPI Multiple APIC Description Table ("APIC"). */ static int madt_probe(void) { madt_physaddr = acpi_find_table(ACPI_SIG_MADT); if (madt_physaddr == 0) return (ENXIO); return (-50); } /* * Run through the MADT enumerating CPUs. */ static int madt_probe_cpus(void) { madt = acpi_map_table(madt_physaddr, ACPI_SIG_MADT); KASSERT(madt != NULL, ("Unable to re-map MADT")); madt_length = madt->Header.Length; madt_walk_table(madt_probe_cpus_handler, NULL); acpi_unmap_table(madt); madt = NULL; return (0); } /* * Initialize the local APIC on the BSP. */ static int madt_setup_local(void) { ACPI_TABLE_DMAR *dmartbl; vm_paddr_t dmartbl_physaddr; const char *reason; char *hw_vendor; u_int p[4]; int user_x2apic; bool bios_x2apic; if ((cpu_feature2 & CPUID2_X2APIC) != 0) { reason = NULL; /* * Automatically detect several configurations where * x2APIC mode is known to cause trouble. The user can * override the setting with the hw.x2apic_enable tunable. */ dmartbl_physaddr = acpi_find_table(ACPI_SIG_DMAR); if (dmartbl_physaddr != 0) { dmartbl = acpi_map_table(dmartbl_physaddr, ACPI_SIG_DMAR); if ((dmartbl->Flags & ACPI_DMAR_X2APIC_OPT_OUT) != 0) reason = "by DMAR table"; acpi_unmap_table(dmartbl); } if (vm_guest == VM_GUEST_VMWARE) { vmware_hvcall(VMW_HVCMD_GETVCPU_INFO, p); if ((p[0] & VMW_VCPUINFO_VCPU_RESERVED) != 0 || (p[0] & VMW_VCPUINFO_LEGACY_X2APIC) == 0) reason = "inside VMWare without intr redirection"; } else if (vm_guest == VM_GUEST_XEN) { reason = "due to running under XEN"; } else if (vm_guest == VM_GUEST_NO && CPUID_TO_FAMILY(cpu_id) == 0x6 && CPUID_TO_MODEL(cpu_id) == 0x2a) { hw_vendor = kern_getenv("smbios.planar.maker"); /* * It seems that some Lenovo and ASUS * SandyBridge-based notebook BIOSes have a * bug which prevents booting APs in x2APIC * mode. Since the only way to detect mobile * CPU is to check northbridge pci id, which * cannot be done that early, disable x2APIC * for all Lenovo and ASUS SandyBridge * machines.
*/ if (hw_vendor != NULL) { if (!strcmp(hw_vendor, "LENOVO") || !strcmp(hw_vendor, "ASUSTeK Computer Inc.")) { reason = "for a suspected SandyBridge BIOS bug"; } freeenv(hw_vendor); } } bios_x2apic = lapic_is_x2apic(); if (reason != NULL && bios_x2apic) { if (bootverbose) printf("x2APIC should be disabled %s but " "already enabled by BIOS; keeping it enabled.\n", reason); reason = NULL; } if (reason == NULL) x2apic_mode = 1; else if (bootverbose) printf("x2APIC available but disabled %s\n", reason); user_x2apic = x2apic_mode; TUNABLE_INT_FETCH("hw.x2apic_enable", &user_x2apic); if (user_x2apic != x2apic_mode) { if (bios_x2apic && !user_x2apic) printf("x2APIC disabled by tunable and " "enabled by BIOS; ignoring tunable.\n"); else x2apic_mode = user_x2apic; } } /* * Truncate max_apic_id if not in x2APIC mode. Some structures * will already be allocated with the previous max_apic_id, but * at least we can prevent wasting more memory elsewhere. */ if (!x2apic_mode) max_apic_id = min(max_apic_id, xAPIC_MAX_APIC_ID); madt = pmap_mapbios(madt_physaddr, madt_length); lapics = malloc(sizeof(*lapics) * (max_apic_id + 1), M_MADT, M_WAITOK | M_ZERO); madt_walk_table(madt_setup_cpus_handler, NULL); lapic_init(madt->Address); printf("ACPI APIC Table: <%.*s %.*s>\n", (int)sizeof(madt->Header.OemId), madt->Header.OemId, (int)sizeof(madt->Header.OemTableId), madt->Header.OemTableId); /* * We ignore 64-bit local APIC override entries. Should we * perhaps emit a warning here if we find one? */ return (0); } /* * Enumerate I/O APICs and set up interrupt sources. */ static int madt_setup_io(void) { void *ioapic; u_int pin; int i; KASSERT(lapics != NULL, ("local APICs not initialized")); /* Try to initialize ACPI so that we can access the FADT. */ i = acpi_Startup(); if (ACPI_FAILURE(i)) { printf("MADT: ACPI Startup failed with %s\n", AcpiFormatException(i)); printf("Try disabling either ACPI or apic support.\n"); panic("Using MADT but ACPI doesn't work"); } ioapics = malloc(sizeof(*ioapics) * (IOAPIC_MAX_ID + 1), M_MADT, M_WAITOK | M_ZERO); /* First, we run through adding I/O APICs. */ madt_walk_table(madt_parse_apics, NULL); /* Second, we run through the table tweaking interrupt sources. */ madt_walk_table(madt_parse_ints, NULL); /* * If there was not an explicit override entry for the SCI, * force it to use level trigger and active-low polarity. */ if (!madt_found_sci_override) { if (madt_find_interrupt(AcpiGbl_FADT.SciInterrupt, &ioapic, &pin) != 0) printf("MADT: Could not find APIC for SCI IRQ %u\n", AcpiGbl_FADT.SciInterrupt); else { printf( "MADT: Forcing active-low polarity and level trigger for SCI\n"); ioapic_set_polarity(ioapic, pin, INTR_POLARITY_LOW); ioapic_set_triggermode(ioapic, pin, INTR_TRIGGER_LEVEL); } } /* Third, we register all the I/O APICs. */ for (i = 0; i <= IOAPIC_MAX_ID; i++) if (ioapics[i].io_apic != NULL) ioapic_register(ioapics[i].io_apic); /* Finally, we throw the switch to enable the I/O APICs. */ acpi_SetDefaultIntrModel(ACPI_INTR_APIC); free(ioapics, M_MADT); ioapics = NULL; /* NB: this is the last use of the lapics array. */ free(lapics, M_MADT); lapics = NULL; return (0); } static void madt_register(void *dummy __unused) { apic_register_enumerator(&madt_enumerator); } SYSINIT(madt_register, SI_SUB_TUNABLES - 1, SI_ORDER_FIRST, madt_register, NULL); /* * Call the handler routine for each entry in the MADT table.
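 * The walk starts at madt + 1, i.e. just past the fixed MADT header, and
 * stops Header.Length bytes from the start of the table, so only the
 * variable-length subtables are visited.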
*/ static void madt_walk_table(acpi_subtable_handler *handler, void *arg) { acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length, handler, arg); } static void madt_parse_cpu(unsigned int apic_id, unsigned int flags) { if (!(flags & ACPI_MADT_ENABLED) || #ifdef SMP mp_ncpus == MAXCPU || #endif apic_id > MAX_APIC_ID) return; #ifdef SMP mp_ncpus++; mp_maxid = mp_ncpus - 1; #endif max_apic_id = max(apic_id, max_apic_id); } static void madt_add_cpu(u_int acpi_id, u_int apic_id, u_int flags) { struct lapic_info *la; /* * The MADT does not include a BSP flag, so we have to let the * MP code figure out which CPU is the BSP on its own. */ if (bootverbose) printf("MADT: Found CPU APIC ID %u ACPI ID %u: %s\n", apic_id, acpi_id, flags & ACPI_MADT_ENABLED ? "enabled" : "disabled"); if (!(flags & ACPI_MADT_ENABLED)) return; if (apic_id > max_apic_id) { printf("MADT: Ignoring local APIC ID %u (too high)\n", apic_id); return; } la = &lapics[apic_id]; KASSERT(la->la_enabled == 0, ("Duplicate local APIC ID %u", apic_id)); la->la_enabled = 1; la->la_acpi_id = acpi_id; lapic_create(apic_id, 0); } static void madt_probe_cpus_handler(ACPI_SUBTABLE_HEADER *entry, void *arg) { ACPI_MADT_LOCAL_APIC *proc; ACPI_MADT_LOCAL_X2APIC *x2apic; switch (entry->Type) { case ACPI_MADT_TYPE_LOCAL_APIC: proc = (ACPI_MADT_LOCAL_APIC *)entry; madt_parse_cpu(proc->Id, proc->LapicFlags); break; case ACPI_MADT_TYPE_LOCAL_X2APIC: x2apic = (ACPI_MADT_LOCAL_X2APIC *)entry; madt_parse_cpu(x2apic->LocalApicId, x2apic->LapicFlags); break; } } static void madt_setup_cpus_handler(ACPI_SUBTABLE_HEADER *entry, void *arg) { ACPI_MADT_LOCAL_APIC *proc; ACPI_MADT_LOCAL_X2APIC *x2apic; switch (entry->Type) { case ACPI_MADT_TYPE_LOCAL_APIC: proc = (ACPI_MADT_LOCAL_APIC *)entry; madt_add_cpu(proc->ProcessorId, proc->Id, proc->LapicFlags); break; case ACPI_MADT_TYPE_LOCAL_X2APIC: x2apic = (ACPI_MADT_LOCAL_X2APIC *)entry; madt_add_cpu(x2apic->Uid, x2apic->LocalApicId, x2apic->LapicFlags); break; } } /* * Add an I/O APIC from an entry in the table. */ static void madt_parse_apics(ACPI_SUBTABLE_HEADER *entry, void *arg __unused) { ACPI_MADT_IO_APIC *apic; switch (entry->Type) { case ACPI_MADT_TYPE_IO_APIC: apic = (ACPI_MADT_IO_APIC *)entry; if (bootverbose) printf( "MADT: Found IO APIC ID %u, Interrupt %u at %p\n", apic->Id, apic->GlobalIrqBase, (void *)(uintptr_t)apic->Address); if (apic->Id > IOAPIC_MAX_ID) panic("%s: I/O APIC ID %u too high", __func__, apic->Id); if (ioapics[apic->Id].io_apic != NULL) panic("%s: Double APIC ID %u", __func__, apic->Id); ioapics[apic->Id].io_apic = ioapic_create(apic->Address, apic->Id, apic->GlobalIrqBase); ioapics[apic->Id].io_vector = apic->GlobalIrqBase; break; default: break; } } /* * Determine properties of an interrupt source. Note that for ACPI these * functions are only used for ISA interrupts, so we assume ISA bus values * (Active Hi, Edge Triggered) for conforming values except for the ACPI * SCI for which we use Active Lo, Level Triggered. */ static enum intr_polarity interrupt_polarity(UINT16 IntiFlags, UINT8 Source) { switch (IntiFlags & ACPI_MADT_POLARITY_MASK) { default: printf("WARNING: Bogus Interrupt Polarity. 
Assume CONFORMS\n"); /* FALLTHROUGH*/ case ACPI_MADT_POLARITY_CONFORMS: if (Source == AcpiGbl_FADT.SciInterrupt) return (INTR_POLARITY_LOW); else return (INTR_POLARITY_HIGH); case ACPI_MADT_POLARITY_ACTIVE_HIGH: return (INTR_POLARITY_HIGH); case ACPI_MADT_POLARITY_ACTIVE_LOW: return (INTR_POLARITY_LOW); } } static enum intr_trigger interrupt_trigger(UINT16 IntiFlags, UINT8 Source) { switch (IntiFlags & ACPI_MADT_TRIGGER_MASK) { default: printf("WARNING: Bogus Interrupt Trigger Mode. Assume CONFORMS.\n"); /*FALLTHROUGH*/ case ACPI_MADT_TRIGGER_CONFORMS: if (Source == AcpiGbl_FADT.SciInterrupt) return (INTR_TRIGGER_LEVEL); else return (INTR_TRIGGER_EDGE); case ACPI_MADT_TRIGGER_EDGE: return (INTR_TRIGGER_EDGE); case ACPI_MADT_TRIGGER_LEVEL: return (INTR_TRIGGER_LEVEL); } } /* * Find the local APIC ID associated with a given ACPI Processor ID. */ static int madt_find_cpu(u_int acpi_id, u_int *apic_id) { int i; for (i = 0; i <= max_apic_id; i++) { if (!lapics[i].la_enabled) continue; if (lapics[i].la_acpi_id != acpi_id) continue; *apic_id = i; return (0); } return (ENOENT); } /* * Find the IO APIC and pin on that APIC associated with a given global * interrupt. */ static int madt_find_interrupt(int intr, void **apic, u_int *pin) { int i, best; best = -1; for (i = 0; i <= IOAPIC_MAX_ID; i++) { if (ioapics[i].io_apic == NULL || ioapics[i].io_vector > intr) continue; if (best == -1 || ioapics[best].io_vector < ioapics[i].io_vector) best = i; } if (best == -1) return (ENOENT); *apic = ioapics[best].io_apic; *pin = intr - ioapics[best].io_vector; if (*pin > 32) printf("WARNING: Found intpin of %u for vector %d\n", *pin, intr); return (0); } void madt_parse_interrupt_values(void *entry, enum intr_trigger *trig, enum intr_polarity *pol) { ACPI_MADT_INTERRUPT_OVERRIDE *intr; char buf[64]; intr = entry; if (bootverbose) printf("MADT: Interrupt override: source %u, irq %u\n", intr->SourceIrq, intr->GlobalIrq); KASSERT(intr->Bus == 0, ("bus for interrupt overrides must be zero")); /* * Lookup the appropriate trigger and polarity modes for this * entry. */ *trig = interrupt_trigger(intr->IntiFlags, intr->SourceIrq); *pol = interrupt_polarity(intr->IntiFlags, intr->SourceIrq); /* * If the SCI is identity mapped but has edge trigger and * active-hi polarity or the force_sci_lo tunable is set, * force it to use level/lo. */ if (intr->SourceIrq == AcpiGbl_FADT.SciInterrupt) { madt_found_sci_override = 1; if (getenv_string("hw.acpi.sci.trigger", buf, sizeof(buf))) { if (tolower(buf[0]) == 'e') *trig = INTR_TRIGGER_EDGE; else if (tolower(buf[0]) == 'l') *trig = INTR_TRIGGER_LEVEL; else panic( "Invalid trigger %s: must be 'edge' or 'level'", buf); printf("MADT: Forcing SCI to %s trigger\n", *trig == INTR_TRIGGER_EDGE ? "edge" : "level"); } if (getenv_string("hw.acpi.sci.polarity", buf, sizeof(buf))) { if (tolower(buf[0]) == 'h') *pol = INTR_POLARITY_HIGH; else if (tolower(buf[0]) == 'l') *pol = INTR_POLARITY_LOW; else panic( "Invalid polarity %s: must be 'high' or 'low'", buf); printf("MADT: Forcing SCI to active %s polarity\n", *pol == INTR_POLARITY_HIGH ? "high" : "low"); } } } /* * Parse an interrupt source override for an ISA interrupt. 
*/ static void madt_parse_interrupt_override(ACPI_MADT_INTERRUPT_OVERRIDE *intr) { void *new_ioapic, *old_ioapic; u_int new_pin, old_pin; enum intr_trigger trig; enum intr_polarity pol; if (acpi_quirks & ACPI_Q_MADT_IRQ0 && intr->SourceIrq == 0 && intr->GlobalIrq == 2) { if (bootverbose) printf("MADT: Skipping timer override\n"); return; } if (madt_find_interrupt(intr->GlobalIrq, &new_ioapic, &new_pin) != 0) { printf("MADT: Could not find APIC for vector %u (IRQ %u)\n", intr->GlobalIrq, intr->SourceIrq); return; } madt_parse_interrupt_values(intr, &trig, &pol); /* Remap the IRQ if it is mapped to a different interrupt vector. */ if (intr->SourceIrq != intr->GlobalIrq) { /* * If the SCI is remapped to a non-ISA global interrupt, * then override the vector we use to setup and allocate * the interrupt. */ if (intr->GlobalIrq > 15 && intr->SourceIrq == AcpiGbl_FADT.SciInterrupt) acpi_OverrideInterruptLevel(intr->GlobalIrq); else ioapic_remap_vector(new_ioapic, new_pin, intr->SourceIrq); if (madt_find_interrupt(intr->SourceIrq, &old_ioapic, &old_pin) != 0) printf("MADT: Could not find APIC for source IRQ %u\n", intr->SourceIrq); else if (ioapic_get_vector(old_ioapic, old_pin) == intr->SourceIrq) ioapic_disable_pin(old_ioapic, old_pin); } /* Program the polarity and trigger mode. */ ioapic_set_triggermode(new_ioapic, new_pin, trig); ioapic_set_polarity(new_ioapic, new_pin, pol); } /* * Parse an entry for an NMI routed to an IO APIC. */ static void madt_parse_nmi(ACPI_MADT_NMI_SOURCE *nmi) { void *ioapic; u_int pin; if (madt_find_interrupt(nmi->GlobalIrq, &ioapic, &pin) != 0) { printf("MADT: Could not find APIC for vector %u\n", nmi->GlobalIrq); return; } ioapic_set_nmi(ioapic, pin); if (!(nmi->IntiFlags & ACPI_MADT_TRIGGER_CONFORMS)) ioapic_set_triggermode(ioapic, pin, interrupt_trigger(nmi->IntiFlags, 0)); if (!(nmi->IntiFlags & ACPI_MADT_POLARITY_CONFORMS)) ioapic_set_polarity(ioapic, pin, interrupt_polarity(nmi->IntiFlags, 0)); } /* * Parse an entry for an NMI routed to a local APIC LVT pin. */ static void madt_handle_local_nmi(u_int acpi_id, UINT8 Lint, UINT16 IntiFlags) { u_int apic_id, pin; if (acpi_id == 0xffffffff) apic_id = APIC_ID_ALL; else if (madt_find_cpu(acpi_id, &apic_id) != 0) { if (bootverbose) printf("MADT: Ignoring local NMI routed to " "ACPI CPU %u\n", acpi_id); return; } if (Lint == 0) pin = APIC_LVT_LINT0; else pin = APIC_LVT_LINT1; lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_NMI); if (!(IntiFlags & ACPI_MADT_TRIGGER_CONFORMS)) lapic_set_lvt_triggermode(apic_id, pin, interrupt_trigger(IntiFlags, 0)); if (!(IntiFlags & ACPI_MADT_POLARITY_CONFORMS)) lapic_set_lvt_polarity(apic_id, pin, interrupt_polarity(IntiFlags, 0)); } static void madt_parse_local_nmi(ACPI_MADT_LOCAL_APIC_NMI *nmi) { madt_handle_local_nmi(nmi->ProcessorId == 0xff ? 0xffffffff : nmi->ProcessorId, nmi->Lint, nmi->IntiFlags); } static void madt_parse_local_x2apic_nmi(ACPI_MADT_LOCAL_X2APIC_NMI *nmi) { madt_handle_local_nmi(nmi->Uid, nmi->Lint, nmi->IntiFlags); } /* * Parse interrupt entries. 
*/ static void madt_parse_ints(ACPI_SUBTABLE_HEADER *entry, void *arg __unused) { switch (entry->Type) { case ACPI_MADT_TYPE_INTERRUPT_OVERRIDE: madt_parse_interrupt_override( (ACPI_MADT_INTERRUPT_OVERRIDE *)entry); break; case ACPI_MADT_TYPE_NMI_SOURCE: madt_parse_nmi((ACPI_MADT_NMI_SOURCE *)entry); break; case ACPI_MADT_TYPE_LOCAL_APIC_NMI: madt_parse_local_nmi((ACPI_MADT_LOCAL_APIC_NMI *)entry); break; case ACPI_MADT_TYPE_LOCAL_X2APIC_NMI: madt_parse_local_x2apic_nmi( (ACPI_MADT_LOCAL_X2APIC_NMI *)entry); break; } } /* * Setup per-CPU ACPI IDs. */ static void madt_set_ids(void *dummy) { struct lapic_info *la; struct pcpu *pc; u_int i; if (madt == NULL) return; KASSERT(lapics != NULL, ("local APICs not initialized")); CPU_FOREACH(i) { pc = pcpu_find(i); KASSERT(pc != NULL, ("no pcpu data for CPU %u", i)); la = &lapics[pc->pc_apic_id]; if (!la->la_enabled) panic("APIC: CPU with APIC ID %u is not enabled", pc->pc_apic_id); pc->pc_acpi_id = la->la_acpi_id; if (bootverbose) printf("APIC: CPU %u has ACPI ID %u\n", i, la->la_acpi_id); } } SYSINIT(madt_set_ids, SI_SUB_CPU, SI_ORDER_MIDDLE, madt_set_ids, NULL); Index: head/sys/x86/include/apicvar.h =================================================================== --- head/sys/x86/include/apicvar.h (revision 344854) +++ head/sys/x86/include/apicvar.h (revision 344855) @@ -1,492 +1,491 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _X86_APICVAR_H_ #define _X86_APICVAR_H_ /* * Local && I/O APIC variable definitions. 
*/ /* * Layout of local APIC interrupt vectors: * * 0xff (255) +-------------+ * | | 15 (Spurious / IPIs / Local Interrupts) * 0xf0 (240) +-------------+ * | | 14 (I/O Interrupts / Timer) * 0xe0 (224) +-------------+ * | | 13 (I/O Interrupts) * 0xd0 (208) +-------------+ * | | 12 (I/O Interrupts) * 0xc0 (192) +-------------+ * | | 11 (I/O Interrupts) * 0xb0 (176) +-------------+ * | | 10 (I/O Interrupts) * 0xa0 (160) +-------------+ * | | 9 (I/O Interrupts) * 0x90 (144) +-------------+ * | | 8 (I/O Interrupts / System Calls) * 0x80 (128) +-------------+ * | | 7 (I/O Interrupts) * 0x70 (112) +-------------+ * | | 6 (I/O Interrupts) * 0x60 (96) +-------------+ * | | 5 (I/O Interrupts) * 0x50 (80) +-------------+ * | | 4 (I/O Interrupts) * 0x40 (64) +-------------+ * | | 3 (I/O Interrupts) * 0x30 (48) +-------------+ * | | 2 (ATPIC Interrupts) * 0x20 (32) +-------------+ * | | 1 (Exceptions, traps, faults, etc.) * 0x10 (16) +-------------+ * | | 0 (Exceptions, traps, faults, etc.) * 0x00 (0) +-------------+ * * Note: 0x80 needs to be handled specially and not allocated to an * I/O device! */ #define xAPIC_MAX_APIC_ID 0xfe #define xAPIC_ID_ALL 0xff #define MAX_APIC_ID 0x200 #define APIC_ID_ALL 0xffffffff #define IOAPIC_MAX_ID xAPIC_MAX_APIC_ID /* I/O Interrupts are used for external devices such as ISA, PCI, etc. */ #define APIC_IO_INTS (IDT_IO_INTS + 16) #define APIC_NUM_IOINTS 191 /* The timer interrupt is used for clock handling and drives hardclock, etc. */ #define APIC_TIMER_INT (APIC_IO_INTS + APIC_NUM_IOINTS) /* ********************* !!! WARNING !!! ****************************** * Each local APIC has an interrupt receive FIFO that is two entries deep * for each interrupt priority class (the upper 4 bits of the interrupt * vector). * Once the FIFO is full, the APIC can no longer receive interrupts for that * class, and sending IPIs from other CPUs will be blocked. * To avoid deadlocks, there should be no more than two IPI interrupts * pending at the same time. * Currently this is guaranteed by dividing the IPIs into two groups, each of * which has at most one IPI interrupt pending. The first group is protected * by the smp_ipi_mtx and waits for the completion of the IPI (only one IPI * user at a time). The second group uses a single interrupt and a bitmap to * avoid redundant IPI interrupts. */ /* Interrupts for local APIC LVT entries other than the timer. */ #define APIC_LOCAL_INTS 240 #define APIC_ERROR_INT APIC_LOCAL_INTS #define APIC_THERMAL_INT (APIC_LOCAL_INTS + 1) #define APIC_CMC_INT (APIC_LOCAL_INTS + 2) #define APIC_IPI_INTS (APIC_LOCAL_INTS + 3) #define IPI_RENDEZVOUS (APIC_IPI_INTS) /* Inter-CPU rendezvous. */ #define IPI_INVLTLB (APIC_IPI_INTS + 1) /* TLB Shootdown IPIs */ #define IPI_INVLPG (APIC_IPI_INTS + 2) #define IPI_INVLRNG (APIC_IPI_INTS + 3) #define IPI_INVLCACHE (APIC_IPI_INTS + 4) /* Vector to handle bitmap based IPIs */ #define IPI_BITMAP_VECTOR (APIC_IPI_INTS + 5) /* IPIs handled by IPI_BITMAP_VECTOR */ #define IPI_AST 0 /* Generate software trap. */ #define IPI_PREEMPT 1 #define IPI_HARDCLOCK 2 #define IPI_BITMAP_LAST IPI_HARDCLOCK #define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST) #define IPI_STOP (APIC_IPI_INTS + 6) /* Stop CPU until restarted. */ #define IPI_SUSPEND (APIC_IPI_INTS + 7) /* Suspend CPU until restarted. */ #define IPI_DYN_FIRST (APIC_IPI_INTS + 8) #define IPI_DYN_LAST (253) /* IPIs allocated at runtime */ /* * IPI_STOP_HARD does not need to occupy a slot in the IPI vector space since * it is delivered using an NMI anyway.
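 *
 * (A concrete reading of the FIFO rule above, for illustration: a vector's
 * priority class is its upper four bits, i.e. class = vector >> 4, so
 * IPI_RENDEZVOUS at vector 243 falls in class 15, and all vectors from
 * 0xf0 through 0xff compete for the same two-entry FIFO.)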
*/ #define IPI_NMI_FIRST 254 #define IPI_TRACE 254 /* Interrupt for tracing. */ #define IPI_STOP_HARD 255 /* Stop CPU with an NMI. */ /* * The spurious interrupt can share the priority class with the IPIs since * it is not a normal interrupt. (It does not use the APIC's interrupt FIFO.) */ #define APIC_SPURIOUS_INT 255 #ifndef LOCORE #define APIC_IPI_DEST_SELF -1 #define APIC_IPI_DEST_ALL -2 #define APIC_IPI_DEST_OTHERS -3 #define APIC_BUS_UNKNOWN -1 #define APIC_BUS_ISA 0 #define APIC_BUS_EISA 1 #define APIC_BUS_PCI 2 #define APIC_BUS_MAX APIC_BUS_PCI #define IRQ_EXTINT -1 #define IRQ_NMI -2 #define IRQ_SMI -3 #define IRQ_DISABLED -4 /* * An APIC enumerator is a pseudo bus driver that enumerates APICs, including * CPUs and I/O APICs. */ struct apic_enumerator { const char *apic_name; int (*apic_probe)(void); int (*apic_probe_cpus)(void); int (*apic_setup_local)(void); int (*apic_setup_io)(void); SLIST_ENTRY(apic_enumerator) apic_next; }; inthand_t IDTVEC(apic_isr1), IDTVEC(apic_isr2), IDTVEC(apic_isr3), IDTVEC(apic_isr4), IDTVEC(apic_isr5), IDTVEC(apic_isr6), IDTVEC(apic_isr7), IDTVEC(cmcint), IDTVEC(errorint), IDTVEC(spuriousint), IDTVEC(timerint), IDTVEC(apic_isr1_pti), IDTVEC(apic_isr2_pti), IDTVEC(apic_isr3_pti), IDTVEC(apic_isr4_pti), IDTVEC(apic_isr5_pti), IDTVEC(apic_isr6_pti), IDTVEC(apic_isr7_pti), IDTVEC(cmcint_pti), IDTVEC(errorint_pti), IDTVEC(spuriousint_pti), IDTVEC(timerint_pti); extern vm_paddr_t lapic_paddr; extern int *apic_cpuids; void apic_register_enumerator(struct apic_enumerator *enumerator); void *ioapic_create(vm_paddr_t addr, int32_t apic_id, int intbase); int ioapic_disable_pin(void *cookie, u_int pin); int ioapic_get_vector(void *cookie, u_int pin); void ioapic_register(void *cookie); int ioapic_remap_vector(void *cookie, u_int pin, int vector); int ioapic_set_bus(void *cookie, u_int pin, int bus_type); int ioapic_set_extint(void *cookie, u_int pin); int ioapic_set_nmi(void *cookie, u_int pin); int ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol); int ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger); int ioapic_set_smi(void *cookie, u_int pin); /* * Struct containing pointers to APIC functions whose * implementation is run time selectable.
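 * The lapic_* and apic_* inlines below are thin dispatch stubs: a call such
 * as lapic_eoi() simply expands to apic_ops.eoi(). Selecting a different
 * local APIC access method (for instance the x2APIC interface when
 * x2apic_mode is set) is then just a matter of filling in this one table.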
*/ struct apic_ops { void (*create)(u_int, int); void (*init)(vm_paddr_t); void (*xapic_mode)(void); bool (*is_x2apic)(void); void (*setup)(int); void (*dump)(const char *); void (*disable)(void); void (*eoi)(void); int (*id)(void); int (*intr_pending)(u_int); void (*set_logical_id)(u_int, u_int, u_int); u_int (*cpuid)(u_int); /* Vectors */ u_int (*alloc_vector)(u_int, u_int); u_int (*alloc_vectors)(u_int, u_int *, u_int, u_int); void (*enable_vector)(u_int, u_int); void (*disable_vector)(u_int, u_int); void (*free_vector)(u_int, u_int, u_int); /* PMC */ int (*enable_pmc)(void); void (*disable_pmc)(void); void (*reenable_pmc)(void); /* CMC */ void (*enable_cmc)(void); /* AMD ELVT */ int (*enable_mca_elvt)(void); /* IPI */ void (*ipi_raw)(register_t, u_int); void (*ipi_vectored)(u_int, int); int (*ipi_wait)(int); int (*ipi_alloc)(inthand_t *ipifunc); void (*ipi_free)(int vector); /* LVT */ int (*set_lvt_mask)(u_int, u_int, u_char); int (*set_lvt_mode)(u_int, u_int, u_int32_t); int (*set_lvt_polarity)(u_int, u_int, enum intr_polarity); int (*set_lvt_triggermode)(u_int, u_int, enum intr_trigger); }; extern struct apic_ops apic_ops; static inline void lapic_create(u_int apic_id, int boot_cpu) { apic_ops.create(apic_id, boot_cpu); } static inline void lapic_init(vm_paddr_t addr) { apic_ops.init(addr); } static inline void lapic_xapic_mode(void) { apic_ops.xapic_mode(); } static inline bool lapic_is_x2apic(void) { return (apic_ops.is_x2apic()); } static inline void lapic_setup(int boot) { apic_ops.setup(boot); } static inline void lapic_dump(const char *str) { apic_ops.dump(str); } static inline void lapic_disable(void) { apic_ops.disable(); } static inline void lapic_eoi(void) { apic_ops.eoi(); } static inline int lapic_id(void) { return (apic_ops.id()); } static inline int lapic_intr_pending(u_int vector) { return (apic_ops.intr_pending(vector)); } /* XXX: UNUSED */ static inline void lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id) { apic_ops.set_logical_id(apic_id, cluster, cluster_id); } static inline u_int apic_cpuid(u_int apic_id) { return (apic_ops.cpuid(apic_id)); } static inline u_int apic_alloc_vector(u_int apic_id, u_int irq) { return (apic_ops.alloc_vector(apic_id, irq)); } static inline u_int apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align) { return (apic_ops.alloc_vectors(apic_id, irqs, count, align)); } static inline void apic_enable_vector(u_int apic_id, u_int vector) { apic_ops.enable_vector(apic_id, vector); } static inline void apic_disable_vector(u_int apic_id, u_int vector) { apic_ops.disable_vector(apic_id, vector); } static inline void apic_free_vector(u_int apic_id, u_int vector, u_int irq) { apic_ops.free_vector(apic_id, vector, irq); } static inline int lapic_enable_pmc(void) { return (apic_ops.enable_pmc()); } static inline void lapic_disable_pmc(void) { apic_ops.disable_pmc(); } static inline void lapic_reenable_pmc(void) { apic_ops.reenable_pmc(); } static inline void lapic_enable_cmc(void) { apic_ops.enable_cmc(); } static inline int lapic_enable_mca_elvt(void) { return (apic_ops.enable_mca_elvt()); } static inline void lapic_ipi_raw(register_t icrlo, u_int dest) { apic_ops.ipi_raw(icrlo, dest); } static inline void lapic_ipi_vectored(u_int vector, int dest) { apic_ops.ipi_vectored(vector, dest); } static inline int lapic_ipi_wait(int delay) { return (apic_ops.ipi_wait(delay)); } static inline int lapic_ipi_alloc(inthand_t *ipifunc) { return (apic_ops.ipi_alloc(ipifunc)); } static inline void lapic_ipi_free(int vector) { 
apic_ops.ipi_free(vector); } static inline int lapic_set_lvt_mask(u_int apic_id, u_int lvt, u_char masked) { return (apic_ops.set_lvt_mask(apic_id, lvt, masked)); } static inline int lapic_set_lvt_mode(u_int apic_id, u_int lvt, u_int32_t mode) { return (apic_ops.set_lvt_mode(apic_id, lvt, mode)); } static inline int lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol) { return (apic_ops.set_lvt_polarity(apic_id, lvt, pol)); } static inline int lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger) { return (apic_ops.set_lvt_triggermode(apic_id, lvt, trigger)); } void lapic_handle_cmc(void); void lapic_handle_error(void); void lapic_handle_intr(int vector, struct trapframe *frame); void lapic_handle_timer(struct trapframe *frame); int ioapic_get_rid(u_int apic_id, uint16_t *ridp); extern int x2apic_mode; extern int lapic_eoi_suppression; #ifdef _SYS_SYSCTL_H_ SYSCTL_DECL(_hw_apic); #endif #endif /* !LOCORE */ #endif /* _X86_APICVAR_H_ */ Index: head/sys/x86/include/intr_machdep.h =================================================================== --- head/sys/x86/include/intr_machdep.h (revision 344854) +++ head/sys/x86/include/intr_machdep.h (revision 344855) @@ -1,176 +1,175 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __X86_INTR_MACHDEP_H__ #define __X86_INTR_MACHDEP_H__ #ifdef _KERNEL /* * Values used in determining the allocation of IRQ values among * different types of I/O interrupts. These values are used as * indices into an interrupt source array to map I/O interrupts to a * device interrupt source, whether it be a pin on an interrupt * controller or an MSI interrupt. The 16 ISA IRQs are assigned fixed * IDT vectors, but all other device interrupts allocate IDT vectors * on demand. Currently we have 191 IDT vectors available for device * interrupts on each CPU. On many systems with I/O APICs, a lot of * the IRQs are not used, so the total number of IRQ values reserved * can exceed the number of available IDT slots. * * The first 16 IRQs (0 - 15) are reserved for ISA IRQs. Interrupt * pins on I/O APICs for non-ISA interrupts use IRQ values starting at * IRQ 17.
This layout matches the GSI numbering used by ACPI so that * IRQ values returned by ACPI methods such as _CRS can be used * directly by the ACPI bus driver. * * MSI interrupts allocate a block of interrupts starting at the end * of the I/O APIC range. When running under the Xen Hypervisor, an * additional range of IRQ values are available for binding to event * channel events. */ extern u_int first_msi_irq; extern u_int num_io_irqs; extern u_int num_msi_irqs; /* * Default base address for MSI messages on x86 platforms. */ #define MSI_INTEL_ADDR_BASE 0xfee00000 #ifndef LOCORE typedef void inthand_t(void); #define IDTVEC(name) __CONCAT(X,name) struct intsrc; /* * Methods that a PIC provides to mask/unmask a given interrupt source, * "turn on" the interrupt on the CPU side by setting up an IDT entry, and * return the vector associated with this source. */ struct pic { void (*pic_register_sources)(struct pic *); void (*pic_enable_source)(struct intsrc *); void (*pic_disable_source)(struct intsrc *, int); void (*pic_eoi_source)(struct intsrc *); void (*pic_enable_intr)(struct intsrc *); void (*pic_disable_intr)(struct intsrc *); int (*pic_vector)(struct intsrc *); int (*pic_source_pending)(struct intsrc *); void (*pic_suspend)(struct pic *); void (*pic_resume)(struct pic *, bool suspend_cancelled); int (*pic_config_intr)(struct intsrc *, enum intr_trigger, enum intr_polarity); int (*pic_assign_cpu)(struct intsrc *, u_int apic_id); void (*pic_reprogram_pin)(struct intsrc *); TAILQ_ENTRY(pic) pics; }; /* Flags for pic_disable_source() */ enum { PIC_EOI, PIC_NO_EOI, }; /* * An interrupt source. The upper-layer code uses the PIC methods to * control a given source. The lower-layer PIC drivers can store additional * private data in a given interrupt source such as an interrupt pin number * or an I/O APIC pointer. */ struct intsrc { struct pic *is_pic; struct intr_event *is_event; u_long *is_count; u_long *is_straycount; u_int is_index; u_int is_handlers; u_int is_domain; u_int is_cpu; }; struct trapframe; #ifdef SMP extern cpuset_t intr_cpus; #endif extern struct mtx icu_lock; extern int elcr_found; #ifdef SMP extern int msix_disable_migration; #endif #ifndef DEV_ATPIC void atpic_reset(void); #endif /* XXX: The elcr_* prototypes probably belong somewhere else. 
*/ int elcr_probe(void); enum intr_trigger elcr_read_trigger(u_int irq); void elcr_resume(void); void elcr_write_trigger(u_int irq, enum intr_trigger trigger); #ifdef SMP void intr_add_cpu(u_int cpu); #endif int intr_add_handler(const char *name, int vector, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep, int domain); #ifdef SMP int intr_bind(u_int vector, u_char cpu); #endif int intr_config_intr(int vector, enum intr_trigger trig, enum intr_polarity pol); int intr_describe(u_int vector, void *ih, const char *descr); void intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame); u_int intr_next_cpu(int domain); struct intsrc *intr_lookup_source(int vector); int intr_register_pic(struct pic *pic); int intr_register_source(struct intsrc *isrc); int intr_remove_handler(void *cookie); void intr_resume(bool suspend_cancelled); void intr_suspend(void); void intr_reprogram(void); void intrcnt_add(const char *name, u_long **countp); void nexus_add_irq(u_long irq); int msi_alloc(device_t dev, int count, int maxcount, int *irqs); void msi_init(void); int msi_map(int irq, uint64_t *addr, uint32_t *data); int msi_release(int *irqs, int count); int msix_alloc(device_t dev, int *irq); int msix_release(int irq); #ifdef XENHVM void xen_intr_alloc_irqs(void); #endif #endif /* !LOCORE */ #endif /* _KERNEL */ #endif /* !__X86_INTR_MACHDEP_H__ */ Index: head/sys/x86/isa/atpic.c =================================================================== --- head/sys/x86/isa/atpic.c (revision 344854) +++ head/sys/x86/isa/atpic.c (revision 344855) @@ -1,633 +1,632 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * PIC driver for the 8259A Master and Slave PICs in PC/AT machines. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_auto_eoi.h" #include "opt_isa.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __amd64__ #define SDT_ATPIC SDT_SYSIGT #define GSEL_ATPIC 0 #else #define SDT_ATPIC SDT_SYS386IGT #define GSEL_ATPIC GSEL(GCODE_SEL, SEL_KPL) #endif #define MASTER 0 #define SLAVE 1 #define IMEN_MASK(ai) (IRQ_MASK((ai)->at_irq)) #define NUM_ISA_IRQS 16 static void atpic_init(void *dummy); inthand_t IDTVEC(atpic_intr0), IDTVEC(atpic_intr1), IDTVEC(atpic_intr2), IDTVEC(atpic_intr3), IDTVEC(atpic_intr4), IDTVEC(atpic_intr5), IDTVEC(atpic_intr6), IDTVEC(atpic_intr7), IDTVEC(atpic_intr8), IDTVEC(atpic_intr9), IDTVEC(atpic_intr10), IDTVEC(atpic_intr11), IDTVEC(atpic_intr12), IDTVEC(atpic_intr13), IDTVEC(atpic_intr14), IDTVEC(atpic_intr15); /* XXXKIB i386 uses stubs until pti comes */ inthand_t IDTVEC(atpic_intr0_pti), IDTVEC(atpic_intr1_pti), IDTVEC(atpic_intr2_pti), IDTVEC(atpic_intr3_pti), IDTVEC(atpic_intr4_pti), IDTVEC(atpic_intr5_pti), IDTVEC(atpic_intr6_pti), IDTVEC(atpic_intr7_pti), IDTVEC(atpic_intr8_pti), IDTVEC(atpic_intr9_pti), IDTVEC(atpic_intr10_pti), IDTVEC(atpic_intr11_pti), IDTVEC(atpic_intr12_pti), IDTVEC(atpic_intr13_pti), IDTVEC(atpic_intr14_pti), IDTVEC(atpic_intr15_pti); #define IRQ(ap, ai) ((ap)->at_irqbase + (ai)->at_irq) #define ATPIC(io, base, eoi) { \ .at_pic = { \ .pic_register_sources = atpic_register_sources, \ .pic_enable_source = atpic_enable_source, \ .pic_disable_source = atpic_disable_source, \ .pic_eoi_source = (eoi), \ .pic_enable_intr = atpic_enable_intr, \ .pic_disable_intr = atpic_disable_intr, \ .pic_vector = atpic_vector, \ .pic_source_pending = atpic_source_pending, \ .pic_resume = atpic_resume, \ .pic_config_intr = atpic_config_intr, \ .pic_assign_cpu = atpic_assign_cpu \ }, \ .at_ioaddr = (io), \ .at_irqbase = (base), \ .at_intbase = IDT_IO_INTS + (base), \ .at_imen = 0xff, \ } #define INTSRC(irq) \ { { &atpics[(irq) / 8].at_pic }, IDTVEC(atpic_intr ## irq ), \ IDTVEC(atpic_intr ## irq ## _pti), (irq) % 8 } struct atpic { struct pic at_pic; int at_ioaddr; int at_irqbase; uint8_t at_intbase; uint8_t at_imen; }; struct atpic_intsrc { struct intsrc at_intsrc; inthand_t *at_intr, *at_intr_pti; int at_irq; /* Relative to PIC base. 
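 * (For example, global IRQ 9 lives on the slave as at_irq 1, since the
 * slave's at_irqbase is 8; see the IRQ() macro above.)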
*/ enum intr_trigger at_trigger; u_long at_count; u_long at_straycount; }; static void atpic_register_sources(struct pic *pic); static void atpic_enable_source(struct intsrc *isrc); static void atpic_disable_source(struct intsrc *isrc, int eoi); static void atpic_eoi_master(struct intsrc *isrc); static void atpic_eoi_slave(struct intsrc *isrc); static void atpic_enable_intr(struct intsrc *isrc); static void atpic_disable_intr(struct intsrc *isrc); static int atpic_vector(struct intsrc *isrc); static void atpic_resume(struct pic *pic, bool suspend_cancelled); static int atpic_source_pending(struct intsrc *isrc); static int atpic_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol); static int atpic_assign_cpu(struct intsrc *isrc, u_int apic_id); static void i8259_init(struct atpic *pic, int slave); static struct atpic atpics[] = { ATPIC(IO_ICU1, 0, atpic_eoi_master), ATPIC(IO_ICU2, 8, atpic_eoi_slave) }; static struct atpic_intsrc atintrs[] = { INTSRC(0), INTSRC(1), INTSRC(2), INTSRC(3), INTSRC(4), INTSRC(5), INTSRC(6), INTSRC(7), INTSRC(8), INTSRC(9), INTSRC(10), INTSRC(11), INTSRC(12), INTSRC(13), INTSRC(14), INTSRC(15), }; CTASSERT(nitems(atintrs) == NUM_ISA_IRQS); static __inline void _atpic_eoi_master(struct intsrc *isrc) { KASSERT(isrc->is_pic == &atpics[MASTER].at_pic, ("%s: mismatched pic", __func__)); #ifndef AUTO_EOI_1 outb(atpics[MASTER].at_ioaddr, OCW2_EOI); #endif } /* * The data sheet says no auto-EOI on slave, but it sometimes works. * So, if AUTO_EOI_2 is enabled, we use it. */ static __inline void _atpic_eoi_slave(struct intsrc *isrc) { KASSERT(isrc->is_pic == &atpics[SLAVE].at_pic, ("%s: mismatched pic", __func__)); #ifndef AUTO_EOI_2 outb(atpics[SLAVE].at_ioaddr, OCW2_EOI); #ifndef AUTO_EOI_1 outb(atpics[MASTER].at_ioaddr, OCW2_EOI); #endif #endif } static void atpic_register_sources(struct pic *pic) { struct atpic *ap = (struct atpic *)pic; struct atpic_intsrc *ai; int i; /* * If any of the ISA IRQs have an interrupt source already, then * assume that the I/O APICs are being used and don't register any * of our interrupt sources. This makes sure we don't accidentally * use mixed mode. The "accidental" use could otherwise occur on * machines that route the ACPI SCI interrupt to a different ISA * IRQ (at least one machine routes it to IRQ 13), thus disabling * that APIC ISA routing and allowing the ATPIC source for that IRQ * to leak through. We used to depend on this feature for routing * IRQ0 via mixed mode, but now we don't use mixed mode at all. * * To avoid the slave failing to register its sources after the * master registers its own, register all IRQs when this function * is called on the master. */ if (ap != &atpics[MASTER]) return; for (i = 0; i < NUM_ISA_IRQS; i++) if (intr_lookup_source(i) != NULL) return; /* Loop through all interrupt sources and add them.
*/ for (i = 0, ai = atintrs; i < NUM_ISA_IRQS; i++, ai++) { if (i == ICU_SLAVEID) continue; intr_register_source(&ai->at_intsrc); } } static void atpic_enable_source(struct intsrc *isrc) { struct atpic_intsrc *ai = (struct atpic_intsrc *)isrc; struct atpic *ap = (struct atpic *)isrc->is_pic; spinlock_enter(); if (ap->at_imen & IMEN_MASK(ai)) { ap->at_imen &= ~IMEN_MASK(ai); outb(ap->at_ioaddr + ICU_IMR_OFFSET, ap->at_imen); } spinlock_exit(); } static void atpic_disable_source(struct intsrc *isrc, int eoi) { struct atpic_intsrc *ai = (struct atpic_intsrc *)isrc; struct atpic *ap = (struct atpic *)isrc->is_pic; spinlock_enter(); if (ai->at_trigger != INTR_TRIGGER_EDGE) { ap->at_imen |= IMEN_MASK(ai); outb(ap->at_ioaddr + ICU_IMR_OFFSET, ap->at_imen); } /* * Take care to call these functions directly instead of through * a function pointer. All of the referenced variables should * still be hot in the cache. */ if (eoi == PIC_EOI) { if (isrc->is_pic == &atpics[MASTER].at_pic) _atpic_eoi_master(isrc); else _atpic_eoi_slave(isrc); } spinlock_exit(); } static void atpic_eoi_master(struct intsrc *isrc) { #ifndef AUTO_EOI_1 spinlock_enter(); _atpic_eoi_master(isrc); spinlock_exit(); #endif } static void atpic_eoi_slave(struct intsrc *isrc) { #ifndef AUTO_EOI_2 spinlock_enter(); _atpic_eoi_slave(isrc); spinlock_exit(); #endif } static void atpic_enable_intr(struct intsrc *isrc) { } static void atpic_disable_intr(struct intsrc *isrc) { } static int atpic_vector(struct intsrc *isrc) { struct atpic_intsrc *ai = (struct atpic_intsrc *)isrc; struct atpic *ap = (struct atpic *)isrc->is_pic; return (IRQ(ap, ai)); } static int atpic_source_pending(struct intsrc *isrc) { struct atpic_intsrc *ai = (struct atpic_intsrc *)isrc; struct atpic *ap = (struct atpic *)isrc->is_pic; return (inb(ap->at_ioaddr) & IMEN_MASK(ai)); } static void atpic_resume(struct pic *pic, bool suspend_cancelled) { struct atpic *ap = (struct atpic *)pic; i8259_init(ap, ap == &atpics[SLAVE]); if (ap == &atpics[SLAVE] && elcr_found) elcr_resume(); } static int atpic_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol) { struct atpic_intsrc *ai = (struct atpic_intsrc *)isrc; u_int vector; /* Map conforming values to edge/hi and sanity check the values. */ if (trig == INTR_TRIGGER_CONFORM) trig = INTR_TRIGGER_EDGE; if (pol == INTR_POLARITY_CONFORM) pol = INTR_POLARITY_HIGH; vector = atpic_vector(isrc); if ((trig == INTR_TRIGGER_EDGE && pol == INTR_POLARITY_LOW) || (trig == INTR_TRIGGER_LEVEL && pol == INTR_POLARITY_HIGH)) { printf( "atpic: Mismatched config for IRQ%u: trigger %s, polarity %s\n", vector, trig == INTR_TRIGGER_EDGE ? "edge" : "level", pol == INTR_POLARITY_HIGH ? "high" : "low"); return (EINVAL); } /* If there is no change, just return. */ if (ai->at_trigger == trig) return (0); /* * Certain IRQs can never be level/lo, so don't try to set them * that way if asked. At least some ELCR registers ignore setting * these bits as well. */ if ((vector == 0 || vector == 1 || vector == 2 || vector == 13) && trig == INTR_TRIGGER_LEVEL) { if (bootverbose) printf( "atpic: Ignoring invalid level/low configuration for IRQ%u\n", vector); return (EINVAL); } if (!elcr_found) { if (bootverbose) printf("atpic: No ELCR to configure IRQ%u as %s\n", vector, trig == INTR_TRIGGER_EDGE ? "edge/high" : "level/low"); return (ENXIO); } if (bootverbose) printf("atpic: Programming IRQ%u as %s\n", vector, trig == INTR_TRIGGER_EDGE ? 
"edge/high" : "level/low"); spinlock_enter(); elcr_write_trigger(atpic_vector(isrc), trig); ai->at_trigger = trig; spinlock_exit(); return (0); } static int atpic_assign_cpu(struct intsrc *isrc, u_int apic_id) { /* * 8259A's are only used in UP in which case all interrupts always * go to the sole CPU and this function shouldn't even be called. */ panic("%s: bad cookie", __func__); } static void i8259_init(struct atpic *pic, int slave) { int imr_addr; /* Reset the PIC and program with next four bytes. */ spinlock_enter(); outb(pic->at_ioaddr, ICW1_RESET | ICW1_IC4); imr_addr = pic->at_ioaddr + ICU_IMR_OFFSET; /* Start vector. */ outb(imr_addr, pic->at_intbase); /* * Setup slave links. For the master pic, indicate what line * the slave is configured on. For the slave indicate * which line on the master we are connected to. */ if (slave) outb(imr_addr, ICU_SLAVEID); else outb(imr_addr, IRQ_MASK(ICU_SLAVEID)); /* Set mode. */ if (slave) outb(imr_addr, SLAVE_MODE); else outb(imr_addr, MASTER_MODE); /* Set interrupt enable mask. */ outb(imr_addr, pic->at_imen); /* Reset is finished, default to IRR on read. */ outb(pic->at_ioaddr, OCW3_SEL | OCW3_RR); /* OCW2_L1 sets priority order to 3-7, 0-2 (com2 first). */ if (!slave) outb(pic->at_ioaddr, OCW2_R | OCW2_SL | OCW2_L1); spinlock_exit(); } void atpic_startup(void) { struct atpic_intsrc *ai; int i; /* Start off with all interrupts disabled. */ i8259_init(&atpics[MASTER], 0); i8259_init(&atpics[SLAVE], 1); atpic_enable_source((struct intsrc *)&atintrs[ICU_SLAVEID]); /* Install low-level interrupt handlers for all of our IRQs. */ for (i = 0, ai = atintrs; i < NUM_ISA_IRQS; i++, ai++) { if (i == ICU_SLAVEID) continue; ai->at_intsrc.is_count = &ai->at_count; ai->at_intsrc.is_straycount = &ai->at_straycount; setidt(((struct atpic *)ai->at_intsrc.is_pic)->at_intbase + ai->at_irq, pti ? ai->at_intr_pti : ai->at_intr, SDT_ATPIC, SEL_KPL, GSEL_ATPIC); } /* * Look for an ELCR. If we find one, update the trigger modes. * If we don't find one, assume that IRQs 0, 1, 2, and 13 are * edge triggered and that everything else is level triggered. * We only use the trigger information to reprogram the ELCR if * we have one and as an optimization to avoid masking edge * triggered interrupts. For the case that we don't have an ELCR, * it doesn't hurt to mask an edge triggered interrupt, so we * assume level trigger for any interrupt that we aren't sure is * edge triggered. */ if (elcr_found) { for (i = 0, ai = atintrs; i < NUM_ISA_IRQS; i++, ai++) ai->at_trigger = elcr_read_trigger(i); } else { for (i = 0, ai = atintrs; i < NUM_ISA_IRQS; i++, ai++) switch (i) { case 0: case 1: case 2: case 8: case 13: ai->at_trigger = INTR_TRIGGER_EDGE; break; default: ai->at_trigger = INTR_TRIGGER_LEVEL; break; } } } static void atpic_init(void *dummy __unused) { /* * Register our PICs, even if we aren't going to use any of their * pins so that they are suspended and resumed. */ if (intr_register_pic(&atpics[0].at_pic) != 0 || intr_register_pic(&atpics[1].at_pic) != 0) panic("Unable to register ATPICs"); if (num_io_irqs == 0) num_io_irqs = NUM_ISA_IRQS; } SYSINIT(atpic_init, SI_SUB_INTR, SI_ORDER_FOURTH, atpic_init, NULL); void atpic_handle_intr(u_int vector, struct trapframe *frame) { struct intsrc *isrc; KASSERT(vector < NUM_ISA_IRQS, ("unknown int %u\n", vector)); isrc = &atintrs[vector].at_intsrc; /* * If we don't have an event, see if this is a spurious * interrupt. 
*/ if (isrc->is_event == NULL && (vector == 7 || vector == 15)) { int port, isr; /* * Read the ISR register to see if IRQ 7/15 is really * pending. Reset read register back to IRR when done. */ port = ((struct atpic *)isrc->is_pic)->at_ioaddr; spinlock_enter(); outb(port, OCW3_SEL | OCW3_RR | OCW3_RIS); isr = inb(port); outb(port, OCW3_SEL | OCW3_RR); spinlock_exit(); if ((isr & IRQ_MASK(7)) == 0) return; } intr_execute_handlers(isrc, frame); } #ifdef DEV_ISA /* * Bus attachment for the ISA PIC. */ static struct isa_pnp_id atpic_ids[] = { { 0x0000d041 /* PNP0000 */, "AT interrupt controller" }, { 0 } }; static int atpic_probe(device_t dev) { int result; result = ISA_PNP_PROBE(device_get_parent(dev), dev, atpic_ids); if (result <= 0) device_quiet(dev); return (result); } /* * We might be granted IRQ 2, as this is typically consumed by chaining * between the two PIC components. If we're using the APIC, however, * this may not be the case, and as such we should free the resource. * (XXX untested) * * The generic ISA attachment code will handle allocating any other resources * that we don't explicitly claim here. */ static int atpic_attach(device_t dev) { struct resource *res; int rid; /* Try to allocate our IRQ and then free it. */ rid = 0; res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, 0); if (res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, res); return (0); } /* * Return a bitmap of the current interrupt requests. This is 8259-specific * and is only suitable for use at probe time. */ intrmask_t isa_irq_pending(void) { u_char irr1; u_char irr2; irr1 = inb(IO_ICU1); irr2 = inb(IO_ICU2); return ((irr2 << 8) | irr1); } static device_method_t atpic_methods[] = { /* Device interface */ DEVMETHOD(device_probe, atpic_probe), DEVMETHOD(device_attach, atpic_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; static driver_t atpic_driver = { "atpic", atpic_methods, 1, /* no softc */ }; static devclass_t atpic_devclass; DRIVER_MODULE(atpic, isa, atpic_driver, atpic_devclass, 0, 0); DRIVER_MODULE(atpic, acpi, atpic_driver, atpic_devclass, 0, 0); ISA_PNP_INFO(atpic_ids); #endif /* DEV_ISA */ Index: head/sys/x86/isa/elcr.c =================================================================== --- head/sys/x86/isa/elcr.c (revision 344854) +++ head/sys/x86/isa/elcr.c (revision 344855) @@ -1,138 +1,137 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * The ELCR is a register that controls the trigger mode and polarity of * EISA and ISA interrupts. In FreeBSD 3.x and 4.x, the ELCR was only * consulted for determining the appropriate trigger mode of EISA * interrupts when using an APIC. However, it seems that almost all * systems that include PCI also include an ELCR that manages the ISA * IRQs 0 through 15. Thus, we check for the presence of an ELCR on * every machine by checking to see if the values found at bootup are * sane. Note that the polarity of ISA and EISA IRQs is linked to the * trigger mode. All edge triggered IRQs use active-hi polarity, and * all level triggered interrupts use active-lo polarity. * * The format of the ELCR is simple: it is a 16-bit bitmap where bit 0 * controls IRQ 0, bit 1 controls IRQ 1, etc. If the bit is zero, the * associated IRQ is edge triggered. If the bit is one, the IRQ is * level triggered. */ #include #include #include #include #define ELCR_PORT 0x4d0 #define ELCR_MASK(irq) (1 << (irq)) static int elcr_status; int elcr_found; /* * Check to see if we have what looks like a valid ELCR. We do this by * verifying that IRQs 0, 1, 2, 8, and 13 are all edge triggered. */ int elcr_probe(void) { int i; elcr_status = inb(ELCR_PORT) | inb(ELCR_PORT + 1) << 8; if ((elcr_status & (ELCR_MASK(0) | ELCR_MASK(1) | ELCR_MASK(2) | ELCR_MASK(8) | ELCR_MASK(13))) != 0) return (ENXIO); if (bootverbose) { printf("ELCR Found. ISA IRQs programmed as:\n"); for (i = 0; i < 16; i++) printf(" %2d", i); printf("\n"); for (i = 0; i < 16; i++) if (elcr_status & ELCR_MASK(i)) printf(" L"); else printf(" E"); printf("\n"); } if (resource_disabled("elcr", 0)) return (ENXIO); elcr_found = 1; return (0); } /* * Return the trigger mode (edge or level) of the specified IRQ. */ enum intr_trigger elcr_read_trigger(u_int irq) { KASSERT(elcr_found, ("%s: no ELCR was found!", __func__)); KASSERT(irq <= 15, ("%s: invalid IRQ %u", __func__, irq)); if (elcr_status & ELCR_MASK(irq)) return (INTR_TRIGGER_LEVEL); else return (INTR_TRIGGER_EDGE); } /* * Set the trigger mode of the specified IRQ to either edge or level * triggered.
*/ void elcr_write_trigger(u_int irq, enum intr_trigger trigger) { int new_status; KASSERT(elcr_found, ("%s: no ELCR was found!", __func__)); KASSERT(irq <= 15, ("%s: invalid IRQ %u", __func__, irq)); if (trigger == INTR_TRIGGER_LEVEL) new_status = elcr_status | ELCR_MASK(irq); else new_status = elcr_status & ~ELCR_MASK(irq); if (new_status == elcr_status) return; elcr_status = new_status; if (irq >= 8) outb(ELCR_PORT + 1, elcr_status >> 8); else outb(ELCR_PORT, elcr_status & 0xff); } void elcr_resume(void) { KASSERT(elcr_found, ("%s: no ELCR was found!", __func__)); outb(ELCR_PORT, elcr_status & 0xff); outb(ELCR_PORT + 1, elcr_status >> 8); } Index: head/sys/x86/x86/intr_machdep.c =================================================================== --- head/sys/x86/x86/intr_machdep.c (revision 344854) +++ head/sys/x86/x86/intr_machdep.c (revision 344855) @@ -1,848 +1,847 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Machine dependent interrupt code for x86. For x86, we have to * deal with different PICs. Thus, we use the passed in vector to lookup * an interrupt source associated with that vector. The interrupt source * describes which PIC the source belongs to and includes methods to handle * that source. */ #include "opt_atpic.h" #include "opt_ddb.h" #include "opt_smp.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #ifndef DEV_ATPIC #include #include #include #include #include #endif #include #define MAX_STRAY_LOG 5 typedef void (*mask_fn)(void *); static int intrcnt_index; static struct intsrc **interrupt_sources; #ifdef SMP static struct intsrc **interrupt_sorted; static int intrbalance; SYSCTL_INT(_hw, OID_AUTO, intrbalance, CTLFLAG_RW, &intrbalance, 0, "Interrupt auto-balance interval (seconds). 
Zero disables."); static struct timeout_task intrbalance_task; #endif static struct sx intrsrc_lock; static struct mtx intrpic_lock; static struct mtx intrcnt_lock; static TAILQ_HEAD(pics_head, pic) pics; u_int num_io_irqs; #if defined(SMP) && !defined(EARLY_AP_STARTUP) static int assign_cpu; #endif u_long *intrcnt; char *intrnames; size_t sintrcnt = sizeof(intrcnt); size_t sintrnames = sizeof(intrnames); int nintrcnt; static MALLOC_DEFINE(M_INTR, "intr", "Interrupt Sources"); static int intr_assign_cpu(void *arg, int cpu); static void intr_disable_src(void *arg); static void intr_init(void *__dummy); static int intr_pic_registered(struct pic *pic); static void intrcnt_setname(const char *name, int index); static void intrcnt_updatename(struct intsrc *is); static void intrcnt_register(struct intsrc *is); /* * SYSINIT levels for SI_SUB_INTR: * * SI_ORDER_FIRST: Initialize locks and pics TAILQ, xen_hvm_cpu_init * SI_ORDER_SECOND: Xen PICs * SI_ORDER_THIRD: Add I/O APIC PICs, alloc MSI and Xen IRQ ranges * SI_ORDER_FOURTH: Add 8259A PICs * SI_ORDER_FOURTH + 1: Finalize interrupt count and add interrupt sources * SI_ORDER_MIDDLE: SMP interrupt counters * SI_ORDER_ANY: Enable interrupts on BSP */ static int intr_pic_registered(struct pic *pic) { struct pic *p; TAILQ_FOREACH(p, &pics, pics) { if (p == pic) return (1); } return (0); } /* * Register a new interrupt controller (PIC). This is to support suspend * and resume where we suspend/resume controllers rather than individual * sources. This also allows controllers with no active sources (such as * 8259As in a system using the APICs) to participate in suspend and resume. */ int intr_register_pic(struct pic *pic) { int error; mtx_lock(&intrpic_lock); if (intr_pic_registered(pic)) error = EBUSY; else { TAILQ_INSERT_TAIL(&pics, pic, pics); error = 0; } mtx_unlock(&intrpic_lock); return (error); } /* * Allocate interrupt source arrays and register interrupt sources * once the number of interrupts is known. */ static void intr_init_sources(void *arg) { struct pic *pic; MPASS(num_io_irqs > 0); interrupt_sources = mallocarray(num_io_irqs, sizeof(*interrupt_sources), M_INTR, M_WAITOK | M_ZERO); #ifdef SMP interrupt_sorted = mallocarray(num_io_irqs, sizeof(*interrupt_sorted), M_INTR, M_WAITOK | M_ZERO); #endif /* * - 1 ??? dummy counter. * - 2 counters for each I/O interrupt. * - 1 counter for each CPU for lapic timer. * - 1 counter for each CPU for the Hyper-V vmbus driver. * - 8 counters for each CPU for IPI counters for SMP. */ nintrcnt = 1 + num_io_irqs * 2 + mp_ncpus * 2; #ifdef COUNT_IPIS if (mp_ncpus > 1) nintrcnt += 8 * mp_ncpus; #endif intrcnt = mallocarray(nintrcnt, sizeof(u_long), M_INTR, M_WAITOK | M_ZERO); intrnames = mallocarray(nintrcnt, MAXCOMLEN + 1, M_INTR, M_WAITOK | M_ZERO); sintrcnt = nintrcnt * sizeof(u_long); sintrnames = nintrcnt * (MAXCOMLEN + 1); intrcnt_setname("???", 0); intrcnt_index = 1; /* * NB: intrpic_lock is not held here to avoid LORs due to * malloc() in intr_register_source(). However, we are still * single-threaded at this point in startup so the list of * PICs shouldn't change. */ TAILQ_FOREACH(pic, &pics, pics) { if (pic->pic_register_sources != NULL) pic->pic_register_sources(pic); } } SYSINIT(intr_init_sources, SI_SUB_INTR, SI_ORDER_FOURTH + 1, intr_init_sources, NULL); /* * Register a new interrupt source with the global interrupt system. * The global interrupts need to be disabled when this function is * called. 
*/ int intr_register_source(struct intsrc *isrc) { int error, vector; KASSERT(intr_pic_registered(isrc->is_pic), ("unregistered PIC")); vector = isrc->is_pic->pic_vector(isrc); KASSERT(vector < num_io_irqs, ("IRQ %d too large (%u irqs)", vector, num_io_irqs)); if (interrupt_sources[vector] != NULL) return (EEXIST); error = intr_event_create(&isrc->is_event, isrc, 0, vector, intr_disable_src, (mask_fn)isrc->is_pic->pic_enable_source, (mask_fn)isrc->is_pic->pic_eoi_source, intr_assign_cpu, "irq%d:", vector); if (error) return (error); sx_xlock(&intrsrc_lock); if (interrupt_sources[vector] != NULL) { sx_xunlock(&intrsrc_lock); intr_event_destroy(isrc->is_event); return (EEXIST); } intrcnt_register(isrc); interrupt_sources[vector] = isrc; isrc->is_handlers = 0; sx_xunlock(&intrsrc_lock); return (0); } struct intsrc * intr_lookup_source(int vector) { if (vector < 0 || vector >= num_io_irqs) return (NULL); return (interrupt_sources[vector]); } int intr_add_handler(const char *name, int vector, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep, int domain) { struct intsrc *isrc; int error; isrc = intr_lookup_source(vector); if (isrc == NULL) return (EINVAL); error = intr_event_add_handler(isrc->is_event, name, filter, handler, arg, intr_priority(flags), flags, cookiep); if (error == 0) { sx_xlock(&intrsrc_lock); intrcnt_updatename(isrc); isrc->is_handlers++; if (isrc->is_handlers == 1) { isrc->is_domain = domain; isrc->is_pic->pic_enable_intr(isrc); isrc->is_pic->pic_enable_source(isrc); } sx_xunlock(&intrsrc_lock); } return (error); } int intr_remove_handler(void *cookie) { struct intsrc *isrc; int error; isrc = intr_handler_source(cookie); error = intr_event_remove_handler(cookie); if (error == 0) { sx_xlock(&intrsrc_lock); isrc->is_handlers--; if (isrc->is_handlers == 0) { isrc->is_pic->pic_disable_source(isrc, PIC_NO_EOI); isrc->is_pic->pic_disable_intr(isrc); } intrcnt_updatename(isrc); sx_xunlock(&intrsrc_lock); } return (error); } int intr_config_intr(int vector, enum intr_trigger trig, enum intr_polarity pol) { struct intsrc *isrc; isrc = intr_lookup_source(vector); if (isrc == NULL) return (EINVAL); return (isrc->is_pic->pic_config_intr(isrc, trig, pol)); } static void intr_disable_src(void *arg) { struct intsrc *isrc; isrc = arg; isrc->is_pic->pic_disable_source(isrc, PIC_EOI); } void intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame) { struct intr_event *ie; int vector; /* * We count software interrupts when we process them. The * code here follows previous practice, but there's an * argument for counting hardware interrupts when they're * processed too. */ (*isrc->is_count)++; VM_CNT_INC(v_intr); ie = isrc->is_event; /* * XXX: We assume that IRQ 0 is only used for the ISA timer * device (clk). */ vector = isrc->is_pic->pic_vector(isrc); if (vector == 0) clkintr_pending = 1; /* * For stray interrupts, mask and EOI the source, bump the * stray count, and log the condition. 
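 */

/*
 * [Editor's sketch -- not part of this change.] The stray-interrupt code
 * that follows logs each stray up to MAX_STRAY_LOG times, then emits one
 * final "not logging anymore" message while continuing to count.  A
 * minimal userland model of that rate-limiting pattern; the model_*
 * names are hypothetical.
 */
#include <stdio.h>

#define MODEL_MAX_STRAY_LOG     5

static void
model_stray(int irq, unsigned long *straycount)
{
        (*straycount)++;
        if (*straycount < MODEL_MAX_STRAY_LOG)
                printf("stray irq%d\n", irq);
        else if (*straycount == MODEL_MAX_STRAY_LOG)
                printf("too many stray irq %d's: not logging anymore\n", irq);
        /* Strays past the threshold are still counted, just not logged. */
}

int
main(void)
{
        unsigned long count = 0;

        for (int i = 0; i < 8; i++)     /* prints 4 strays + 1 final line */
                model_stray(7, &count);
        return (0);
}

/*
 * (End of editorial sketch.)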
*/ if (intr_event_handle(ie, frame) != 0) { isrc->is_pic->pic_disable_source(isrc, PIC_EOI); (*isrc->is_straycount)++; if (*isrc->is_straycount < MAX_STRAY_LOG) log(LOG_ERR, "stray irq%d\n", vector); else if (*isrc->is_straycount == MAX_STRAY_LOG) log(LOG_CRIT, "too many stray irq %d's: not logging anymore\n", vector); } } void intr_resume(bool suspend_cancelled) { struct pic *pic; #ifndef DEV_ATPIC atpic_reset(); #endif mtx_lock(&intrpic_lock); TAILQ_FOREACH(pic, &pics, pics) { if (pic->pic_resume != NULL) pic->pic_resume(pic, suspend_cancelled); } mtx_unlock(&intrpic_lock); } void intr_suspend(void) { struct pic *pic; mtx_lock(&intrpic_lock); TAILQ_FOREACH_REVERSE(pic, &pics, pics_head, pics) { if (pic->pic_suspend != NULL) pic->pic_suspend(pic); } mtx_unlock(&intrpic_lock); } static int intr_assign_cpu(void *arg, int cpu) { #ifdef SMP struct intsrc *isrc; int error; #ifdef EARLY_AP_STARTUP MPASS(mp_ncpus == 1 || smp_started); /* Nothing to do if there is only a single CPU. */ if (mp_ncpus > 1 && cpu != NOCPU) { #else /* * Don't do anything during early boot. We will pick up the * assignment once the APs are started. */ if (assign_cpu && cpu != NOCPU) { #endif isrc = arg; sx_xlock(&intrsrc_lock); error = isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]); if (error == 0) isrc->is_cpu = cpu; sx_xunlock(&intrsrc_lock); } else error = 0; return (error); #else return (EOPNOTSUPP); #endif } static void intrcnt_setname(const char *name, int index) { snprintf(intrnames + (MAXCOMLEN + 1) * index, MAXCOMLEN + 1, "%-*s", MAXCOMLEN, name); } static void intrcnt_updatename(struct intsrc *is) { intrcnt_setname(is->is_event->ie_fullname, is->is_index); } static void intrcnt_register(struct intsrc *is) { char straystr[MAXCOMLEN + 1]; KASSERT(is->is_event != NULL, ("%s: isrc with no event", __func__)); mtx_lock_spin(&intrcnt_lock); MPASS(intrcnt_index + 2 <= nintrcnt); is->is_index = intrcnt_index; intrcnt_index += 2; snprintf(straystr, MAXCOMLEN + 1, "stray irq%d", is->is_pic->pic_vector(is)); intrcnt_updatename(is); is->is_count = &intrcnt[is->is_index]; intrcnt_setname(straystr, is->is_index + 1); is->is_straycount = &intrcnt[is->is_index + 1]; mtx_unlock_spin(&intrcnt_lock); } void intrcnt_add(const char *name, u_long **countp) { mtx_lock_spin(&intrcnt_lock); MPASS(intrcnt_index < nintrcnt); *countp = &intrcnt[intrcnt_index]; intrcnt_setname(name, intrcnt_index); intrcnt_index++; mtx_unlock_spin(&intrcnt_lock); } static void intr_init(void *dummy __unused) { TAILQ_INIT(&pics); mtx_init(&intrpic_lock, "intrpic", NULL, MTX_DEF); sx_init(&intrsrc_lock, "intrsrc"); mtx_init(&intrcnt_lock, "intrcnt", NULL, MTX_SPIN); } SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL); static void intr_init_final(void *dummy __unused) { /* * Enable interrupts on the BSP after all of the interrupt * controllers are initialized. Device interrupts are still * disabled in the interrupt controllers until interrupt * handlers are registered. Interrupts are enabled on each AP * after their first context switch. */ enable_intr(); } SYSINIT(intr_init_final, SI_SUB_INTR, SI_ORDER_ANY, intr_init_final, NULL); #ifndef DEV_ATPIC /* Initialize the two 8259A's to a known-good shutdown state. 
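 */

/*
 * [Editor's sketch -- not part of this change.] intrcnt_setname() above
 * stores interrupt names in one flat array of fixed-width slots, so that
 * consumers such as vmstat -i can find name i at offset
 * i * (MAXCOMLEN + 1).  A self-contained model of that layout, using 19
 * for MAXCOMLEN as on FreeBSD; the model_* names are hypothetical.
 */
#include <stdio.h>

#define MODEL_NAMELEN   19              /* stands in for MAXCOMLEN */

static char model_names[8 * (MODEL_NAMELEN + 1)];

static void
model_setname(const char *name, int index)
{
        /* "%-*s" left-justifies and pads; snprintf NUL-terminates the slot. */
        snprintf(model_names + (MODEL_NAMELEN + 1) * index,
            MODEL_NAMELEN + 1, "%-*s", MODEL_NAMELEN, name);
}

int
main(void)
{
        model_setname("irq9: acpi0", 0);
        model_setname("stray irq9", 1);
        printf("[%s]\n", model_names + (MODEL_NAMELEN + 1) * 1);
        return (0);
}

/*
 * (End of editorial sketch; atpic_reset() follows.)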
*/ void atpic_reset(void) { outb(IO_ICU1, ICW1_RESET | ICW1_IC4); outb(IO_ICU1 + ICU_IMR_OFFSET, IDT_IO_INTS); outb(IO_ICU1 + ICU_IMR_OFFSET, IRQ_MASK(ICU_SLAVEID)); outb(IO_ICU1 + ICU_IMR_OFFSET, MASTER_MODE); outb(IO_ICU1 + ICU_IMR_OFFSET, 0xff); outb(IO_ICU1, OCW3_SEL | OCW3_RR); outb(IO_ICU2, ICW1_RESET | ICW1_IC4); outb(IO_ICU2 + ICU_IMR_OFFSET, IDT_IO_INTS + 8); outb(IO_ICU2 + ICU_IMR_OFFSET, ICU_SLAVEID); outb(IO_ICU2 + ICU_IMR_OFFSET, SLAVE_MODE); outb(IO_ICU2 + ICU_IMR_OFFSET, 0xff); outb(IO_ICU2, OCW3_SEL | OCW3_RR); } #endif /* Add a description to an active interrupt handler. */ int intr_describe(u_int vector, void *ih, const char *descr) { struct intsrc *isrc; int error; isrc = intr_lookup_source(vector); if (isrc == NULL) return (EINVAL); error = intr_event_describe_handler(isrc->is_event, ih, descr); if (error) return (error); intrcnt_updatename(isrc); return (0); } void intr_reprogram(void) { struct intsrc *is; u_int v; sx_xlock(&intrsrc_lock); for (v = 0; v < num_io_irqs; v++) { is = interrupt_sources[v]; if (is == NULL) continue; if (is->is_pic->pic_reprogram_pin != NULL) is->is_pic->pic_reprogram_pin(is); } sx_xunlock(&intrsrc_lock); } #ifdef DDB /* * Dump data about interrupt handlers */ DB_SHOW_COMMAND(irqs, db_show_irqs) { struct intsrc **isrc; u_int i; int verbose; if (strcmp(modif, "v") == 0) verbose = 1; else verbose = 0; isrc = interrupt_sources; for (i = 0; i < num_io_irqs && !db_pager_quit; i++, isrc++) if (*isrc != NULL) db_dump_intr_event((*isrc)->is_event, verbose); } #endif #ifdef SMP /* * Support for balancing interrupt sources across CPUs. For now we just * allocate CPUs round-robin. */ cpuset_t intr_cpus = CPUSET_T_INITIALIZER(0x1); static int current_cpu[MAXMEMDOM]; static void intr_init_cpus(void) { int i; for (i = 0; i < vm_ndomains; i++) { current_cpu[i] = 0; if (!CPU_ISSET(current_cpu[i], &intr_cpus) || !CPU_ISSET(current_cpu[i], &cpuset_domain[i])) intr_next_cpu(i); } } /* * Return the CPU that the next interrupt source should use. For now * this just returns the next local APIC according to round-robin. */ u_int intr_next_cpu(int domain) { u_int apic_id; #ifdef EARLY_AP_STARTUP MPASS(mp_ncpus == 1 || smp_started); if (mp_ncpus == 1) return (PCPU_GET(apic_id)); #else /* Leave all interrupts on the BSP during boot. */ if (!assign_cpu) return (PCPU_GET(apic_id)); #endif mtx_lock_spin(&icu_lock); apic_id = cpu_apic_ids[current_cpu[domain]]; do { current_cpu[domain]++; if (current_cpu[domain] > mp_maxid) current_cpu[domain] = 0; } while (!CPU_ISSET(current_cpu[domain], &intr_cpus) || !CPU_ISSET(current_cpu[domain], &cpuset_domain[domain])); mtx_unlock_spin(&icu_lock); return (apic_id); } /* Attempt to bind the specified IRQ to the specified CPU. */ int intr_bind(u_int vector, u_char cpu) { struct intsrc *isrc; isrc = intr_lookup_source(vector); if (isrc == NULL) return (EINVAL); return (intr_event_bind(isrc->is_event, cpu)); } /* * Add a CPU to our mask of valid CPUs that can be destinations of * interrupts. */ void intr_add_cpu(u_int cpu) { if (cpu >= MAXCPU) panic("%s: Invalid CPU ID", __func__); if (bootverbose) printf("INTR: Adding local APIC %d as a target\n", cpu_apic_ids[cpu]); CPU_SET(cpu, &intr_cpus); } #ifdef EARLY_AP_STARTUP static void intr_smp_startup(void *arg __unused) { intr_init_cpus(); return; } SYSINIT(intr_smp_startup, SI_SUB_SMP, SI_ORDER_SECOND, intr_smp_startup, NULL); #else /* * Distribute all the interrupt sources among the available CPUs once the * AP's have been launched. 
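 */

/*
 * [Editor's sketch -- not part of this change.] intr_next_cpu() above
 * hands out CPUs round-robin, returning the current pick and then
 * advancing with wraparound past CPUs that are not in the permitted set.
 * A minimal model using a bitmask in place of cpuset_t; it assumes the
 * set is non-empty, and the model_* names are hypothetical.
 */
#include <assert.h>

static int model_current_cpu;

static int
model_next_cpu(unsigned intr_cpus, int maxid)
{
        int picked = model_current_cpu;

        do {
                model_current_cpu++;
                if (model_current_cpu > maxid)
                        model_current_cpu = 0;
        } while ((intr_cpus & (1u << model_current_cpu)) == 0);
        return (picked);
}

int
main(void)
{
        /* CPUs 0, 2 and 3 accept interrupts; CPU 1 is always skipped. */
        assert(model_next_cpu(0x0d, 3) == 0);
        assert(model_next_cpu(0x0d, 3) == 2);
        assert(model_next_cpu(0x0d, 3) == 3);
        assert(model_next_cpu(0x0d, 3) == 0);
        return (0);
}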
*/ static void intr_shuffle_irqs(void *arg __unused) { struct intsrc *isrc; u_int cpu, i; intr_init_cpus(); /* Don't bother on UP. */ if (mp_ncpus == 1) return; /* Round-robin assign a CPU to each enabled source. */ sx_xlock(&intrsrc_lock); assign_cpu = 1; for (i = 0; i < num_io_irqs; i++) { isrc = interrupt_sources[i]; if (isrc != NULL && isrc->is_handlers > 0) { /* * If this event is already bound to a CPU, * then assign the source to that CPU instead * of picking one via round-robin. Note that * this is careful to only advance the * round-robin if the CPU assignment succeeds. */ cpu = isrc->is_event->ie_cpu; if (cpu == NOCPU) cpu = current_cpu[isrc->is_domain]; if (isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]) == 0) { isrc->is_cpu = cpu; if (isrc->is_event->ie_cpu == NOCPU) intr_next_cpu(isrc->is_domain); } } } sx_xunlock(&intrsrc_lock); } SYSINIT(intr_shuffle_irqs, SI_SUB_SMP, SI_ORDER_SECOND, intr_shuffle_irqs, NULL); #endif /* * TODO: Export this information in a non-MD fashion, integrate with vmstat -i. */ static int sysctl_hw_intrs(SYSCTL_HANDLER_ARGS) { struct sbuf sbuf; struct intsrc *isrc; u_int i; int error; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sbuf_new_for_sysctl(&sbuf, NULL, 128, req); sx_slock(&intrsrc_lock); for (i = 0; i < num_io_irqs; i++) { isrc = interrupt_sources[i]; if (isrc == NULL) continue; sbuf_printf(&sbuf, "%s:%d @cpu%d(domain%d): %ld\n", isrc->is_event->ie_fullname, isrc->is_index, isrc->is_cpu, isrc->is_domain, *isrc->is_count); } sx_sunlock(&intrsrc_lock); error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); return (error); } SYSCTL_PROC(_hw, OID_AUTO, intrs, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, sysctl_hw_intrs, "A", "interrupt:number @cpu: count"); /* * Compare two, possibly NULL, entries in the interrupt source array * by load. */ static int intrcmp(const void *one, const void *two) { const struct intsrc *i1, *i2; i1 = *(const struct intsrc * const *)one; i2 = *(const struct intsrc * const *)two; if (i1 != NULL && i2 != NULL) return (*i1->is_count - *i2->is_count); if (i1 != NULL) return (1); if (i2 != NULL) return (-1); return (0); } /* * Balance IRQs across available CPUs according to load. */ static void intr_balance(void *dummy __unused, int pending __unused) { struct intsrc *isrc; int interval; u_int cpu; int i; interval = intrbalance; if (interval == 0) goto out; /* * Sort interrupts according to count. */ sx_xlock(&intrsrc_lock); memcpy(interrupt_sorted, interrupt_sources, num_io_irqs * sizeof(interrupt_sorted[0])); qsort(interrupt_sorted, num_io_irqs, sizeof(interrupt_sorted[0]), intrcmp); /* * Restart the scan from the same location to avoid moving in the * common case. */ intr_init_cpus(); /* * Assign round-robin from most loaded to least. */ for (i = num_io_irqs - 1; i >= 0; i--) { isrc = interrupt_sorted[i]; if (isrc == NULL || isrc->is_event->ie_cpu != NOCPU) continue; cpu = current_cpu[isrc->is_domain]; intr_next_cpu(isrc->is_domain); if (isrc->is_cpu != cpu && isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]) == 0) isrc->is_cpu = cpu; } sx_xunlock(&intrsrc_lock); out: taskqueue_enqueue_timeout(taskqueue_thread, &intrbalance_task, interval ? 
hz * interval : hz * 60); } static void intr_balance_init(void *dummy __unused) { TIMEOUT_TASK_INIT(taskqueue_thread, &intrbalance_task, 0, intr_balance, NULL); taskqueue_enqueue_timeout(taskqueue_thread, &intrbalance_task, hz); } SYSINIT(intr_balance_init, SI_SUB_SMP, SI_ORDER_ANY, intr_balance_init, NULL); #else /* * Always route interrupts to the current processor in the UP case. */ u_int intr_next_cpu(int domain) { return (PCPU_GET(apic_id)); } #endif Index: head/sys/x86/x86/io_apic.c =================================================================== --- head/sys/x86/x86/io_apic.c (revision 344854) +++ head/sys/x86/x86/io_apic.c (revision 344855) @@ -1,1252 +1,1251 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_acpi.h" #include "opt_isa.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define IOAPIC_ISA_INTS 16 #define IOAPIC_MEM_REGION 32 #define IOAPIC_REDTBL_LO(i) (IOAPIC_REDTBL + (i) * 2) #define IOAPIC_REDTBL_HI(i) (IOAPIC_REDTBL_LO(i) + 1) static MALLOC_DEFINE(M_IOAPIC, "io_apic", "I/O APIC structures"); /* * I/O APIC interrupt source driver. Each pin is assigned an IRQ cookie * as laid out in the ACPI System Interrupt number model where each I/O * APIC has a contiguous chunk of the System Interrupt address space. * We assume that IRQs 1 - 15 behave like ISA IRQs and that all other * IRQs behave as PCI IRQs by default. We also assume that the pin for * IRQ 0 is actually an ExtINT pin. The apic enumerators override the * configuration of individual pins as indicated by their tables. * * Documentation for the I/O APIC: "82093AA I/O Advanced Programmable * Interrupt Controller (IOAPIC)", May 1996, Intel Corp. 
* ftp://download.intel.com/design/chipsets/datashts/29056601.pdf */ struct ioapic_intsrc { struct intsrc io_intsrc; int io_irq; u_int io_intpin:8; u_int io_vector:8; u_int io_cpu; u_int io_activehi:1; u_int io_edgetrigger:1; u_int io_masked:1; int io_bus:4; uint32_t io_lowreg; u_int io_remap_cookie; }; struct ioapic { struct pic io_pic; u_int io_id:8; /* logical ID */ u_int io_apic_id:4; u_int io_intbase:8; /* System Interrupt base */ u_int io_numintr:8; u_int io_haseoi:1; volatile ioapic_t *io_addr; /* XXX: should use bus_space */ vm_paddr_t io_paddr; STAILQ_ENTRY(ioapic) io_next; device_t pci_dev; /* matched pci device, if found */ struct resource *pci_wnd; /* BAR 0, should be same or alias to io_paddr */ struct ioapic_intsrc io_pins[0]; }; static u_int ioapic_read(volatile ioapic_t *apic, int reg); static void ioapic_write(volatile ioapic_t *apic, int reg, u_int val); static const char *ioapic_bus_string(int bus_type); static void ioapic_print_irq(struct ioapic_intsrc *intpin); static void ioapic_register_sources(struct pic *pic); static void ioapic_enable_source(struct intsrc *isrc); static void ioapic_disable_source(struct intsrc *isrc, int eoi); static void ioapic_eoi_source(struct intsrc *isrc); static void ioapic_enable_intr(struct intsrc *isrc); static void ioapic_disable_intr(struct intsrc *isrc); static int ioapic_vector(struct intsrc *isrc); static int ioapic_source_pending(struct intsrc *isrc); static int ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol); static void ioapic_resume(struct pic *pic, bool suspend_cancelled); static int ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id); static void ioapic_program_intpin(struct ioapic_intsrc *intpin); static void ioapic_reprogram_intpin(struct intsrc *isrc); static STAILQ_HEAD(,ioapic) ioapic_list = STAILQ_HEAD_INITIALIZER(ioapic_list); struct pic ioapic_template = { .pic_register_sources = ioapic_register_sources, .pic_enable_source = ioapic_enable_source, .pic_disable_source = ioapic_disable_source, .pic_eoi_source = ioapic_eoi_source, .pic_enable_intr = ioapic_enable_intr, .pic_disable_intr = ioapic_disable_intr, .pic_vector = ioapic_vector, .pic_source_pending = ioapic_source_pending, .pic_suspend = NULL, .pic_resume = ioapic_resume, .pic_config_intr = ioapic_config_intr, .pic_assign_cpu = ioapic_assign_cpu, .pic_reprogram_pin = ioapic_reprogram_intpin, }; static u_int next_ioapic_base; static u_int next_id; static int enable_extint; SYSCTL_INT(_hw_apic, OID_AUTO, enable_extint, CTLFLAG_RDTUN, &enable_extint, 0, "Enable the ExtINT pin in the first I/O APIC"); static void _ioapic_eoi_source(struct intsrc *isrc, int locked) { struct ioapic_intsrc *src; struct ioapic *io; volatile uint32_t *apic_eoi; uint32_t low1; lapic_eoi(); if (!lapic_eoi_suppression) return; src = (struct ioapic_intsrc *)isrc; if (src->io_edgetrigger) return; io = (struct ioapic *)isrc->is_pic; /* * Handle targeted EOI for level-triggered pins, if broadcast * EOI suppression is supported by LAPICs. */ if (io->io_haseoi) { /* * If IOAPIC has EOI Register, simply write vector * number into the reg. */ apic_eoi = (volatile uint32_t *)((volatile char *) io->io_addr + IOAPIC_EOIR); *apic_eoi = src->io_vector; } else { /* * Otherwise, if IO-APIC is too old to provide EOIR, * do what Intel did for the Linux kernel. Temporary * switch the pin to edge-trigger and back, masking * the pin during the trick. 
*/ if (!locked) mtx_lock_spin(&icu_lock); low1 = src->io_lowreg; low1 &= ~IOART_TRGRLVL; low1 |= IOART_TRGREDG | IOART_INTMSET; ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(src->io_intpin), low1); ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(src->io_intpin), src->io_lowreg); if (!locked) mtx_unlock_spin(&icu_lock); } } static u_int ioapic_read(volatile ioapic_t *apic, int reg) { mtx_assert(&icu_lock, MA_OWNED); apic->ioregsel = reg; return (apic->iowin); } static void ioapic_write(volatile ioapic_t *apic, int reg, u_int val) { mtx_assert(&icu_lock, MA_OWNED); apic->ioregsel = reg; apic->iowin = val; } static const char * ioapic_bus_string(int bus_type) { switch (bus_type) { case APIC_BUS_ISA: return ("ISA"); case APIC_BUS_EISA: return ("EISA"); case APIC_BUS_PCI: return ("PCI"); default: return ("unknown"); } } static void ioapic_print_irq(struct ioapic_intsrc *intpin) { switch (intpin->io_irq) { case IRQ_DISABLED: printf("disabled"); break; case IRQ_EXTINT: printf("ExtINT"); break; case IRQ_NMI: printf("NMI"); break; case IRQ_SMI: printf("SMI"); break; default: printf("%s IRQ %d", ioapic_bus_string(intpin->io_bus), intpin->io_irq); } } static void ioapic_enable_source(struct intsrc *isrc) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; struct ioapic *io = (struct ioapic *)isrc->is_pic; uint32_t flags; mtx_lock_spin(&icu_lock); if (intpin->io_masked) { flags = intpin->io_lowreg & ~IOART_INTMASK; ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), flags); intpin->io_masked = 0; } mtx_unlock_spin(&icu_lock); } static void ioapic_disable_source(struct intsrc *isrc, int eoi) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; struct ioapic *io = (struct ioapic *)isrc->is_pic; uint32_t flags; mtx_lock_spin(&icu_lock); if (!intpin->io_masked && !intpin->io_edgetrigger) { flags = intpin->io_lowreg | IOART_INTMSET; ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), flags); intpin->io_masked = 1; } if (eoi == PIC_EOI) _ioapic_eoi_source(isrc, 1); mtx_unlock_spin(&icu_lock); } static void ioapic_eoi_source(struct intsrc *isrc) { _ioapic_eoi_source(isrc, 0); } /* * Completely program an intpin based on the data in its interrupt source * structure. */ static void ioapic_program_intpin(struct ioapic_intsrc *intpin) { struct ioapic *io = (struct ioapic *)intpin->io_intsrc.is_pic; uint32_t low, high; #ifdef ACPI_DMAR int error; #endif /* * If a pin is completely invalid or if it is valid but hasn't * been enabled yet, just ensure that the pin is masked. */ mtx_assert(&icu_lock, MA_OWNED); if (intpin->io_irq == IRQ_DISABLED || (intpin->io_irq >= 0 && intpin->io_vector == 0)) { low = ioapic_read(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin)); if ((low & IOART_INTMASK) == IOART_INTMCLR) ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), low | IOART_INTMSET); #ifdef ACPI_DMAR mtx_unlock_spin(&icu_lock); iommu_unmap_ioapic_intr(io->io_apic_id, &intpin->io_remap_cookie); mtx_lock_spin(&icu_lock); #endif return; } #ifdef ACPI_DMAR mtx_unlock_spin(&icu_lock); error = iommu_map_ioapic_intr(io->io_apic_id, intpin->io_cpu, intpin->io_vector, intpin->io_edgetrigger, intpin->io_activehi, intpin->io_irq, &intpin->io_remap_cookie, &high, &low); mtx_lock_spin(&icu_lock); if (error == 0) { ioapic_write(io->io_addr, IOAPIC_REDTBL_HI(intpin->io_intpin), high); intpin->io_lowreg = low; ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), low); return; } else if (error != EOPNOTSUPP) { return; } #endif /* * Set the destination. 
Note that with Intel interrupt remapping, * the previously reserved bits 55:48 now have a purpose so ensure * these are zero. */ low = IOART_DESTPHY; high = intpin->io_cpu << APIC_ID_SHIFT; /* Program the rest of the low word. */ if (intpin->io_edgetrigger) low |= IOART_TRGREDG; else low |= IOART_TRGRLVL; if (intpin->io_activehi) low |= IOART_INTAHI; else low |= IOART_INTALO; if (intpin->io_masked) low |= IOART_INTMSET; switch (intpin->io_irq) { case IRQ_EXTINT: KASSERT(intpin->io_edgetrigger, ("ExtINT not edge triggered")); low |= IOART_DELEXINT; break; case IRQ_NMI: KASSERT(intpin->io_edgetrigger, ("NMI not edge triggered")); low |= IOART_DELNMI; break; case IRQ_SMI: KASSERT(intpin->io_edgetrigger, ("SMI not edge triggered")); low |= IOART_DELSMI; break; default: KASSERT(intpin->io_vector != 0, ("No vector for IRQ %u", intpin->io_irq)); low |= IOART_DELFIXED | intpin->io_vector; } /* Write the values to the APIC. */ ioapic_write(io->io_addr, IOAPIC_REDTBL_HI(intpin->io_intpin), high); intpin->io_lowreg = low; ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), low); } static void ioapic_reprogram_intpin(struct intsrc *isrc) { mtx_lock_spin(&icu_lock); ioapic_program_intpin((struct ioapic_intsrc *)isrc); mtx_unlock_spin(&icu_lock); } static int ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; struct ioapic *io = (struct ioapic *)isrc->is_pic; u_int old_vector, new_vector; u_int old_id; /* * On Hyper-V: * - Stick to the first cpu for all I/O APIC pins. * - And don't allow destination cpu changes. */ if (vm_guest == VM_GUEST_HV) { if (intpin->io_vector) return (EINVAL); else apic_id = 0; } /* * keep 1st core as the destination for NMI */ if (intpin->io_irq == IRQ_NMI) apic_id = 0; /* * Set us up to free the old irq. */ old_vector = intpin->io_vector; old_id = intpin->io_cpu; if (old_vector && apic_id == old_id) return (0); /* * Allocate an APIC vector for this interrupt pin. Once * we have a vector we program the interrupt pin. */ new_vector = apic_alloc_vector(apic_id, intpin->io_irq); if (new_vector == 0) return (ENOSPC); /* * Mask the old intpin if it is enabled while it is migrated. * * At least some level-triggered interrupts seem to need the * extra DELAY() to avoid being stuck in a non-EOI'd state. */ mtx_lock_spin(&icu_lock); if (!intpin->io_masked && !intpin->io_edgetrigger) { ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), intpin->io_lowreg | IOART_INTMSET); mtx_unlock_spin(&icu_lock); DELAY(100); mtx_lock_spin(&icu_lock); } intpin->io_cpu = apic_id; intpin->io_vector = new_vector; if (isrc->is_handlers > 0) apic_enable_vector(intpin->io_cpu, intpin->io_vector); if (bootverbose) { printf("ioapic%u: routing intpin %u (", io->io_id, intpin->io_intpin); ioapic_print_irq(intpin); printf(") to lapic %u vector %u\n", intpin->io_cpu, intpin->io_vector); } ioapic_program_intpin(intpin); mtx_unlock_spin(&icu_lock); /* * Free the old vector after the new one is established. This is done * to prevent races where we could miss an interrupt. 
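 */

/*
 * [Editor's sketch -- not part of this change.] The ordering in
 * ioapic_assign_cpu() here is: allocate the new vector, reprogram the
 * pin, and only then release the old vector, so there is never a window
 * in which the pin points at a vector that has already been freed.  A
 * schematic userland model of that ordering; the model_* names are
 * hypothetical.
 */
#include <assert.h>
#include <stdbool.h>

static bool model_vector_live[256];
static int model_pin_vector;    /* vector the "hardware" would deliver */

static void
model_migrate(int new_vec)
{
        int old_vec = model_pin_vector;

        model_vector_live[new_vec] = true;      /* 1: new vector valid */
        model_pin_vector = new_vec;             /* 2: reprogram the pin */
        if (old_vec != 0)
                model_vector_live[old_vec] = false; /* 3: now drop old */
}

int
main(void)
{
        model_migrate(0x60);
        assert(model_pin_vector == 0x60 && model_vector_live[0x60]);
        model_migrate(0x61);
        /* At every step the pin pointed at a live vector. */
        assert(!model_vector_live[0x60] && model_vector_live[0x61]);
        return (0);
}

/*
 * (End of editorial sketch.)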
*/ if (old_vector) { if (isrc->is_handlers > 0) apic_disable_vector(old_id, old_vector); apic_free_vector(old_id, old_vector, intpin->io_irq); } return (0); } static void ioapic_enable_intr(struct intsrc *isrc) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; if (intpin->io_vector == 0) if (ioapic_assign_cpu(isrc, intr_next_cpu(isrc->is_domain)) != 0) panic("Couldn't find an APIC vector for IRQ %d", intpin->io_irq); apic_enable_vector(intpin->io_cpu, intpin->io_vector); } static void ioapic_disable_intr(struct intsrc *isrc) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; u_int vector; if (intpin->io_vector != 0) { /* Mask this interrupt pin and free its APIC vector. */ vector = intpin->io_vector; apic_disable_vector(intpin->io_cpu, vector); mtx_lock_spin(&icu_lock); intpin->io_masked = 1; intpin->io_vector = 0; ioapic_program_intpin(intpin); mtx_unlock_spin(&icu_lock); apic_free_vector(intpin->io_cpu, vector, intpin->io_irq); } } static int ioapic_vector(struct intsrc *isrc) { struct ioapic_intsrc *pin; pin = (struct ioapic_intsrc *)isrc; return (pin->io_irq); } static int ioapic_source_pending(struct intsrc *isrc) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; if (intpin->io_vector == 0) return 0; return (lapic_intr_pending(intpin->io_vector)); } static int ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; struct ioapic *io = (struct ioapic *)isrc->is_pic; int changed; KASSERT(!(trig == INTR_TRIGGER_CONFORM || pol == INTR_POLARITY_CONFORM), ("%s: Conforming trigger or polarity\n", __func__)); /* * EISA interrupts always use active high polarity, so don't allow * them to be set to active low. * * XXX: Should we write to the ELCR if the trigger mode changes for * an EISA IRQ or an ISA IRQ with the ELCR present? */ mtx_lock_spin(&icu_lock); if (intpin->io_bus == APIC_BUS_EISA) pol = INTR_POLARITY_HIGH; changed = 0; if (intpin->io_edgetrigger != (trig == INTR_TRIGGER_EDGE)) { if (bootverbose) printf("ioapic%u: Changing trigger for pin %u to %s\n", io->io_id, intpin->io_intpin, trig == INTR_TRIGGER_EDGE ? "edge" : "level"); intpin->io_edgetrigger = (trig == INTR_TRIGGER_EDGE); changed++; } if (intpin->io_activehi != (pol == INTR_POLARITY_HIGH)) { if (bootverbose) printf("ioapic%u: Changing polarity for pin %u to %s\n", io->io_id, intpin->io_intpin, pol == INTR_POLARITY_HIGH ? "high" : "low"); intpin->io_activehi = (pol == INTR_POLARITY_HIGH); changed++; } if (changed) ioapic_program_intpin(intpin); mtx_unlock_spin(&icu_lock); return (0); } static void ioapic_resume(struct pic *pic, bool suspend_cancelled) { struct ioapic *io = (struct ioapic *)pic; int i; mtx_lock_spin(&icu_lock); for (i = 0; i < io->io_numintr; i++) ioapic_program_intpin(&io->io_pins[i]); mtx_unlock_spin(&icu_lock); } /* * Create a plain I/O APIC object. */ void * ioapic_create(vm_paddr_t addr, int32_t apic_id, int intbase) { struct ioapic *io; struct ioapic_intsrc *intpin; volatile ioapic_t *apic; u_int numintr, i; uint32_t value; /* Map the register window so we can access the device. */ apic = pmap_mapdev(addr, IOAPIC_MEM_REGION); mtx_lock_spin(&icu_lock); value = ioapic_read(apic, IOAPIC_VER); mtx_unlock_spin(&icu_lock); /* If it's version register doesn't seem to work, punt. */ if (value == 0xffffffff) { pmap_unmapdev((vm_offset_t)apic, IOAPIC_MEM_REGION); return (NULL); } /* Determine the number of vectors and set the APIC ID. 
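 */

/*
 * [Editor's sketch -- not part of this change.] struct ioapic above ends
 * with a zero-length io_pins[] array, and the allocation just below sizes
 * the header and all per-pin slots in a single malloc().  The same idiom
 * with a C99 flexible array member; the model_* names are hypothetical.
 */
#include <stdlib.h>

struct model_pin {
        unsigned irq;
};

struct model_apic {
        unsigned numintr;
        struct model_pin pins[];        /* flexible array member */
};

static struct model_apic *
model_apic_create(unsigned numintr)
{
        struct model_apic *io;

        /* One allocation covers the header plus numintr pin slots. */
        io = malloc(sizeof(*io) + numintr * sizeof(io->pins[0]));
        if (io == NULL)
                return (NULL);
        io->numintr = numintr;
        for (unsigned i = 0; i < numintr; i++)
                io->pins[i].irq = i;
        return (io);
}

int
main(void)
{
        free(model_apic_create(24));
        return (0);
}

/*
 * (End of editorial sketch.)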
*/ numintr = ((value & IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) + 1; io = malloc(sizeof(struct ioapic) + numintr * sizeof(struct ioapic_intsrc), M_IOAPIC, M_WAITOK); io->io_pic = ioapic_template; io->pci_dev = NULL; io->pci_wnd = NULL; mtx_lock_spin(&icu_lock); io->io_id = next_id++; io->io_apic_id = ioapic_read(apic, IOAPIC_ID) >> APIC_ID_SHIFT; if (apic_id != -1 && io->io_apic_id != apic_id) { ioapic_write(apic, IOAPIC_ID, apic_id << APIC_ID_SHIFT); mtx_unlock_spin(&icu_lock); io->io_apic_id = apic_id; printf("ioapic%u: Changing APIC ID to %d\n", io->io_id, apic_id); } else mtx_unlock_spin(&icu_lock); if (intbase == -1) { intbase = next_ioapic_base; printf("ioapic%u: Assuming intbase of %d\n", io->io_id, intbase); } else if (intbase != next_ioapic_base && bootverbose) printf("ioapic%u: WARNING: intbase %d != expected base %d\n", io->io_id, intbase, next_ioapic_base); io->io_intbase = intbase; next_ioapic_base = intbase + numintr; if (next_ioapic_base > num_io_irqs) num_io_irqs = next_ioapic_base; io->io_numintr = numintr; io->io_addr = apic; io->io_paddr = addr; if (bootverbose) { printf("ioapic%u: ver 0x%02x maxredir 0x%02x\n", io->io_id, (value & IOART_VER_VERSION), (value & IOART_VER_MAXREDIR) >> MAXREDIRSHIFT); } /* * The summary information about IO-APIC versions is taken from * the Linux kernel source: * 0Xh 82489DX * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant * 2Xh I/O(x)APIC which is PCI 2.2 Compliant * 30h-FFh Reserved * IO-APICs with version >= 0x20 have working EOIR register. */ io->io_haseoi = (value & IOART_VER_VERSION) >= 0x20; /* * Initialize pins. Start off with interrupts disabled. Default * to active-hi and edge-triggered for ISA interrupts and active-lo * and level-triggered for all others. */ bzero(io->io_pins, sizeof(struct ioapic_intsrc) * numintr); mtx_lock_spin(&icu_lock); for (i = 0, intpin = io->io_pins; i < numintr; i++, intpin++) { intpin->io_intsrc.is_pic = (struct pic *)io; intpin->io_intpin = i; intpin->io_irq = intbase + i; /* * Assume that pin 0 on the first I/O APIC is an ExtINT pin. * Assume that pins 1-15 are ISA interrupts and that all * other pins are PCI interrupts. */ if (intpin->io_irq == 0) ioapic_set_extint(io, i); else if (intpin->io_irq < IOAPIC_ISA_INTS) { intpin->io_bus = APIC_BUS_ISA; intpin->io_activehi = 1; intpin->io_edgetrigger = 1; intpin->io_masked = 1; } else { intpin->io_bus = APIC_BUS_PCI; intpin->io_activehi = 0; intpin->io_edgetrigger = 0; intpin->io_masked = 1; } /* * Route interrupts to the BSP by default. Interrupts may * be routed to other CPUs later after they are enabled. 
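 */

/*
 * [Editor's sketch -- not part of this change.] The pin-initialization
 * loop here masks each pin by reading the low redirection-table word and
 * writing it back with the mask bit set, preserving whatever the firmware
 * left in the other fields.  A model of that read-modify-write, using
 * bit 16 for the mask as in the 82093AA datasheet; the model_* names are
 * hypothetical.
 */
#include <assert.h>
#include <stdint.h>

#define MODEL_INTMSET   (1u << 16)      /* interrupt mask bit */

static uint32_t model_redtbl_lo[24];    /* simulated register file */

static void
model_mask_pin(int pin)
{
        uint32_t value;

        value = model_redtbl_lo[pin];                   /* read */
        model_redtbl_lo[pin] = value | MODEL_INTMSET;   /* modify, write */
}

int
main(void)
{
        model_redtbl_lo[3] = 0x0000a970;        /* arbitrary firmware state */
        model_mask_pin(3);
        assert(model_redtbl_lo[3] == (0x0000a970 | MODEL_INTMSET));
        return (0);
}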
*/ intpin->io_cpu = PCPU_GET(apic_id); value = ioapic_read(apic, IOAPIC_REDTBL_LO(i)); ioapic_write(apic, IOAPIC_REDTBL_LO(i), value | IOART_INTMSET); #ifdef ACPI_DMAR /* dummy, but sets cookie */ mtx_unlock_spin(&icu_lock); iommu_map_ioapic_intr(io->io_apic_id, intpin->io_cpu, intpin->io_vector, intpin->io_edgetrigger, intpin->io_activehi, intpin->io_irq, &intpin->io_remap_cookie, NULL, NULL); mtx_lock_spin(&icu_lock); #endif } mtx_unlock_spin(&icu_lock); return (io); } int ioapic_get_vector(void *cookie, u_int pin) { struct ioapic *io; io = (struct ioapic *)cookie; if (pin >= io->io_numintr) return (-1); return (io->io_pins[pin].io_irq); } int ioapic_disable_pin(void *cookie, u_int pin) { struct ioapic *io; io = (struct ioapic *)cookie; if (pin >= io->io_numintr) return (EINVAL); if (io->io_pins[pin].io_irq == IRQ_DISABLED) return (EINVAL); io->io_pins[pin].io_irq = IRQ_DISABLED; if (bootverbose) printf("ioapic%u: intpin %d disabled\n", io->io_id, pin); return (0); } int ioapic_remap_vector(void *cookie, u_int pin, int vector) { struct ioapic *io; io = (struct ioapic *)cookie; if (pin >= io->io_numintr || vector < 0) return (EINVAL); if (io->io_pins[pin].io_irq < 0) return (EINVAL); io->io_pins[pin].io_irq = vector; if (bootverbose) printf("ioapic%u: Routing IRQ %d -> intpin %d\n", io->io_id, vector, pin); return (0); } int ioapic_set_bus(void *cookie, u_int pin, int bus_type) { struct ioapic *io; if (bus_type < 0 || bus_type > APIC_BUS_MAX) return (EINVAL); io = (struct ioapic *)cookie; if (pin >= io->io_numintr) return (EINVAL); if (io->io_pins[pin].io_irq < 0) return (EINVAL); if (io->io_pins[pin].io_bus == bus_type) return (0); io->io_pins[pin].io_bus = bus_type; if (bootverbose) printf("ioapic%u: intpin %d bus %s\n", io->io_id, pin, ioapic_bus_string(bus_type)); return (0); } int ioapic_set_nmi(void *cookie, u_int pin) { struct ioapic *io; io = (struct ioapic *)cookie; if (pin >= io->io_numintr) return (EINVAL); if (io->io_pins[pin].io_irq == IRQ_NMI) return (0); if (io->io_pins[pin].io_irq < 0) return (EINVAL); io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN; io->io_pins[pin].io_irq = IRQ_NMI; io->io_pins[pin].io_masked = 0; io->io_pins[pin].io_edgetrigger = 1; io->io_pins[pin].io_activehi = 1; if (bootverbose) printf("ioapic%u: Routing NMI -> intpin %d\n", io->io_id, pin); return (0); } int ioapic_set_smi(void *cookie, u_int pin) { struct ioapic *io; io = (struct ioapic *)cookie; if (pin >= io->io_numintr) return (EINVAL); if (io->io_pins[pin].io_irq == IRQ_SMI) return (0); if (io->io_pins[pin].io_irq < 0) return (EINVAL); io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN; io->io_pins[pin].io_irq = IRQ_SMI; io->io_pins[pin].io_masked = 0; io->io_pins[pin].io_edgetrigger = 1; io->io_pins[pin].io_activehi = 1; if (bootverbose) printf("ioapic%u: Routing SMI -> intpin %d\n", io->io_id, pin); return (0); } int ioapic_set_extint(void *cookie, u_int pin) { struct ioapic *io; io = (struct ioapic *)cookie; if (pin >= io->io_numintr) return (EINVAL); if (io->io_pins[pin].io_irq == IRQ_EXTINT) return (0); if (io->io_pins[pin].io_irq < 0) return (EINVAL); io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN; io->io_pins[pin].io_irq = IRQ_EXTINT; if (enable_extint) io->io_pins[pin].io_masked = 0; else io->io_pins[pin].io_masked = 1; io->io_pins[pin].io_edgetrigger = 1; io->io_pins[pin].io_activehi = 1; if (bootverbose) printf("ioapic%u: Routing external 8259A's -> intpin %d\n", io->io_id, pin); return (0); } int ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol) { struct ioapic *io; int activehi; 
io = (struct ioapic *)cookie; if (pin >= io->io_numintr || pol == INTR_POLARITY_CONFORM) return (EINVAL); if (io->io_pins[pin].io_irq < 0) return (EINVAL); activehi = (pol == INTR_POLARITY_HIGH); if (io->io_pins[pin].io_activehi == activehi) return (0); io->io_pins[pin].io_activehi = activehi; if (bootverbose) printf("ioapic%u: intpin %d polarity: %s\n", io->io_id, pin, pol == INTR_POLARITY_HIGH ? "high" : "low"); return (0); } int ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger) { struct ioapic *io; int edgetrigger; io = (struct ioapic *)cookie; if (pin >= io->io_numintr || trigger == INTR_TRIGGER_CONFORM) return (EINVAL); if (io->io_pins[pin].io_irq < 0) return (EINVAL); edgetrigger = (trigger == INTR_TRIGGER_EDGE); if (io->io_pins[pin].io_edgetrigger == edgetrigger) return (0); io->io_pins[pin].io_edgetrigger = edgetrigger; if (bootverbose) printf("ioapic%u: intpin %d trigger: %s\n", io->io_id, pin, trigger == INTR_TRIGGER_EDGE ? "edge" : "level"); return (0); } /* * Register a complete I/O APIC object with the interrupt subsystem. */ void ioapic_register(void *cookie) { struct ioapic_intsrc *pin; struct ioapic *io; volatile ioapic_t *apic; uint32_t flags; int i; io = (struct ioapic *)cookie; apic = io->io_addr; mtx_lock_spin(&icu_lock); flags = ioapic_read(apic, IOAPIC_VER) & IOART_VER_VERSION; STAILQ_INSERT_TAIL(&ioapic_list, io, io_next); mtx_unlock_spin(&icu_lock); printf("ioapic%u <Version %u.%u> irqs %u-%u on motherboard\n", io->io_id, flags >> 4, flags & 0xf, io->io_intbase, io->io_intbase + io->io_numintr - 1); /* * Reprogram pins to handle special case pins (such as NMI and * SMI) and disable normal pins until a handler is registered. */ intr_register_pic(&io->io_pic); for (i = 0, pin = io->io_pins; i < io->io_numintr; i++, pin++) ioapic_reprogram_intpin(&pin->io_intsrc); } /* * Add interrupt sources for I/O APIC interrupt pins. */ static void ioapic_register_sources(struct pic *pic) { struct ioapic_intsrc *pin; struct ioapic *io; int i; io = (struct ioapic *)pic; for (i = 0, pin = io->io_pins; i < io->io_numintr; i++, pin++) { if (pin->io_irq >= 0) intr_register_source(&pin->io_intsrc); } } /* A simple new-bus driver to consume PCI I/O APIC devices. */ static int ioapic_pci_probe(device_t dev) { if (pci_get_class(dev) == PCIC_BASEPERIPH && pci_get_subclass(dev) == PCIS_BASEPERIPH_PIC) { switch (pci_get_progif(dev)) { case PCIP_BASEPERIPH_PIC_IO_APIC: device_set_desc(dev, "IO APIC"); break; case PCIP_BASEPERIPH_PIC_IOX_APIC: device_set_desc(dev, "IO(x) APIC"); break; default: return (ENXIO); } device_quiet(dev); return (-10000); } return (ENXIO); } static int ioapic_pci_attach(device_t dev) { struct resource *res; volatile ioapic_t *apic; struct ioapic *io; int rid; u_int apic_id; /* * Try to match the enumerated ioapic. Match BAR start * against io_paddr. Due to a fear that PCI window is not the * same as the MADT reported io window, but an alias, read the * APIC ID from the mapped BAR and match against it. 
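 */

/*
 * [Editor's sketch -- not part of this change.] ioapic_pci_attach() below
 * matches the PCI device against the MADT-enumerated I/O APICs in two
 * passes: first by physical window address, then by APIC ID as a
 * fallback.  A compact model of that search; the model_* names are
 * hypothetical.
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct model_ioapic {
        uint64_t paddr;
        unsigned apic_id;
};

static struct model_ioapic *
model_match(struct model_ioapic *tbl, size_t n, uint64_t bar, unsigned id)
{
        size_t i;

        for (i = 0; i < n; i++)         /* first pass: window address */
                if (tbl[i].paddr == bar)
                        return (&tbl[i]);
        for (i = 0; i < n; i++)         /* second pass: APIC ID */
                if (tbl[i].apic_id == id)
                        return (&tbl[i]);
        return (NULL);
}

int
main(void)
{
        struct model_ioapic tbl[] = {
                { 0xfec00000, 8 }, { 0xfec01000, 9 },
        };

        /* BAR is an alias, so the address pass misses; the ID pass hits. */
        assert(model_match(tbl, 2, 0xfeb00000, 9) == &tbl[1]);
        return (0);
}

/*
 * (End of editorial sketch.)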
*/ rid = PCIR_BAR(0); res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE | RF_SHAREABLE); if (res == NULL) { if (bootverbose) device_printf(dev, "cannot activate BAR0\n"); return (ENXIO); } apic = (volatile ioapic_t *)rman_get_virtual(res); if (rman_get_size(res) < IOAPIC_WND_SIZE) { if (bootverbose) device_printf(dev, "BAR0 too small (%jd) for IOAPIC window\n", (uintmax_t)rman_get_size(res)); goto fail; } mtx_lock_spin(&icu_lock); apic_id = ioapic_read(apic, IOAPIC_ID) >> APIC_ID_SHIFT; /* First match by io window address */ STAILQ_FOREACH(io, &ioapic_list, io_next) { if (io->io_paddr == (vm_paddr_t)rman_get_start(res)) goto found; } /* Then by apic id */ STAILQ_FOREACH(io, &ioapic_list, io_next) { if (io->io_apic_id == apic_id) goto found; } mtx_unlock_spin(&icu_lock); if (bootverbose) device_printf(dev, "cannot match pci bar apic id %d against MADT\n", apic_id); fail: bus_release_resource(dev, SYS_RES_MEMORY, rid, res); return (ENXIO); found: KASSERT(io->pci_dev == NULL, ("ioapic %d pci_dev not NULL", io->io_id)); KASSERT(io->pci_wnd == NULL, ("ioapic %d pci_wnd not NULL", io->io_id)); io->pci_dev = dev; io->pci_wnd = res; if (bootverbose && (io->io_paddr != (vm_paddr_t)rman_get_start(res) || io->io_apic_id != apic_id)) { device_printf(dev, "pci%d:%d:%d:%d pci BAR0@%jx id %d " "MADT id %d paddr@%jx\n", pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev), (uintmax_t)rman_get_start(res), apic_id, io->io_apic_id, (uintmax_t)io->io_paddr); } mtx_unlock_spin(&icu_lock); return (0); } static device_method_t ioapic_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ioapic_pci_probe), DEVMETHOD(device_attach, ioapic_pci_attach), { 0, 0 } }; DEFINE_CLASS_0(ioapic, ioapic_pci_driver, ioapic_pci_methods, 0); static devclass_t ioapic_devclass; DRIVER_MODULE(ioapic, pci, ioapic_pci_driver, ioapic_devclass, 0, 0); int ioapic_get_rid(u_int apic_id, uint16_t *ridp) { struct ioapic *io; uintptr_t rid; int error; mtx_lock_spin(&icu_lock); STAILQ_FOREACH(io, &ioapic_list, io_next) { if (io->io_apic_id == apic_id) break; } mtx_unlock_spin(&icu_lock); if (io == NULL || io->pci_dev == NULL) return (EINVAL); error = pci_get_id(io->pci_dev, PCI_ID_RID, &rid); if (error != 0) return (error); *ridp = rid; return (0); } /* * A new-bus driver to consume the memory resources associated with * the APICs in the system. On some systems ACPI or PnPBIOS system * resource devices may already claim these resources. To keep from * breaking those devices, we attach ourself to the nexus device after * legacy0 and acpi0 and ignore any allocation failures. */ static void apic_identify(driver_t *driver, device_t parent) { /* * Add at order 12. acpi0 is probed at order 10 and legacy0 * is probed at order 11. */ if (lapic_paddr != 0) BUS_ADD_CHILD(parent, 12, "apic", 0); } static int apic_probe(device_t dev) { device_set_desc(dev, "APIC resources"); device_quiet(dev); return (0); } static void apic_add_resource(device_t dev, int rid, vm_paddr_t base, size_t length) { int error; error = bus_set_resource(dev, SYS_RES_MEMORY, rid, base, length); if (error) panic("apic_add_resource: resource %d failed set with %d", rid, error); bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_SHAREABLE); } static int apic_attach(device_t dev) { struct ioapic *io; int i; /* Reserve the local APIC. 
*/ apic_add_resource(dev, 0, lapic_paddr, LAPIC_MEM_REGION); i = 1; STAILQ_FOREACH(io, &ioapic_list, io_next) { apic_add_resource(dev, i, io->io_paddr, IOAPIC_MEM_REGION); i++; } return (0); } static device_method_t apic_methods[] = { /* Device interface */ DEVMETHOD(device_identify, apic_identify), DEVMETHOD(device_probe, apic_probe), DEVMETHOD(device_attach, apic_attach), { 0, 0 } }; DEFINE_CLASS_0(apic, apic_driver, apic_methods, 0); static devclass_t apic_devclass; DRIVER_MODULE(apic, nexus, apic_driver, apic_devclass, 0, 0); #include "opt_ddb.h" #ifdef DDB #include static const char * ioapic_delivery_mode(uint32_t mode) { switch (mode) { case IOART_DELFIXED: return ("fixed"); case IOART_DELLOPRI: return ("lowestpri"); case IOART_DELSMI: return ("SMI"); case IOART_DELRSV1: return ("rsrvd1"); case IOART_DELNMI: return ("NMI"); case IOART_DELINIT: return ("INIT"); case IOART_DELRSV2: return ("rsrvd2"); case IOART_DELEXINT: return ("ExtINT"); default: return (""); } } static u_int db_ioapic_read(volatile ioapic_t *apic, int reg) { apic->ioregsel = reg; return (apic->iowin); } static void db_show_ioapic_one(volatile ioapic_t *io_addr) { uint32_t r, lo, hi; int mre, i; r = db_ioapic_read(io_addr, IOAPIC_VER); mre = (r & IOART_VER_MAXREDIR) >> MAXREDIRSHIFT; db_printf("Id 0x%08x Ver 0x%02x MRE %d\n", db_ioapic_read(io_addr, IOAPIC_ID), r & IOART_VER_VERSION, mre); for (i = 0; i < mre; i++) { lo = db_ioapic_read(io_addr, IOAPIC_REDTBL_LO(i)); hi = db_ioapic_read(io_addr, IOAPIC_REDTBL_HI(i)); db_printf(" pin %d Dest %s/%x %smasked Trig %s RemoteIRR %d " "Polarity %s Status %s DeliveryMode %s Vec %d\n", i, (lo & IOART_DESTMOD) == IOART_DESTLOG ? "log" : "phy", (hi & IOART_DEST) >> 24, (lo & IOART_INTMASK) == IOART_INTMSET ? "" : "not", (lo & IOART_TRGRMOD) == IOART_TRGRLVL ? "lvl" : "edge", (lo & IOART_REM_IRR) == IOART_REM_IRR ? 1 : 0, (lo & IOART_INTPOL) == IOART_INTALO ? "low" : "high", (lo & IOART_DELIVS) == IOART_DELIVS ? "pend" : "idle", ioapic_delivery_mode(lo & IOART_DELMOD), (lo & IOART_INTVEC)); } } DB_SHOW_COMMAND(ioapic, db_show_ioapic) { struct ioapic *ioapic; int idx, i; if (!have_addr) { db_printf("usage: show ioapic index\n"); return; } idx = (int)addr; i = 0; STAILQ_FOREACH(ioapic, &ioapic_list, io_next) { if (idx == i) { db_show_ioapic_one(ioapic->io_addr); break; } i++; } } DB_SHOW_ALL_COMMAND(ioapics, db_show_all_ioapics) { struct ioapic *ioapic; STAILQ_FOREACH(ioapic, &ioapic_list, io_next) db_show_ioapic_one(ioapic->io_addr); } #endif Index: head/sys/x86/x86/local_apic.c =================================================================== --- head/sys/x86/x86/local_apic.c (revision 344854) +++ head/sys/x86/x86/local_apic.c (revision 344855) @@ -1,2182 +1,2182 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * - * Copyright (c) 2003 John Baldwin * Copyright (c) 1996, by Steve Passe * All rights reserved. + * Copyright (c) 2003 John Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. * 3. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Local APIC support on Pentium and later processors. */ #include __FBSDID("$FreeBSD$"); #include "opt_atpic.h" #include "opt_hwpmc_hooks.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #include #endif #ifdef __amd64__ #define SDT_APIC SDT_SYSIGT #define GSEL_APIC 0 #else #define SDT_APIC SDT_SYS386IGT #define GSEL_APIC GSEL(GCODE_SEL, SEL_KPL) #endif static MALLOC_DEFINE(M_LAPIC, "local_apic", "Local APIC items"); /* Sanity checks on IDT vectors. */ CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT); CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS); CTASSERT(APIC_LOCAL_INTS == 240); CTASSERT(IPI_STOP < APIC_SPURIOUS_INT); /* * I/O interrupts use non-negative IRQ values. These values are used * to mark unused IDT entries or IDT entries reserved for a non-I/O * interrupt. */ #define IRQ_FREE -1 #define IRQ_TIMER -2 #define IRQ_SYSCALL -3 #define IRQ_DTRACE_RET -4 #define IRQ_EVTCHN -5 enum lat_timer_mode { LAT_MODE_UNDEF = 0, LAT_MODE_PERIODIC = 1, LAT_MODE_ONESHOT = 2, LAT_MODE_DEADLINE = 3, }; /* * Support for local APICs. Local APICs manage interrupts on each * individual processor as opposed to I/O APICs which receive interrupts * from I/O devices and then forward them on to the local APICs. * * Local APICs can also send interrupts to each other thus providing the * mechanism for IPIs. */ struct lvt { u_int lvt_edgetrigger:1; u_int lvt_activehi:1; u_int lvt_masked:1; u_int lvt_active:1; u_int lvt_mode:16; u_int lvt_vector:8; }; struct lapic { struct lvt la_lvts[APIC_LVT_MAX + 1]; struct lvt la_elvts[APIC_ELVT_MAX + 1];; u_int la_id:8; u_int la_cluster:4; u_int la_cluster_id:2; u_int la_present:1; u_long *la_timer_count; uint64_t la_timer_period; enum lat_timer_mode la_timer_mode; uint32_t lvt_timer_base; uint32_t lvt_timer_last; /* Include IDT_SYSCALL to make indexing easier. */ int la_ioint_irqs[APIC_NUM_IOINTS + 1]; } static *lapics; /* Global defaults for local APIC LVT entries. */ static struct lvt lvts[APIC_LVT_MAX + 1] = { { 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 }, /* LINT0: masked ExtINT */ { 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 }, /* LINT1: NMI */ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_TIMER_INT }, /* Timer */ { 1, 1, 0, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT }, /* Error */ { 1, 1, 1, 1, APIC_LVT_DM_NMI, 0 }, /* PMC */ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT }, /* Thermal */ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_CMC_INT }, /* CMCI */ }; /* Global defaults for AMD local APIC ELVT entries. 
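 */

/*
 * [Editor's sketch -- not part of this change.] lvt_mode_impl(), further
 * on in this file, folds a struct lvt into the 32-bit LVT register image.
 * A simplified standalone model using the LVT bit positions from the
 * Intel SDM (vector 7:0, delivery mode 10:8, polarity 13, trigger 15,
 * mask 16); unlike the kernel, which stores the delivery mode
 * pre-shifted, this model shifts a small mode field.  The model_* names
 * are hypothetical.
 */
#include <assert.h>
#include <stdint.h>

struct model_lvt {
        unsigned edgetrigger:1;
        unsigned activehi:1;
        unsigned masked:1;
        unsigned mode:3;        /* delivery mode; 0 is "fixed" */
        unsigned vector:8;
};

static uint32_t
model_lvt_value(const struct model_lvt *lvt)
{
        uint32_t v = 0;

        if (!lvt->edgetrigger)
                v |= 1u << 15;                  /* level-triggered */
        if (!lvt->activehi)
                v |= 1u << 13;                  /* active-low polarity */
        if (lvt->masked)
                v |= 1u << 16;                  /* masked */
        v |= (uint32_t)lvt->mode << 8;          /* delivery mode */
        if (lvt->mode == 0)                     /* fixed mode uses a vector */
                v |= lvt->vector;
        return (v);
}

int
main(void)
{
        /* Masked, edge, active-high, fixed delivery, vector 0xd1. */
        struct model_lvt lvt = { 1, 1, 1, 0, 0xd1 };

        assert(model_lvt_value(&lvt) == ((1u << 16) | 0xd1));
        return (0);
}

/*
 * (End of editorial sketch; the ELVT defaults follow.)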
*/ static struct lvt elvts[APIC_ELVT_MAX + 1] = { { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 }, { 1, 1, 1, 0, APIC_LVT_DM_FIXED, APIC_CMC_INT }, { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 }, { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 }, }; static inthand_t *ioint_handlers[] = { NULL, /* 0 - 31 */ IDTVEC(apic_isr1), /* 32 - 63 */ IDTVEC(apic_isr2), /* 64 - 95 */ IDTVEC(apic_isr3), /* 96 - 127 */ IDTVEC(apic_isr4), /* 128 - 159 */ IDTVEC(apic_isr5), /* 160 - 191 */ IDTVEC(apic_isr6), /* 192 - 223 */ IDTVEC(apic_isr7), /* 224 - 255 */ }; static inthand_t *ioint_pti_handlers[] = { NULL, /* 0 - 31 */ IDTVEC(apic_isr1_pti), /* 32 - 63 */ IDTVEC(apic_isr2_pti), /* 64 - 95 */ IDTVEC(apic_isr3_pti), /* 96 - 127 */ IDTVEC(apic_isr4_pti), /* 128 - 159 */ IDTVEC(apic_isr5_pti), /* 160 - 191 */ IDTVEC(apic_isr6_pti), /* 192 - 223 */ IDTVEC(apic_isr7_pti), /* 224 - 255 */ }; static u_int32_t lapic_timer_divisors[] = { APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16, APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128 }; extern inthand_t IDTVEC(rsvd_pti), IDTVEC(rsvd); volatile char *lapic_map; vm_paddr_t lapic_paddr; int x2apic_mode; int lapic_eoi_suppression; static int lapic_timer_tsc_deadline; static u_long lapic_timer_divisor, count_freq; static struct eventtimer lapic_et; #ifdef SMP static uint64_t lapic_ipi_wait_mult; #endif unsigned int max_apic_id; SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD, 0, "APIC options"); SYSCTL_INT(_hw_apic, OID_AUTO, x2apic_mode, CTLFLAG_RD, &x2apic_mode, 0, ""); SYSCTL_INT(_hw_apic, OID_AUTO, eoi_suppression, CTLFLAG_RD, &lapic_eoi_suppression, 0, ""); SYSCTL_INT(_hw_apic, OID_AUTO, timer_tsc_deadline, CTLFLAG_RD, &lapic_timer_tsc_deadline, 0, ""); static void lapic_calibrate_initcount(struct lapic *la); static void lapic_calibrate_deadline(struct lapic *la); static uint32_t lapic_read32(enum LAPIC_REGISTERS reg) { uint32_t res; if (x2apic_mode) { res = rdmsr32(MSR_APIC_000 + reg); } else { res = *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL); } return (res); } static void lapic_write32(enum LAPIC_REGISTERS reg, uint32_t val) { if (x2apic_mode) { mfence(); lfence(); wrmsr(MSR_APIC_000 + reg, val); } else { *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val; } } static void lapic_write32_nofence(enum LAPIC_REGISTERS reg, uint32_t val) { if (x2apic_mode) { wrmsr(MSR_APIC_000 + reg, val); } else { *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val; } } #ifdef SMP static uint64_t lapic_read_icr(void) { uint64_t v; uint32_t vhi, vlo; if (x2apic_mode) { v = rdmsr(MSR_APIC_000 + LAPIC_ICR_LO); } else { vhi = lapic_read32(LAPIC_ICR_HI); vlo = lapic_read32(LAPIC_ICR_LO); v = ((uint64_t)vhi << 32) | vlo; } return (v); } static uint64_t lapic_read_icr_lo(void) { return (lapic_read32(LAPIC_ICR_LO)); } static void lapic_write_icr(uint32_t vhi, uint32_t vlo) { uint64_t v; if (x2apic_mode) { v = ((uint64_t)vhi << 32) | vlo; mfence(); wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, v); } else { lapic_write32(LAPIC_ICR_HI, vhi); lapic_write32(LAPIC_ICR_LO, vlo); } } #endif /* SMP */ static void native_lapic_enable_x2apic(void) { uint64_t apic_base; apic_base = rdmsr(MSR_APICBASE); apic_base |= APICBASE_X2APIC | APICBASE_ENABLED; wrmsr(MSR_APICBASE, apic_base); } static bool native_lapic_is_x2apic(void) { uint64_t apic_base; apic_base = rdmsr(MSR_APICBASE); return ((apic_base & (APICBASE_X2APIC | APICBASE_ENABLED)) == (APICBASE_X2APIC | APICBASE_ENABLED)); } static void lapic_enable(void); static void lapic_resume(struct pic *pic, bool suspend_cancelled); static void 
lapic_timer_oneshot(struct lapic *); static void lapic_timer_oneshot_nointr(struct lapic *, uint32_t); static void lapic_timer_periodic(struct lapic *); static void lapic_timer_deadline(struct lapic *); static void lapic_timer_stop(struct lapic *); static void lapic_timer_set_divisor(u_int divisor); static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value); static int lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period); static int lapic_et_stop(struct eventtimer *et); static u_int apic_idt_to_irq(u_int apic_id, u_int vector); static void lapic_set_tpr(u_int vector); struct pic lapic_pic = { .pic_resume = lapic_resume }; /* Forward declarations for apic_ops */ static void native_lapic_create(u_int apic_id, int boot_cpu); static void native_lapic_init(vm_paddr_t addr); static void native_lapic_xapic_mode(void); static void native_lapic_setup(int boot); static void native_lapic_dump(const char *str); static void native_lapic_disable(void); static void native_lapic_eoi(void); static int native_lapic_id(void); static int native_lapic_intr_pending(u_int vector); static u_int native_apic_cpuid(u_int apic_id); static u_int native_apic_alloc_vector(u_int apic_id, u_int irq); static u_int native_apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align); static void native_apic_disable_vector(u_int apic_id, u_int vector); static void native_apic_enable_vector(u_int apic_id, u_int vector); static void native_apic_free_vector(u_int apic_id, u_int vector, u_int irq); static void native_lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id); static int native_lapic_enable_pmc(void); static void native_lapic_disable_pmc(void); static void native_lapic_reenable_pmc(void); static void native_lapic_enable_cmc(void); static int native_lapic_enable_mca_elvt(void); static int native_lapic_set_lvt_mask(u_int apic_id, u_int lvt, u_char masked); static int native_lapic_set_lvt_mode(u_int apic_id, u_int lvt, uint32_t mode); static int native_lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol); static int native_lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger); #ifdef SMP static void native_lapic_ipi_raw(register_t icrlo, u_int dest); static void native_lapic_ipi_vectored(u_int vector, int dest); static int native_lapic_ipi_wait(int delay); #endif /* SMP */ static int native_lapic_ipi_alloc(inthand_t *ipifunc); static void native_lapic_ipi_free(int vector); struct apic_ops apic_ops = { .create = native_lapic_create, .init = native_lapic_init, .xapic_mode = native_lapic_xapic_mode, .is_x2apic = native_lapic_is_x2apic, .setup = native_lapic_setup, .dump = native_lapic_dump, .disable = native_lapic_disable, .eoi = native_lapic_eoi, .id = native_lapic_id, .intr_pending = native_lapic_intr_pending, .set_logical_id = native_lapic_set_logical_id, .cpuid = native_apic_cpuid, .alloc_vector = native_apic_alloc_vector, .alloc_vectors = native_apic_alloc_vectors, .enable_vector = native_apic_enable_vector, .disable_vector = native_apic_disable_vector, .free_vector = native_apic_free_vector, .enable_pmc = native_lapic_enable_pmc, .disable_pmc = native_lapic_disable_pmc, .reenable_pmc = native_lapic_reenable_pmc, .enable_cmc = native_lapic_enable_cmc, .enable_mca_elvt = native_lapic_enable_mca_elvt, #ifdef SMP .ipi_raw = native_lapic_ipi_raw, .ipi_vectored = native_lapic_ipi_vectored, .ipi_wait = native_lapic_ipi_wait, #endif .ipi_alloc = native_lapic_ipi_alloc, .ipi_free = native_lapic_ipi_free, .set_lvt_mask = 
native_lapic_set_lvt_mask, .set_lvt_mode = native_lapic_set_lvt_mode, .set_lvt_polarity = native_lapic_set_lvt_polarity, .set_lvt_triggermode = native_lapic_set_lvt_triggermode, }; static uint32_t lvt_mode_impl(struct lapic *la, struct lvt *lvt, u_int pin, uint32_t value) { value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM | APIC_LVT_VECTOR); if (lvt->lvt_edgetrigger == 0) value |= APIC_LVT_TM; if (lvt->lvt_activehi == 0) value |= APIC_LVT_IIPP_INTALO; if (lvt->lvt_masked) value |= APIC_LVT_M; value |= lvt->lvt_mode; switch (lvt->lvt_mode) { case APIC_LVT_DM_NMI: case APIC_LVT_DM_SMI: case APIC_LVT_DM_INIT: case APIC_LVT_DM_EXTINT: if (!lvt->lvt_edgetrigger && bootverbose) { printf("lapic%u: Forcing LINT%u to edge trigger\n", la->la_id, pin); value &= ~APIC_LVT_TM; } /* Use a vector of 0. */ break; case APIC_LVT_DM_FIXED: value |= lvt->lvt_vector; break; default: panic("bad APIC LVT delivery mode: %#x\n", value); } return (value); } static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value) { struct lvt *lvt; KASSERT(pin <= APIC_LVT_MAX, ("%s: pin %u out of range", __func__, pin)); if (la->la_lvts[pin].lvt_active) lvt = &la->la_lvts[pin]; else lvt = &lvts[pin]; return (lvt_mode_impl(la, lvt, pin, value)); } static uint32_t elvt_mode(struct lapic *la, u_int idx, uint32_t value) { struct lvt *elvt; KASSERT(idx <= APIC_ELVT_MAX, ("%s: idx %u out of range", __func__, idx)); elvt = &la->la_elvts[idx]; KASSERT(elvt->lvt_active, ("%s: ELVT%u is not active", __func__, idx)); KASSERT(elvt->lvt_edgetrigger, ("%s: ELVT%u is not edge triggered", __func__, idx)); KASSERT(elvt->lvt_activehi, ("%s: ELVT%u is not active high", __func__, idx)); return (lvt_mode_impl(la, elvt, idx, value)); } /* * Map the local APIC and setup necessary interrupt vectors. */ static void native_lapic_init(vm_paddr_t addr) { #ifdef SMP uint64_t r, r1, r2, rx; #endif uint32_t ver; u_int regs[4]; int i, arat; /* * Enable x2APIC mode if possible. Map the local APIC * registers page. * * Keep the LAPIC registers page mapped uncached for x2APIC * mode too, to have direct map page attribute set to * uncached. This is needed to work around CPU errata present * on all Intel processors. */ KASSERT(trunc_page(addr) == addr, ("local APIC not aligned on a page boundary")); lapic_paddr = addr; lapic_map = pmap_mapdev(addr, PAGE_SIZE); if (x2apic_mode) { native_lapic_enable_x2apic(); lapic_map = NULL; } /* Setup the spurious interrupt handler. */ setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL, GSEL_APIC); /* Perform basic initialization of the BSP's local APIC. */ lapic_enable(); /* Set BSP's per-CPU local APIC ID. */ PCPU_SET(apic_id, lapic_id()); /* Local APIC timer interrupt. */ setidt(APIC_TIMER_INT, pti ? IDTVEC(timerint_pti) : IDTVEC(timerint), SDT_APIC, SEL_KPL, GSEL_APIC); /* Local APIC error interrupt. */ setidt(APIC_ERROR_INT, pti ? IDTVEC(errorint_pti) : IDTVEC(errorint), SDT_APIC, SEL_KPL, GSEL_APIC); /* XXX: Thermal interrupt */ /* Local APIC CMCI. */ setidt(APIC_CMC_INT, pti ? IDTVEC(cmcint_pti) : IDTVEC(cmcint), SDT_APIC, SEL_KPL, GSEL_APIC); if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) { arat = 0; /* Intel CPUID 0x06 EAX[2] set if APIC timer runs in C3. 
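 */

/*
 * [Editor's sketch -- not part of this change.] The ARAT probe just below
 * can be reproduced in userland with the compiler's <cpuid.h> helper:
 * CPUID leaf 0x06, EAX bit 2 reports an always-running APIC timer.
 * GCC/Clang on x86 only; the printed wording is this editor's.
 */
#include <stdio.h>
#include <cpuid.h>

int
main(void)
{
        unsigned eax, ebx, ecx, edx;

        /* __get_cpuid() returns 0 if the leaf is not supported. */
        if (__get_cpuid(0x06, &eax, &ebx, &ecx, &edx) == 0) {
                printf("CPUID leaf 0x06 not supported\n");
                return (1);
        }
        printf("ARAT (APIC timer runs in deep C-states): %s\n",
            (eax & (1u << 2)) ? "yes" : "no");
        return (0);
}

/*
 * (End of editorial sketch.)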
*/ if (cpu_vendor_id == CPU_VENDOR_INTEL && cpu_high >= 6) { do_cpuid(0x06, regs); if ((regs[0] & CPUTPM1_ARAT) != 0) arat = 1; } else if (cpu_vendor_id == CPU_VENDOR_AMD && CPUID_TO_FAMILY(cpu_id) >= 0x12) { arat = 1; } bzero(&lapic_et, sizeof(lapic_et)); lapic_et.et_name = "LAPIC"; lapic_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU; lapic_et.et_quality = 600; if (!arat) { lapic_et.et_flags |= ET_FLAGS_C3STOP; lapic_et.et_quality = 100; } if ((cpu_feature & CPUID_TSC) != 0 && (cpu_feature2 & CPUID2_TSCDLT) != 0 && tsc_is_invariant && tsc_freq != 0) { lapic_timer_tsc_deadline = 1; TUNABLE_INT_FETCH("hw.lapic_tsc_deadline", &lapic_timer_tsc_deadline); } lapic_et.et_frequency = 0; /* We don't know the frequency yet, so guess for now. */ lapic_et.et_min_period = 0x00001000LL; lapic_et.et_max_period = SBT_1S; lapic_et.et_start = lapic_et_start; lapic_et.et_stop = lapic_et_stop; lapic_et.et_priv = NULL; et_register(&lapic_et); } /* * Set lapic_eoi_suppression after lapic_enable(), so as not to * enable suppression in the hardware prematurely. Note that * by default we enable suppression even when the system only has * one I/O APIC, since the EOI is otherwise broadcast to all APIC * agents, including CPUs. * * It seems that at least some KVM versions report the * EOI_SUPPRESSION bit, but auto-EOI does not work. */ ver = lapic_read32(LAPIC_VERSION); if ((ver & APIC_VER_EOI_SUPPRESSION) != 0) { lapic_eoi_suppression = 1; if (vm_guest == VM_GUEST_KVM) { if (bootverbose) printf( "KVM -- disabling lapic eoi suppression\n"); lapic_eoi_suppression = 0; } TUNABLE_INT_FETCH("hw.lapic_eoi_suppression", &lapic_eoi_suppression); } #ifdef SMP #define LOOPS 100000 /* * Calibrate the busy loop waiting for IPI ack in xAPIC mode. * lapic_ipi_wait_mult contains the number of iterations which * approximately delay execution for 1 microsecond (the * argument to native_lapic_ipi_wait() is in microseconds). * * We assume that the TSC is present and already measured. * Possible TSC frequency jumps are irrelevant to the * calibration loop below: the CPU clock management code is * not yet started, and we do not enter sleep states. */ KASSERT((cpu_feature & CPUID_TSC) != 0 && tsc_freq != 0, ("TSC not initialized")); if (!x2apic_mode) { r = rdtsc(); for (rx = 0; rx < LOOPS; rx++) { (void)lapic_read_icr_lo(); ia32_pause(); } r = rdtsc() - r; r1 = tsc_freq * LOOPS; r2 = r * 1000000; lapic_ipi_wait_mult = r1 >= r2 ? r1 / r2 : 1; if (bootverbose) { printf("LAPIC: ipi_wait() us multiplier %ju (r %ju " "tsc %ju)\n", (uintmax_t)lapic_ipi_wait_mult, (uintmax_t)r, (uintmax_t)tsc_freq); } } #undef LOOPS #endif /* SMP */ } /* * Create a local APIC instance. */ static void native_lapic_create(u_int apic_id, int boot_cpu) { int i; if (apic_id > max_apic_id) { printf("APIC: Ignoring local APIC with ID %d\n", apic_id); if (boot_cpu) panic("Can't ignore BSP"); return; } KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u", apic_id)); /* * Assume no local LVT overrides and a cluster of 0 and * intra-cluster ID of 0.
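/*
 * Illustrative sketch, not from the FreeBSD sources: the calibration
 * above times LOOPS dummy ICR reads with the TSC and derives how many
 * loop iterations fit in one microsecond:
 *
 *	iterations/us = (tsc_freq * LOOPS) / (r * 1000000)
 *
 * which is the r1 / r2 computation above.  Stand-alone model with
 * assumed sample numbers (2 GHz TSC, 100000 iterations measured at
 * 40000000 cycles, i.e. 20 ms, giving 5 iterations per microsecond):
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t tsc_freq = 2000000000;		/* assumed 2 GHz TSC */
	uint64_t loops = 100000;
	uint64_t r = 40000000;			/* assumed measured cycles */
	uint64_t r1 = tsc_freq * loops;
	uint64_t r2 = r * 1000000;
	uint64_t mult = r1 >= r2 ? r1 / r2 : 1;	/* clamp to at least 1 */

	printf("ipi_wait() us multiplier: %ju\n", (uintmax_t)mult); /* 5 */
	return (0);
}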
*/ lapics[apic_id].la_present = 1; lapics[apic_id].la_id = apic_id; for (i = 0; i <= APIC_LVT_MAX; i++) { lapics[apic_id].la_lvts[i] = lvts[i]; lapics[apic_id].la_lvts[i].lvt_active = 0; } for (i = 0; i <= APIC_ELVT_MAX; i++) { lapics[apic_id].la_elvts[i] = elvts[i]; lapics[apic_id].la_elvts[i].lvt_active = 0; } for (i = 0; i <= APIC_NUM_IOINTS; i++) lapics[apic_id].la_ioint_irqs[i] = IRQ_FREE; lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL; lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] = IRQ_TIMER; #ifdef KDTRACE_HOOKS lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] = IRQ_DTRACE_RET; #endif #ifdef XENHVM lapics[apic_id].la_ioint_irqs[IDT_EVTCHN - APIC_IO_INTS] = IRQ_EVTCHN; #endif #ifdef SMP cpu_add(apic_id, boot_cpu); #endif } static inline uint32_t amd_read_ext_features(void) { uint32_t version; if (cpu_vendor_id != CPU_VENDOR_AMD) return (0); version = lapic_read32(LAPIC_VERSION); if ((version & APIC_VER_AMD_EXT_SPACE) != 0) return (lapic_read32(LAPIC_EXT_FEATURES)); else return (0); } static inline uint32_t amd_read_elvt_count(void) { uint32_t extf; uint32_t count; extf = amd_read_ext_features(); count = (extf & APIC_EXTF_ELVT_MASK) >> APIC_EXTF_ELVT_SHIFT; count = min(count, APIC_ELVT_MAX + 1); return (count); } /* * Dump contents of local APIC registers */ static void native_lapic_dump(const char* str) { uint32_t version; uint32_t maxlvt; uint32_t extf; int elvt_count; int i; version = lapic_read32(LAPIC_VERSION); maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; printf("cpu%d %s:\n", PCPU_GET(cpuid), str); printf(" ID: 0x%08x VER: 0x%08x LDR: 0x%08x DFR: 0x%08x", lapic_read32(LAPIC_ID), version, lapic_read32(LAPIC_LDR), x2apic_mode ? 0 : lapic_read32(LAPIC_DFR)); if ((cpu_feature2 & CPUID2_X2APIC) != 0) printf(" x2APIC: %d", x2apic_mode); printf("\n lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n", lapic_read32(LAPIC_LVT_LINT0), lapic_read32(LAPIC_LVT_LINT1), lapic_read32(LAPIC_TPR), lapic_read32(LAPIC_SVR)); printf(" timer: 0x%08x therm: 0x%08x err: 0x%08x", lapic_read32(LAPIC_LVT_TIMER), lapic_read32(LAPIC_LVT_THERMAL), lapic_read32(LAPIC_LVT_ERROR)); if (maxlvt >= APIC_LVT_PMC) printf(" pmc: 0x%08x", lapic_read32(LAPIC_LVT_PCINT)); printf("\n"); if (maxlvt >= APIC_LVT_CMCI) printf(" cmci: 0x%08x\n", lapic_read32(LAPIC_LVT_CMCI)); extf = amd_read_ext_features(); if (extf != 0) { printf(" AMD ext features: 0x%08x\n", extf); elvt_count = amd_read_elvt_count(); for (i = 0; i < elvt_count; i++) printf(" AMD elvt%d: 0x%08x\n", i, lapic_read32(LAPIC_EXT_LVT0 + i)); } } static void native_lapic_xapic_mode(void) { register_t saveintr; saveintr = intr_disable(); if (x2apic_mode) native_lapic_enable_x2apic(); intr_restore(saveintr); } static void native_lapic_setup(int boot) { struct lapic *la; uint32_t version; uint32_t maxlvt; register_t saveintr; int elvt_count; int i; saveintr = intr_disable(); la = &lapics[lapic_id()]; KASSERT(la->la_present, ("missing APIC structure")); version = lapic_read32(LAPIC_VERSION); maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; /* Initialize the TPR to allow all interrupts. */ lapic_set_tpr(0); /* Setup spurious vector and enable the local APIC. */ lapic_enable(); /* Program LINT[01] LVT entries. */ lapic_write32(LAPIC_LVT_LINT0, lvt_mode(la, APIC_LVT_LINT0, lapic_read32(LAPIC_LVT_LINT0))); lapic_write32(LAPIC_LVT_LINT1, lvt_mode(la, APIC_LVT_LINT1, lapic_read32(LAPIC_LVT_LINT1))); /* Program the PMC LVT entry if present. 
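/*
 * Illustrative sketch, not from the FreeBSD sources:
 * amd_read_elvt_count() above extracts a small counter field from the
 * extended-features register with a mask and shift, then clamps it to
 * what the driver supports.  The DEMO_* layout below is invented; the
 * real APIC_EXTF_* values live in a header that is not part of this
 * diff.
 */
#include <stdint.h>
#include <stdio.h>

#define	DEMO_ELVT_MASK	0x00ff0000u	/* hypothetical field placement */
#define	DEMO_ELVT_SHIFT	16
#define	DEMO_ELVT_MAX	3

int
main(void)
{
	uint32_t extf = 0x00040001u;	/* assumed register contents */
	uint32_t count = (extf & DEMO_ELVT_MASK) >> DEMO_ELVT_SHIFT;

	if (count > DEMO_ELVT_MAX + 1)	/* clamp, as min() does above */
		count = DEMO_ELVT_MAX + 1;
	printf("ELVT count: %u\n", count);	/* 4 */
	return (0);
}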
*/ if (maxlvt >= APIC_LVT_PMC) { lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC, LAPIC_LVT_PCINT)); } /* Program timer LVT. */ la->lvt_timer_base = lvt_mode(la, APIC_LVT_TIMER, lapic_read32(LAPIC_LVT_TIMER)); la->lvt_timer_last = la->lvt_timer_base; lapic_write32(LAPIC_LVT_TIMER, la->lvt_timer_base); /* Calibrate the timer parameters using BSP. */ if (boot && IS_BSP()) { lapic_calibrate_initcount(la); if (lapic_timer_tsc_deadline) lapic_calibrate_deadline(la); } /* Setup the timer if configured. */ if (la->la_timer_mode != LAT_MODE_UNDEF) { KASSERT(la->la_timer_period != 0, ("lapic%u: zero divisor", lapic_id())); switch (la->la_timer_mode) { case LAT_MODE_PERIODIC: lapic_timer_set_divisor(lapic_timer_divisor); lapic_timer_periodic(la); break; case LAT_MODE_ONESHOT: lapic_timer_set_divisor(lapic_timer_divisor); lapic_timer_oneshot(la); break; case LAT_MODE_DEADLINE: lapic_timer_deadline(la); break; default: panic("corrupted la_timer_mode %p %d", la, la->la_timer_mode); } } /* Program error LVT and clear any existing errors. */ lapic_write32(LAPIC_LVT_ERROR, lvt_mode(la, APIC_LVT_ERROR, lapic_read32(LAPIC_LVT_ERROR))); lapic_write32(LAPIC_ESR, 0); /* XXX: Thermal LVT */ /* Program the CMCI LVT entry if present. */ if (maxlvt >= APIC_LVT_CMCI) { lapic_write32(LAPIC_LVT_CMCI, lvt_mode(la, APIC_LVT_CMCI, lapic_read32(LAPIC_LVT_CMCI))); } elvt_count = amd_read_elvt_count(); for (i = 0; i < elvt_count; i++) { if (la->la_elvts[i].lvt_active) lapic_write32(LAPIC_EXT_LVT0 + i, elvt_mode(la, i, lapic_read32(LAPIC_EXT_LVT0 + i))); } intr_restore(saveintr); } static void native_lapic_intrcnt(void *dummy __unused) { struct pcpu *pc; struct lapic *la; char buf[MAXCOMLEN + 1]; /* If there are no APICs, skip this function. */ if (lapics == NULL) return; STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { la = &lapics[pc->pc_apic_id]; if (!la->la_present) continue; snprintf(buf, sizeof(buf), "cpu%d:timer", pc->pc_cpuid); intrcnt_add(buf, &la->la_timer_count); } } SYSINIT(native_lapic_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, native_lapic_intrcnt, NULL); static void native_lapic_reenable_pmc(void) { #ifdef HWPMC_HOOKS uint32_t value; value = lapic_read32(LAPIC_LVT_PCINT); value &= ~APIC_LVT_M; lapic_write32(LAPIC_LVT_PCINT, value); #endif } #ifdef HWPMC_HOOKS static void lapic_update_pmc(void *dummy) { struct lapic *la; la = &lapics[lapic_id()]; lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC, lapic_read32(LAPIC_LVT_PCINT))); } #endif static int native_lapic_enable_pmc(void) { #ifdef HWPMC_HOOKS u_int32_t maxlvt; /* Fail if the local APIC is not present. */ if (!x2apic_mode && lapic_map == NULL) return (0); /* Fail if the PMC LVT is not present. */ maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; if (maxlvt < APIC_LVT_PMC) return (0); lvts[APIC_LVT_PMC].lvt_masked = 0; #ifdef EARLY_AP_STARTUP MPASS(mp_ncpus == 1 || smp_started); smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); #else #ifdef SMP /* * If hwpmc was loaded at boot time then the APs may not be * started yet. In that case, don't forward the request to * them as they will program the lvt when they start. */ if (smp_started) smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); else #endif lapic_update_pmc(NULL); #endif return (1); #else return (0); #endif } static void native_lapic_disable_pmc(void) { #ifdef HWPMC_HOOKS u_int32_t maxlvt; /* Fail if the local APIC is not present. */ if (!x2apic_mode && lapic_map == NULL) return; /* Fail if the PMC LVT is not present. 
*/ maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; if (maxlvt < APIC_LVT_PMC) return; lvts[APIC_LVT_PMC].lvt_masked = 1; #ifdef SMP /* The APs should always be started when hwpmc is unloaded. */ KASSERT(mp_ncpus == 1 || smp_started, ("hwpmc unloaded too early")); #endif smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); #endif } static void lapic_calibrate_initcount(struct lapic *la) { u_long value; /* Start off with a divisor of 2 (power on reset default). */ lapic_timer_divisor = 2; /* Try to calibrate the local APIC timer. */ do { lapic_timer_set_divisor(lapic_timer_divisor); lapic_timer_oneshot_nointr(la, APIC_TIMER_MAX_COUNT); DELAY(1000000); value = APIC_TIMER_MAX_COUNT - lapic_read32(LAPIC_CCR_TIMER); if (value != APIC_TIMER_MAX_COUNT) break; lapic_timer_divisor <<= 1; } while (lapic_timer_divisor <= 128); if (lapic_timer_divisor > 128) panic("lapic: Divisor too big"); if (bootverbose) { printf("lapic: Divisor %lu, Frequency %lu Hz\n", lapic_timer_divisor, value); } count_freq = value; } static void lapic_calibrate_deadline(struct lapic *la __unused) { if (bootverbose) { printf("lapic: deadline tsc mode, Frequency %ju Hz\n", (uintmax_t)tsc_freq); } } static void lapic_change_mode(struct eventtimer *et, struct lapic *la, enum lat_timer_mode newmode) { if (la->la_timer_mode == newmode) return; switch (newmode) { case LAT_MODE_PERIODIC: lapic_timer_set_divisor(lapic_timer_divisor); et->et_frequency = count_freq; break; case LAT_MODE_DEADLINE: et->et_frequency = tsc_freq; break; case LAT_MODE_ONESHOT: lapic_timer_set_divisor(lapic_timer_divisor); et->et_frequency = count_freq; break; default: panic("lapic_change_mode %d", newmode); } la->la_timer_mode = newmode; et->et_min_period = (0x00000002LLU << 32) / et->et_frequency; et->et_max_period = (0xfffffffeLLU << 32) / et->et_frequency; } static int lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { struct lapic *la; la = &lapics[PCPU_GET(apic_id)]; if (period != 0) { lapic_change_mode(et, la, LAT_MODE_PERIODIC); la->la_timer_period = ((uint32_t)et->et_frequency * period) >> 32; lapic_timer_periodic(la); } else if (lapic_timer_tsc_deadline) { lapic_change_mode(et, la, LAT_MODE_DEADLINE); la->la_timer_period = (et->et_frequency * first) >> 32; lapic_timer_deadline(la); } else { lapic_change_mode(et, la, LAT_MODE_ONESHOT); la->la_timer_period = ((uint32_t)et->et_frequency * first) >> 32; lapic_timer_oneshot(la); } return (0); } static int lapic_et_stop(struct eventtimer *et) { struct lapic *la; la = &lapics[PCPU_GET(apic_id)]; lapic_timer_stop(la); la->la_timer_mode = LAT_MODE_UNDEF; return (0); } static void native_lapic_disable(void) { uint32_t value; /* Software disable the local APIC. */ value = lapic_read32(LAPIC_SVR); value &= ~APIC_SVR_SWEN; lapic_write32(LAPIC_SVR, value); } static void lapic_enable(void) { uint32_t value; /* Program the spurious vector to enable the local APIC. */ value = lapic_read32(LAPIC_SVR); value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS); value |= APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT; if (lapic_eoi_suppression) value |= APIC_SVR_EOI_SUPPRESSION; lapic_write32(LAPIC_SVR, value); } /* Reset the local APIC on the BSP during resume. 
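/*
 * Illustrative sketch, not from the FreeBSD sources:
 * lapic_calibrate_initcount() above arms a one-shot countdown at the
 * maximum initial count with the interrupt masked, waits one second,
 * and reads back how far the counter got.  If it ran all the way to
 * zero the measurement saturated, so the divisor is doubled and the
 * measurement retried.  Stand-alone model with an assumed input clock:
 */
#include <stdint.h>
#include <stdio.h>

#define	DEMO_MAX_COUNT	0xffffffffu
#define	DEMO_CLOCK_HZ	400000000u	/* assumed timer input clock */

/* Counts left in a modelled current-count register after one second. */
static uint32_t
demo_ccr_after_1s(uint32_t divisor)
{
	uint64_t ticks = (uint64_t)DEMO_CLOCK_HZ / divisor;

	return (ticks >= DEMO_MAX_COUNT ? 0 :
	    DEMO_MAX_COUNT - (uint32_t)ticks);
}

int
main(void)
{
	uint32_t divisor, value = 0;

	for (divisor = 2; divisor <= 128; divisor <<= 1) {
		value = DEMO_MAX_COUNT - demo_ccr_after_1s(divisor);
		if (value != DEMO_MAX_COUNT)	/* did not saturate */
			break;
	}
	printf("divisor %u, %u ticks/s\n", divisor, value);
	return (0);
}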
*/ static void lapic_resume(struct pic *pic, bool suspend_cancelled) { lapic_setup(0); } static int native_lapic_id(void) { uint32_t v; KASSERT(x2apic_mode || lapic_map != NULL, ("local APIC is not mapped")); v = lapic_read32(LAPIC_ID); if (!x2apic_mode) v >>= APIC_ID_SHIFT; return (v); } static int native_lapic_intr_pending(u_int vector) { uint32_t irr; /* * The IRR registers are an array of registers each of which * only describes 32 interrupts in the low 32 bits. Thus, we * divide the vector by 32 to get the register index. * Finally, we modulus the vector by 32 to determine the * individual bit to test. */ irr = lapic_read32(LAPIC_IRR0 + vector / 32); return (irr & 1 << (vector % 32)); } static void native_lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id) { struct lapic *la; KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist", __func__, apic_id)); KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big", __func__, cluster)); KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID, ("%s: intra cluster id %u too big", __func__, cluster_id)); la = &lapics[apic_id]; la->la_cluster = cluster; la->la_cluster_id = cluster_id; } static int native_lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked) { if (pin > APIC_LVT_MAX) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvts[pin].lvt_masked = masked; if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[pin].lvt_masked = masked; lapics[apic_id].la_lvts[pin].lvt_active = 1; if (bootverbose) printf("lapic%u:", apic_id); } if (bootverbose) printf(" LINT%u %s\n", pin, masked ? "masked" : "unmasked"); return (0); } static int native_lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode) { struct lvt *lvt; if (pin > APIC_LVT_MAX) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvt = &lvts[pin]; if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lvt = &lapics[apic_id].la_lvts[pin]; lvt->lvt_active = 1; if (bootverbose) printf("lapic%u:", apic_id); } lvt->lvt_mode = mode; switch (mode) { case APIC_LVT_DM_NMI: case APIC_LVT_DM_SMI: case APIC_LVT_DM_INIT: case APIC_LVT_DM_EXTINT: lvt->lvt_edgetrigger = 1; lvt->lvt_activehi = 1; if (mode == APIC_LVT_DM_EXTINT) lvt->lvt_masked = 1; else lvt->lvt_masked = 0; break; default: panic("Unsupported delivery mode: 0x%x\n", mode); } if (bootverbose) { printf(" Routing "); switch (mode) { case APIC_LVT_DM_NMI: printf("NMI"); break; case APIC_LVT_DM_SMI: printf("SMI"); break; case APIC_LVT_DM_INIT: printf("INIT"); break; case APIC_LVT_DM_EXTINT: printf("ExtINT"); break; } printf(" -> LINT%u\n", pin); } return (0); } static int native_lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol) { if (pin > APIC_LVT_MAX || pol == INTR_POLARITY_CONFORM) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH); if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[pin].lvt_active = 1; lapics[apic_id].la_lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH); if (bootverbose) printf("lapic%u:", apic_id); } if (bootverbose) printf(" LINT%u polarity: %s\n", pin, pol == INTR_POLARITY_HIGH ? 
"high" : "low"); return (0); } static int native_lapic_set_lvt_triggermode(u_int apic_id, u_int pin, enum intr_trigger trigger) { if (pin > APIC_LVT_MAX || trigger == INTR_TRIGGER_CONFORM) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE); if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE); lapics[apic_id].la_lvts[pin].lvt_active = 1; if (bootverbose) printf("lapic%u:", apic_id); } if (bootverbose) printf(" LINT%u trigger: %s\n", pin, trigger == INTR_TRIGGER_EDGE ? "edge" : "level"); return (0); } /* * Adjust the TPR of the current CPU so that it blocks all interrupts below * the passed in vector. */ static void lapic_set_tpr(u_int vector) { #ifdef CHEAP_TPR lapic_write32(LAPIC_TPR, vector); #else uint32_t tpr; tpr = lapic_read32(LAPIC_TPR) & ~APIC_TPR_PRIO; tpr |= vector; lapic_write32(LAPIC_TPR, tpr); #endif } static void native_lapic_eoi(void) { lapic_write32_nofence(LAPIC_EOI, 0); } void lapic_handle_intr(int vector, struct trapframe *frame) { struct intsrc *isrc; isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id), vector)); intr_execute_handlers(isrc, frame); } void lapic_handle_timer(struct trapframe *frame) { struct lapic *la; struct trapframe *oldframe; struct thread *td; /* Send EOI first thing. */ lapic_eoi(); #if defined(SMP) && !defined(SCHED_ULE) /* * Don't do any accounting for the disabled HTT cores, since it * will provide misleading numbers for the userland. * * No locking is necessary here, since even if we lose the race * when hlt_cpus_mask changes it is not a big deal, really. * * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask * and unlike other schedulers it actually schedules threads to * those CPUs. */ if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask)) return; #endif /* Look up our local APIC structure for the tick counters. 
*/ la = &lapics[PCPU_GET(apic_id)]; (*la->la_timer_count)++; critical_enter(); if (lapic_et.et_active) { td = curthread; td->td_intr_nesting_level++; oldframe = td->td_intr_frame; td->td_intr_frame = frame; lapic_et.et_event_cb(&lapic_et, lapic_et.et_arg); td->td_intr_frame = oldframe; td->td_intr_nesting_level--; } critical_exit(); } static void lapic_timer_set_divisor(u_int divisor) { KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor)); KASSERT(ffs(divisor) <= nitems(lapic_timer_divisors), ("lapic: invalid divisor %u", divisor)); lapic_write32(LAPIC_DCR_TIMER, lapic_timer_divisors[ffs(divisor) - 1]); } static void lapic_timer_oneshot(struct lapic *la) { uint32_t value; value = la->lvt_timer_base; value &= ~(APIC_LVTT_TM | APIC_LVT_M); value |= APIC_LVTT_TM_ONE_SHOT; la->lvt_timer_last = value; lapic_write32(LAPIC_LVT_TIMER, value); lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period); } static void lapic_timer_oneshot_nointr(struct lapic *la, uint32_t count) { uint32_t value; value = la->lvt_timer_base; value &= ~APIC_LVTT_TM; value |= APIC_LVTT_TM_ONE_SHOT | APIC_LVT_M; la->lvt_timer_last = value; lapic_write32(LAPIC_LVT_TIMER, value); lapic_write32(LAPIC_ICR_TIMER, count); } static void lapic_timer_periodic(struct lapic *la) { uint32_t value; value = la->lvt_timer_base; value &= ~(APIC_LVTT_TM | APIC_LVT_M); value |= APIC_LVTT_TM_PERIODIC; la->lvt_timer_last = value; lapic_write32(LAPIC_LVT_TIMER, value); lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period); } static void lapic_timer_deadline(struct lapic *la) { uint32_t value; value = la->lvt_timer_base; value &= ~(APIC_LVTT_TM | APIC_LVT_M); value |= APIC_LVTT_TM_TSCDLT; if (value != la->lvt_timer_last) { la->lvt_timer_last = value; lapic_write32_nofence(LAPIC_LVT_TIMER, value); if (!x2apic_mode) mfence(); } wrmsr(MSR_TSC_DEADLINE, la->la_timer_period + rdtsc()); } static void lapic_timer_stop(struct lapic *la) { uint32_t value; if (la->la_timer_mode == LAT_MODE_DEADLINE) { wrmsr(MSR_TSC_DEADLINE, 0); mfence(); } else { value = la->lvt_timer_base; value &= ~APIC_LVTT_TM; value |= APIC_LVT_M; la->lvt_timer_last = value; lapic_write32(LAPIC_LVT_TIMER, value); } } void lapic_handle_cmc(void) { lapic_eoi(); cmc_intr(); } /* * Called from the mca_init() to activate the CMC interrupt if this CPU is * responsible for monitoring any MC banks for CMC events. Since mca_init() * is called prior to lapic_setup() during boot, this just needs to unmask * this CPU's LVT_CMCI entry. 
*/ static void native_lapic_enable_cmc(void) { u_int apic_id; #ifdef DEV_ATPIC if (!x2apic_mode && lapic_map == NULL) return; #endif apic_id = PCPU_GET(apic_id); KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_masked = 0; lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_active = 1; if (bootverbose) printf("lapic%u: CMCI unmasked\n", apic_id); } static int native_lapic_enable_mca_elvt(void) { u_int apic_id; uint32_t value; int elvt_count; #ifdef DEV_ATPIC if (lapic_map == NULL) return (-1); #endif apic_id = PCPU_GET(apic_id); KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); elvt_count = amd_read_elvt_count(); if (elvt_count <= APIC_ELVT_MCA) return (-1); value = lapic_read32(LAPIC_EXT_LVT0 + APIC_ELVT_MCA); if ((value & APIC_LVT_M) == 0) { if (bootverbose) printf("AMD MCE Thresholding Extended LVT is already active\n"); return (APIC_ELVT_MCA); } lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_masked = 0; lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_active = 1; if (bootverbose) printf("lapic%u: MCE Thresholding ELVT unmasked\n", apic_id); return (APIC_ELVT_MCA); } void lapic_handle_error(void) { uint32_t esr; /* * Read the contents of the error status register. Write to * the register first before reading from it to force the APIC * to update its value to indicate any errors that have * occurred since the previous write to the register. */ lapic_write32(LAPIC_ESR, 0); esr = lapic_read32(LAPIC_ESR); printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr); lapic_eoi(); } static u_int native_apic_cpuid(u_int apic_id) { #ifdef SMP return apic_cpuids[apic_id]; #else return 0; #endif } /* Request a free IDT vector to be used by the specified IRQ. */ static u_int native_apic_alloc_vector(u_int apic_id, u_int irq) { u_int vector; KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq)); /* * Search for a free vector. Currently we just use a very simple * algorithm to find the first free vector. */ mtx_lock_spin(&icu_lock); for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE) continue; lapics[apic_id].la_ioint_irqs[vector] = irq; mtx_unlock_spin(&icu_lock); return (vector + APIC_IO_INTS); } mtx_unlock_spin(&icu_lock); return (0); } /* * Request 'count' free contiguous IDT vectors to be used by 'count' * IRQs. 'count' must be a power of two and the vectors will be * aligned on a boundary of 'align'. If the request cannot be * satisfied, 0 is returned. */ static u_int native_apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align) { u_int first, run, vector; KASSERT(powerof2(count), ("bad count")); KASSERT(powerof2(align), ("bad align")); KASSERT(align >= count, ("align < count")); #ifdef INVARIANTS for (run = 0; run < count; run++) KASSERT(irqs[run] < num_io_irqs, ("Invalid IRQ %u at index %u", irqs[run], run)); #endif /* * Search for 'count' free vectors. As with apic_alloc_vector(), * this just uses a simple first fit algorithm. */ run = 0; first = 0; mtx_lock_spin(&icu_lock); for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { /* Vector is in use, end run. */ if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE) { run = 0; first = 0; continue; } /* Start a new run if run == 0 and vector is aligned. */ if (run == 0) { if ((vector & (align - 1)) != 0) continue; first = vector; } run++; /* Keep looping if the run isn't long enough yet. */ if (run < count) continue; /* Found a run, assign IRQs and return the first vector. 
*/ for (vector = 0; vector < count; vector++) lapics[apic_id].la_ioint_irqs[first + vector] = irqs[vector]; mtx_unlock_spin(&icu_lock); return (first + APIC_IO_INTS); } mtx_unlock_spin(&icu_lock); printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count); return (0); } /* * Enable a vector for a particular apic_id. Since all lapics share idt * entries and ioint_handlers this enables the vector on all lapics. lapics * which do not have the vector configured would report spurious interrupts * should it fire. */ static void native_apic_enable_vector(u_int apic_id, u_int vector) { KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); KASSERT(ioint_handlers[vector / 32] != NULL, ("No ISR handler for vector %u", vector)); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif setidt(vector, (pti ? ioint_pti_handlers : ioint_handlers)[vector / 32], SDT_APIC, SEL_KPL, GSEL_APIC); } static void native_apic_disable_vector(u_int apic_id, u_int vector) { KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif KASSERT(ioint_handlers[vector / 32] != NULL, ("No ISR handler for vector %u", vector)); #ifdef notyet /* * We can not currently clear the idt entry because other cpus * may have a valid vector at this offset. */ setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC, SEL_KPL, GSEL_APIC); #endif } /* Release an APIC vector when it's no longer in use. */ static void native_apic_free_vector(u_int apic_id, u_int vector, u_int irq) { struct thread *td; KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && vector <= APIC_IO_INTS + APIC_NUM_IOINTS, ("Vector %u does not map to an IRQ line", vector)); KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq)); KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] == irq, ("IRQ mismatch")); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif /* * Bind us to the cpu that owned the vector before freeing it so * we don't lose an interrupt delivery race. */ td = curthread; if (!rebooting) { thread_lock(td); if (sched_is_bound(td)) panic("apic_free_vector: Thread already bound.\n"); sched_bind(td, apic_cpuid(apic_id)); thread_unlock(td); } mtx_lock_spin(&icu_lock); lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = IRQ_FREE; mtx_unlock_spin(&icu_lock); if (!rebooting) { thread_lock(td); sched_unbind(td); thread_unlock(td); } } /* Map an IDT vector (APIC) to an IRQ (interrupt source). */ static u_int apic_idt_to_irq(u_int apic_id, u_int vector) { int irq; KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && vector <= APIC_IO_INTS + APIC_NUM_IOINTS, ("Vector %u does not map to an IRQ line", vector)); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS]; if (irq < 0) irq = 0; return (irq); } #ifdef DDB /* * Dump data about APIC IDT vector mappings. 
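/*
 * Illustrative sketch, not from the FreeBSD sources:
 * native_apic_alloc_vectors() above scans for 'count' consecutive free
 * slots whose first slot is aligned to 'align' (both powers of two),
 * abandoning a run as soon as a used slot is hit.  Stand-alone model
 * of the search:
 */
#include <stdio.h>

#define	DEMO_NSLOTS	32

static int
demo_alloc_run(const int *used, unsigned int count, unsigned int align)
{
	unsigned int first = 0, run = 0, slot;

	for (slot = 0; slot < DEMO_NSLOTS; slot++) {
		if (used[slot]) {		/* in use, end the run */
			run = 0;
			continue;
		}
		if (run == 0) {			/* only start when aligned */
			if ((slot & (align - 1)) != 0)
				continue;
			first = slot;
		}
		if (++run == count)
			return ((int)first);
	}
	return (-1);
}

int
main(void)
{
	int used[DEMO_NSLOTS] = { 0 };

	used[1] = used[9] = 1;	/* assumed existing allocations */
	/* Blocks at 0 and 8 are broken; the run at 16 is the first fit. */
	printf("run of 4 aligned to 8 starts at %d\n",
	    demo_alloc_run(used, 4, 8));	/* 16 */
	return (0);
}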
*/ DB_SHOW_COMMAND(apic, db_show_apic) { struct intsrc *isrc; int i, verbose; u_int apic_id; u_int irq; if (strcmp(modif, "vv") == 0) verbose = 2; else if (strcmp(modif, "v") == 0) verbose = 1; else verbose = 0; for (apic_id = 0; apic_id <= max_apic_id; apic_id++) { if (lapics[apic_id].la_present == 0) continue; db_printf("Interrupts bound to lapic %u\n", apic_id); for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) { irq = lapics[apic_id].la_ioint_irqs[i]; if (irq == IRQ_FREE || irq == IRQ_SYSCALL) continue; #ifdef KDTRACE_HOOKS if (irq == IRQ_DTRACE_RET) continue; #endif #ifdef XENHVM if (irq == IRQ_EVTCHN) continue; #endif db_printf("vec 0x%2x -> ", i + APIC_IO_INTS); if (irq == IRQ_TIMER) db_printf("lapic timer\n"); else if (irq < num_io_irqs) { isrc = intr_lookup_source(irq); if (isrc == NULL || verbose == 0) db_printf("IRQ %u\n", irq); else db_dump_intr_event(isrc->is_event, verbose == 2); } else db_printf("IRQ %u ???\n", irq); } } } static void dump_mask(const char *prefix, uint32_t v, int base) { int i, first; first = 1; for (i = 0; i < 32; i++) if (v & (1 << i)) { if (first) { db_printf("%s:", prefix); first = 0; } db_printf(" %02x", base + i); } if (!first) db_printf("\n"); } /* Show info from the lapic regs for this CPU. */ DB_SHOW_COMMAND(lapic, db_show_lapic) { uint32_t v; db_printf("lapic ID = %d\n", lapic_id()); v = lapic_read32(LAPIC_VERSION); db_printf("version = %d.%d\n", (v & APIC_VER_VERSION) >> 4, v & 0xf); db_printf("max LVT = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT); v = lapic_read32(LAPIC_SVR); db_printf("SVR = %02x (%s)\n", v & APIC_SVR_VECTOR, v & APIC_SVR_ENABLE ? "enabled" : "disabled"); db_printf("TPR = %02x\n", lapic_read32(LAPIC_TPR)); #define dump_field(prefix, regn, index) \ dump_mask(__XSTRING(prefix ## index), \ lapic_read32(LAPIC_ ## regn ## index), \ index * 32) db_printf("In-service Interrupts:\n"); dump_field(isr, ISR, 0); dump_field(isr, ISR, 1); dump_field(isr, ISR, 2); dump_field(isr, ISR, 3); dump_field(isr, ISR, 4); dump_field(isr, ISR, 5); dump_field(isr, ISR, 6); dump_field(isr, ISR, 7); db_printf("TMR Interrupts:\n"); dump_field(tmr, TMR, 0); dump_field(tmr, TMR, 1); dump_field(tmr, TMR, 2); dump_field(tmr, TMR, 3); dump_field(tmr, TMR, 4); dump_field(tmr, TMR, 5); dump_field(tmr, TMR, 6); dump_field(tmr, TMR, 7); db_printf("IRR Interrupts:\n"); dump_field(irr, IRR, 0); dump_field(irr, IRR, 1); dump_field(irr, IRR, 2); dump_field(irr, IRR, 3); dump_field(irr, IRR, 4); dump_field(irr, IRR, 5); dump_field(irr, IRR, 6); dump_field(irr, IRR, 7); #undef dump_field } #endif /* * APIC probing support code. This includes code to manage enumerators. */ static SLIST_HEAD(, apic_enumerator) enumerators = SLIST_HEAD_INITIALIZER(enumerators); static struct apic_enumerator *best_enum; void apic_register_enumerator(struct apic_enumerator *enumerator) { #ifdef INVARIANTS struct apic_enumerator *apic_enum; SLIST_FOREACH(apic_enum, &enumerators, apic_next) { if (apic_enum == enumerator) panic("%s: Duplicate register of %s", __func__, enumerator->apic_name); } #endif SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next); } /* * We have to look for CPU's very, very early because certain subsystems * want to know how many CPU's we have extremely early on in the boot * process. */ static void apic_init(void *dummy __unused) { struct apic_enumerator *enumerator; int retval, best; /* We only support built in local APICs. */ if (!(cpu_feature & CPUID_APIC)) return; /* Don't probe if APIC mode is disabled. 
*/ if (resource_disabled("apic", 0)) return; /* Probe all the enumerators to find the best match. */ best_enum = NULL; best = 0; SLIST_FOREACH(enumerator, &enumerators, apic_next) { retval = enumerator->apic_probe(); if (retval > 0) continue; if (best_enum == NULL || best < retval) { best_enum = enumerator; best = retval; } } if (best_enum == NULL) { if (bootverbose) printf("APIC: Could not find any APICs.\n"); #ifndef DEV_ATPIC panic("running without device atpic requires a local APIC"); #endif return; } if (bootverbose) printf("APIC: Using the %s enumerator.\n", best_enum->apic_name); #ifdef I686_CPU /* * To work around an erratum, we disable the local APIC on some * CPUs during early startup. We need to turn the local APIC back * on for such CPUs now. */ ppro_reenable_apic(); #endif /* Probe the CPU's in the system. */ retval = best_enum->apic_probe_cpus(); if (retval != 0) printf("%s: Failed to probe CPUs: returned %d\n", best_enum->apic_name, retval); } SYSINIT(apic_init, SI_SUB_TUNABLES - 1, SI_ORDER_SECOND, apic_init, NULL); /* * Setup the local APIC. We have to do this prior to starting up the APs * in the SMP case. */ static void apic_setup_local(void *dummy __unused) { int retval; if (best_enum == NULL) return; lapics = malloc(sizeof(*lapics) * (max_apic_id + 1), M_LAPIC, M_WAITOK | M_ZERO); /* Initialize the local APIC. */ retval = best_enum->apic_setup_local(); if (retval != 0) printf("%s: Failed to setup the local APIC: returned %d\n", best_enum->apic_name, retval); } SYSINIT(apic_setup_local, SI_SUB_CPU, SI_ORDER_SECOND, apic_setup_local, NULL); /* * Setup the I/O APICs. */ static void apic_setup_io(void *dummy __unused) { int retval; if (best_enum == NULL) return; /* * Local APIC must be registered before other PICs and pseudo PICs * for proper suspend/resume order. */ intr_register_pic(&lapic_pic); retval = best_enum->apic_setup_io(); if (retval != 0) printf("%s: Failed to setup I/O APICs: returned %d\n", best_enum->apic_name, retval); /* * Finish setting up the local APIC on the BSP once we know * how to properly program the LINT pins. In particular, this * enables the EOI suppression mode, if the LAPIC supports it and * the user did not disable the mode. */ lapic_setup(1); if (bootverbose) lapic_dump("BSP"); /* Enable the MSI "pic". */ init_ops.msi_init(); #ifdef XENHVM xen_intr_alloc_irqs(); #endif } SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_THIRD, apic_setup_io, NULL); #ifdef SMP /* * Inter Processor Interrupt functions. The lapic_ipi_*() functions are * private to the MD code. The public interface for the rest of the * kernel is defined in mp_machdep.c. */ /* * Wait 'delay' microseconds for the IPI to be sent. If delay is -1, we * wait forever. */ static int native_lapic_ipi_wait(int delay) { uint64_t rx; /* LAPIC_ICR.APIC_DELSTAT_MASK is undefined in x2APIC mode */ if (x2apic_mode) return (1); for (rx = 0; delay == -1 || rx < lapic_ipi_wait_mult * delay; rx++) { if ((lapic_read_icr_lo() & APIC_DELSTAT_MASK) == APIC_DELSTAT_IDLE) return (1); ia32_pause(); } return (0); } static void native_lapic_ipi_raw(register_t icrlo, u_int dest) { uint64_t icr; uint32_t vhi, vlo; register_t saveintr; /* XXX: Need more sanity checking of icrlo? */ KASSERT(x2apic_mode || lapic_map != NULL, ("%s called too early", __func__)); KASSERT(x2apic_mode || (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, ("%s: invalid dest field", __func__)); KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0, ("%s: reserved bits set in ICR LO register", __func__)); /* Set destination in ICR HI register if it is being used.
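/*
 * Illustrative sketch, not from the FreeBSD sources: apic_probe()
 * returns a positive value for "not present" and zero or a negative
 * value for "present", with the value closest to zero winning in
 * apic_init() above.  The MP Table enumerator later in this changeset
 * returns -100, which leaves room for a preferred enumerator to
 * outrank it (the -50 below is only an assumed figure).
 */
#include <stdio.h>

struct demo_enum {
	const char	*name;
	int		probe;	/* <= 0 means present */
};

int
main(void)
{
	struct demo_enum list[] = {
		{ "MPTable", -100 },
		{ "ACPI", -50 },	/* assumed ranking */
		{ "Missing", 1 },	/* probe failed, skipped */
	};
	struct demo_enum *best = NULL;
	int best_q = 0, i;

	for (i = 0; i < 3; i++) {
		if (list[i].probe > 0)
			continue;
		if (best == NULL || best_q < list[i].probe) {
			best = &list[i];
			best_q = list[i].probe;
		}
	}
	printf("using %s\n", best != NULL ? best->name : "none"); /* ACPI */
	return (0);
}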
*/ if (!x2apic_mode) { saveintr = intr_disable(); icr = lapic_read_icr(); } if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) { if (x2apic_mode) { vhi = dest; } else { vhi = icr >> 32; vhi &= ~APIC_ID_MASK; vhi |= dest << APIC_ID_SHIFT; } } else { vhi = 0; } /* Program the contents of the IPI and dispatch it. */ if (x2apic_mode) { vlo = icrlo; } else { vlo = icr; vlo &= APIC_ICRLO_RESV_MASK; vlo |= icrlo; } lapic_write_icr(vhi, vlo); if (!x2apic_mode) intr_restore(saveintr); } #define BEFORE_SPIN 50000 #ifdef DETECT_DEADLOCK #define AFTER_SPIN 50 #endif static void native_lapic_ipi_vectored(u_int vector, int dest) { register_t icrlo, destfield; KASSERT((vector & ~APIC_VECTOR_MASK) == 0, ("%s: invalid vector %d", __func__, vector)); icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT; /* * NMI IPIs are just fake vectors used to send an NMI. Use special rules * regarding NMIs if passed, otherwise specify the vector. */ if (vector >= IPI_NMI_FIRST) icrlo |= APIC_DELMODE_NMI; else icrlo |= vector | APIC_DELMODE_FIXED; destfield = 0; switch (dest) { case APIC_IPI_DEST_SELF: icrlo |= APIC_DEST_SELF; break; case APIC_IPI_DEST_ALL: icrlo |= APIC_DEST_ALLISELF; break; case APIC_IPI_DEST_OTHERS: icrlo |= APIC_DEST_ALLESELF; break; default: KASSERT(x2apic_mode || (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, ("%s: invalid destination 0x%x", __func__, dest)); destfield = dest; } /* Wait for an earlier IPI to finish. */ if (!lapic_ipi_wait(BEFORE_SPIN)) { if (panicstr != NULL) return; else panic("APIC: Previous IPI is stuck"); } lapic_ipi_raw(icrlo, destfield); #ifdef DETECT_DEADLOCK /* Wait for IPI to be delivered. */ if (!lapic_ipi_wait(AFTER_SPIN)) { #ifdef needsattention /* * XXX FIXME: * * The above function waits for the message to actually be * delivered. It breaks out after an arbitrary timeout * since the message should eventually be delivered (at * least in theory) and if it wasn't we would catch * the failure with the check above when the next IPI is * sent. * * We could skip this wait entirely, EXCEPT it probably * protects us from other routines that assume that the * message was delivered and acted upon when this function * returns. */ printf("APIC: IPI might be stuck\n"); #else /* !needsattention */ /* Wait until the message is sent, without a timeout. */ while (lapic_read_icr_lo() & APIC_DELSTAT_PEND) ia32_pause(); #endif /* needsattention */ } #endif /* DETECT_DEADLOCK */ } #endif /* SMP */ /* * Since the IDT is shared by all CPUs the IPI slot update needs to be globally * visible. * * Consider the case where an IPI is generated immediately after allocation: * vector = lapic_ipi_alloc(ipifunc); * ipi_selected(other_cpus, vector); * * In xAPIC mode a write to ICR_LO has serializing semantics because the * APIC page is mapped as an uncached region. In x2APIC mode there is an * explicit 'mfence' before the ICR MSR is written. Therefore in both cases * the IDT slot update is globally visible before the IPI is delivered.
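/*
 * Illustrative sketch, not from the FreeBSD sources: the
 * lapic_write_icr() helper used above (defined earlier in this file,
 * outside this hunk) hides an ordering detail worth spelling out.  In
 * xAPIC mode the destination half (ICR_HI) must be written before
 * ICR_LO, because the ICR_LO write is what launches the IPI; in x2APIC
 * mode both halves go out in a single 64-bit MSR write.  The accessors
 * below are stand-ins that just print:
 */
#include <stdint.h>
#include <stdio.h>

static int x2apic;

static void
demo_write32(const char *reg, uint32_t v)	/* stands in for MMIO */
{
	printf("write32 %s = 0x%08x\n", reg, v);
}

static void
demo_wrmsr(const char *msr, uint64_t v)		/* stands in for wrmsr */
{
	printf("wrmsr   %s = 0x%016jx\n", msr, (uintmax_t)v);
}

static void
demo_write_icr(uint32_t vhi, uint32_t vlo)
{
	if (x2apic) {
		/* One atomic write, no ordering hazard. */
		demo_wrmsr("ICR", (uint64_t)vhi << 32 | vlo);
	} else {
		/* HI first: the LO write fires the interrupt. */
		demo_write32("ICR_HI", vhi);
		demo_write32("ICR_LO", vlo);
	}
}

int
main(void)
{
	demo_write_icr(0x01000000, 0x000040fe);	/* assumed dest/vector */
	x2apic = 1;
	demo_write_icr(0x00000001, 0x000040fe);
	return (0);
}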
*/ static int native_lapic_ipi_alloc(inthand_t *ipifunc) { struct gate_descriptor *ip; long func; int idx, vector; KASSERT(ipifunc != &IDTVEC(rsvd) && ipifunc != &IDTVEC(rsvd_pti), ("invalid ipifunc %p", ipifunc)); vector = -1; mtx_lock_spin(&icu_lock); for (idx = IPI_DYN_FIRST; idx <= IPI_DYN_LAST; idx++) { ip = &idt[idx]; func = (ip->gd_hioffset << 16) | ip->gd_looffset; if ((!pti && func == (uintptr_t)&IDTVEC(rsvd)) || (pti && func == (uintptr_t)&IDTVEC(rsvd_pti))) { vector = idx; setidt(vector, ipifunc, SDT_APIC, SEL_KPL, GSEL_APIC); break; } } mtx_unlock_spin(&icu_lock); return (vector); } static void native_lapic_ipi_free(int vector) { struct gate_descriptor *ip; long func; KASSERT(vector >= IPI_DYN_FIRST && vector <= IPI_DYN_LAST, ("%s: invalid vector %d", __func__, vector)); mtx_lock_spin(&icu_lock); ip = &idt[vector]; func = (ip->gd_hioffset << 16) | ip->gd_looffset; KASSERT(func != (uintptr_t)&IDTVEC(rsvd) && func != (uintptr_t)&IDTVEC(rsvd_pti), ("invalid idtfunc %#lx", func)); setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC, SEL_KPL, GSEL_APIC); mtx_unlock_spin(&icu_lock); } Index: head/sys/x86/x86/mptable.c =================================================================== --- head/sys/x86/x86/mptable.c (revision 344854) +++ head/sys/x86/x86/mptable.c (revision 344855) @@ -1,1262 +1,1262 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * - * Copyright (c) 2003 John Baldwin * Copyright (c) 1996, by Steve Passe * All rights reserved. + * Copyright (c) 2003 John Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_mptable_force_htt.h" #include #include #include #include #include #include #include #ifdef NEW_PCIB #include #endif #include #include #include #include #ifdef NEW_PCIB #include #endif #include #include #include #include #include #include #include #ifdef NEW_PCIB #include #endif #include /* string defined by the Intel MP Spec as identifying the MP table */ #define MP_SIG 0x5f504d5f /* _MP_ */ #ifdef __amd64__ #define MAX_LAPIC_ID 63 /* Max local APIC ID for HTT fixup */ #else #define MAX_LAPIC_ID 31 /* Max local APIC ID for HTT fixup */ #endif #define BIOS_BASE (0xf0000) #define BIOS_SIZE (0x10000) #define BIOS_COUNT (BIOS_SIZE/4) typedef void mptable_entry_handler(u_char *entry, void *arg); typedef void mptable_extended_entry_handler(ext_entry_ptr entry, void *arg); /* descriptions of MP table entries */ typedef struct BASETABLE_ENTRY { uint8_t type; uint8_t length; uint8_t name[16]; } basetable_entry; static basetable_entry basetable_entry_types[] = { {0, 20, "Processor"}, {1, 8, "Bus"}, {2, 8, "I/O APIC"}, {3, 8, "I/O INT"}, {4, 8, "Local INT"} }; typedef struct BUSDATA { u_char bus_id; enum busTypes bus_type; } bus_datum; typedef struct INTDATA { u_char int_type; u_short int_flags; u_char src_bus_id; u_char src_bus_irq; u_char dst_apic_id; u_char dst_apic_int; u_char int_vector; } io_int, local_int; typedef struct BUSTYPENAME { u_char type; char name[7]; } bus_type_name; /* From MP spec v1.4, table 4-8. */ static bus_type_name bus_type_table[] = { {UNKNOWN_BUSTYPE, "CBUS "}, {UNKNOWN_BUSTYPE, "CBUSII"}, {EISA, "EISA "}, {UNKNOWN_BUSTYPE, "FUTURE"}, {UNKNOWN_BUSTYPE, "INTERN"}, {ISA, "ISA "}, {UNKNOWN_BUSTYPE, "MBI "}, {UNKNOWN_BUSTYPE, "MBII "}, {MCA, "MCA "}, {UNKNOWN_BUSTYPE, "MPI "}, {UNKNOWN_BUSTYPE, "MPSA "}, {UNKNOWN_BUSTYPE, "NUBUS "}, {PCI, "PCI "}, {UNKNOWN_BUSTYPE, "PCMCIA"}, {UNKNOWN_BUSTYPE, "TC "}, {UNKNOWN_BUSTYPE, "VL "}, {UNKNOWN_BUSTYPE, "VME "}, {UNKNOWN_BUSTYPE, "XPRESS"} }; /* From MP spec v1.4, table 5-1. */ static int default_data[7][5] = { /* nbus, id0, type0, id1, type1 */ {1, 0, ISA, 255, NOBUS}, {1, 0, EISA, 255, NOBUS}, {1, 0, EISA, 255, NOBUS}, {1, 0, MCA, 255, NOBUS}, {2, 0, ISA, 1, PCI}, {2, 0, EISA, 1, PCI}, {2, 0, MCA, 1, PCI} }; struct pci_probe_table_args { u_char bus; u_char found; }; struct pci_route_interrupt_args { u_char bus; /* Source bus. */ u_char irq; /* Source slot:pin. */ int vector; /* Return value. 
*/ }; static mpfps_t mpfps; static mpcth_t mpct; static ext_entry_ptr mpet; static void *ioapics[IOAPIC_MAX_ID + 1]; static bus_datum *busses; static int mptable_nioapics, mptable_nbusses, mptable_maxbusid; static int pci0 = -1; static MALLOC_DEFINE(M_MPTABLE, "mptable", "MP Table Items"); static enum intr_polarity conforming_polarity(u_char src_bus, u_char src_bus_irq); static enum intr_trigger conforming_trigger(u_char src_bus, u_char src_bus_irq); static enum intr_polarity intentry_polarity(int_entry_ptr intr); static enum intr_trigger intentry_trigger(int_entry_ptr intr); static int lookup_bus_type(char *name); static void mptable_count_items(void); static void mptable_count_items_handler(u_char *entry, void *arg); #ifdef MPTABLE_FORCE_HTT static void mptable_hyperthread_fixup(u_int id_mask); #endif static void mptable_parse_apics_and_busses(void); static void mptable_parse_apics_and_busses_handler(u_char *entry, void *arg); static void mptable_parse_default_config_ints(void); static void mptable_parse_ints(void); static void mptable_parse_ints_handler(u_char *entry, void *arg); static void mptable_parse_io_int(int_entry_ptr intr); static void mptable_parse_local_int(int_entry_ptr intr); static void mptable_pci_probe_table_handler(u_char *entry, void *arg); static void mptable_pci_route_interrupt_handler(u_char *entry, void *arg); static void mptable_pci_setup(void); static int mptable_probe(void); static int mptable_probe_cpus(void); static void mptable_probe_cpus_handler(u_char *entry, void *arg __unused); static void mptable_setup_cpus_handler(u_char *entry, void *arg __unused); static void mptable_register(void *dummy); static int mptable_setup_local(void); static int mptable_setup_io(void); #ifdef NEW_PCIB static void mptable_walk_extended_table( mptable_extended_entry_handler *handler, void *arg); #endif static void mptable_walk_table(mptable_entry_handler *handler, void *arg); static int search_for_sig(u_int32_t target, int count); static struct apic_enumerator mptable_enumerator = { .apic_name = "MPTable", .apic_probe = mptable_probe, .apic_probe_cpus = mptable_probe_cpus, .apic_setup_local = mptable_setup_local, .apic_setup_io = mptable_setup_io }; /* * look for the MP spec signature */ static int search_for_sig(u_int32_t target, int count) { int x; u_int32_t *addr; addr = (u_int32_t *)BIOS_PADDRTOVADDR(target); for (x = 0; x < count; x += 4) if (addr[x] == MP_SIG) /* make array index a byte index */ return (target + (x * sizeof(u_int32_t))); return (-1); } static int lookup_bus_type(char *name) { int x; for (x = 0; x < MAX_BUSTYPE; ++x) if (strncmp(bus_type_table[x].name, name, 6) == 0) return (bus_type_table[x].type); return (UNKNOWN_BUSTYPE); } /* * Look for an Intel MP spec table (ie, SMP capable hardware). */ static int mptable_probe(void) { int x; u_long segment; u_int32_t target; /* see if EBDA exists */ if ((segment = *(u_short *)BIOS_PADDRTOVADDR(0x40e)) != 0) { /* search first 1K of EBDA */ target = (u_int32_t) (segment << 4); if ((x = search_for_sig(target, 1024 / 4)) >= 0) goto found; } else { /* last 1K of base memory, effective 'top of base' passed in */ target = (u_int32_t) ((basemem * 1024) - 0x400); if ((x = search_for_sig(target, 1024 / 4)) >= 0) goto found; } /* search the BIOS */ target = (u_int32_t) BIOS_BASE; if ((x = search_for_sig(target, BIOS_COUNT)) >= 0) goto found; /* nothing found */ return (ENXIO); found: mpfps = (mpfps_t)BIOS_PADDRTOVADDR(x); /* Map in the configuration table if it exists. 
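/*
 * Illustrative sketch, not from the FreeBSD sources: search_for_sig()
 * above scans physical memory for MP_SIG, which is simply the four
 * ASCII bytes "_MP_" read as a little-endian 32-bit integer, and
 * mptable_probe() turns the real-mode EBDA segment stored at physical
 * 0x40E into a linear address by shifting it left four bits.
 * Stand-alone check of both computations:
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	const char sig[4] = { '_', 'M', 'P', '_' };
	unsigned short segment = 0x9fc0;	/* assumed word at 0x40e */
	uint32_t v;

	memcpy(&v, sig, sizeof(v));		/* reinterpret the bytes */
	/* Prints 0x5f504d5f on little-endian x86, matching MP_SIG. */
	printf("signature 0x%08x\n", v);
	/* Segment 0x9fc0 maps to linear 0x9fc00, just below 640K. */
	printf("EBDA at 0x%05x\n", (unsigned int)segment << 4);
	return (0);
}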
*/ if (mpfps->config_type != 0) { if (bootverbose) printf( "MP Table version 1.%d found using Default Configuration %d\n", mpfps->spec_rev, mpfps->config_type); if (mpfps->config_type != 5 && mpfps->config_type != 6) { printf( "MP Table Default Configuration %d is unsupported\n", mpfps->config_type); return (ENXIO); } mpct = NULL; } else { if ((uintptr_t)mpfps->pap >= 1024 * 1024) { printf("%s: Unable to map MP Configuration Table\n", __func__); return (ENXIO); } mpct = (mpcth_t)BIOS_PADDRTOVADDR((uintptr_t)mpfps->pap); if (mpct->base_table_length + (uintptr_t)mpfps->pap >= 1024 * 1024) { printf("%s: Unable to map end of MP Config Table\n", __func__); return (ENXIO); } if (mpct->extended_table_length != 0 && mpct->extended_table_length + mpct->base_table_length + (uintptr_t)mpfps->pap < 1024 * 1024) mpet = (ext_entry_ptr)((char *)mpct + mpct->base_table_length); if (mpct->signature[0] != 'P' || mpct->signature[1] != 'C' || mpct->signature[2] != 'M' || mpct->signature[3] != 'P') { printf("%s: MP Config Table has bad signature: %c%c%c%c\n", __func__, mpct->signature[0], mpct->signature[1], mpct->signature[2], mpct->signature[3]); return (ENXIO); } if (bootverbose) printf( "MP Configuration Table version 1.%d found at %p\n", mpct->spec_rev, mpct); } return (-100); } /* * Run through the MP table enumerating CPUs. */ static int mptable_probe_cpus(void) { u_int cpu_mask; /* Is this a pre-defined config? */ if (mpfps->config_type != 0) { #ifdef SMP mp_ncpus = 2; mp_maxid = 1; #endif max_apic_id = 1; } else { mptable_walk_table(mptable_probe_cpus_handler, &cpu_mask); } return (0); } /* * Initialize the local APIC on the BSP. */ static int mptable_setup_local(void) { vm_paddr_t addr; u_int cpu_mask; /* Is this a pre-defined config? */ printf("MPTable: <"); if (mpfps->config_type != 0) { lapic_create(0, 1); lapic_create(1, 0); addr = DEFAULT_APIC_BASE; printf("Default Configuration %d", mpfps->config_type); } else { cpu_mask = 0; mptable_walk_table(mptable_setup_cpus_handler, &cpu_mask); #ifdef MPTABLE_FORCE_HTT mptable_hyperthread_fixup(cpu_mask); #endif addr = mpct->apic_address; printf("%.*s %.*s", (int)sizeof(mpct->oem_id), mpct->oem_id, (int)sizeof(mpct->product_id), mpct->product_id); } printf(">\n"); lapic_init(addr); return (0); } /* * Run through the MP table enumerating I/O APICs. */ static int mptable_setup_io(void) { int i; u_char byte; /* First, we count individual items and allocate arrays. */ mptable_count_items(); busses = malloc((mptable_maxbusid + 1) * sizeof(bus_datum), M_MPTABLE, M_WAITOK); for (i = 0; i <= mptable_maxbusid; i++) busses[i].bus_type = NOBUS; /* Second, we run through adding I/O APIC's and buses. */ mptable_parse_apics_and_busses(); /* Third, we run through the table tweaking interrupt sources. */ mptable_parse_ints(); /* Fourth, we register all the I/O APIC's. */ for (i = 0; i <= IOAPIC_MAX_ID; i++) if (ioapics[i] != NULL) ioapic_register(ioapics[i]); /* Fifth, we setup data structures to handle PCI interrupt routing. */ mptable_pci_setup(); /* Finally, we throw the switch to enable the I/O APIC's. 
*/ if (mpfps->mpfb2 & MPFB2_IMCR_PRESENT) { outb(0x22, 0x70); /* select IMCR */ byte = inb(0x23); /* current contents */ byte |= 0x01; /* mask external INTR */ outb(0x23, byte); /* disconnect 8259s/NMI */ } return (0); } static void mptable_register(void *dummy __unused) { apic_register_enumerator(&mptable_enumerator); } SYSINIT(mptable_register, SI_SUB_TUNABLES - 1, SI_ORDER_FIRST, mptable_register, NULL); /* * Call the handler routine for each entry in the MP config base table. */ static void mptable_walk_table(mptable_entry_handler *handler, void *arg) { u_int i; u_char *entry; entry = (u_char *)(mpct + 1); for (i = 0; i < mpct->entry_count; i++) { switch (*entry) { case MPCT_ENTRY_PROCESSOR: case MPCT_ENTRY_IOAPIC: case MPCT_ENTRY_BUS: case MPCT_ENTRY_INT: case MPCT_ENTRY_LOCAL_INT: break; default: panic("%s: Unknown MP Config Entry %d\n", __func__, (int)*entry); } handler(entry, arg); entry += basetable_entry_types[*entry].length; } } #ifdef NEW_PCIB /* * Call the handler routine for each entry in the MP config extended * table. */ static void mptable_walk_extended_table(mptable_extended_entry_handler *handler, void *arg) { ext_entry_ptr end, entry; if (mpet == NULL) return; entry = mpet; end = (ext_entry_ptr)((char *)mpet + mpct->extended_table_length); while (entry < end) { handler(entry, arg); entry = (ext_entry_ptr)((char *)entry + entry->length); } } #endif static void mptable_probe_cpus_handler(u_char *entry, void *arg) { proc_entry_ptr proc; switch (*entry) { case MPCT_ENTRY_PROCESSOR: proc = (proc_entry_ptr)entry; if (proc->cpu_flags & PROCENTRY_FLAG_EN && proc->apic_id < MAX_LAPIC_ID && mp_ncpus < MAXCPU) { #ifdef SMP mp_ncpus++; mp_maxid = mp_ncpus - 1; #endif max_apic_id = max(max_apic_id, proc->apic_id); } break; } } static void mptable_setup_cpus_handler(u_char *entry, void *arg) { proc_entry_ptr proc; u_int *cpu_mask; switch (*entry) { case MPCT_ENTRY_PROCESSOR: proc = (proc_entry_ptr)entry; if (proc->cpu_flags & PROCENTRY_FLAG_EN) { lapic_create(proc->apic_id, proc->cpu_flags & PROCENTRY_FLAG_BP); if (proc->apic_id < MAX_LAPIC_ID) { cpu_mask = (u_int *)arg; *cpu_mask |= (1ul << proc->apic_id); } } break; } } static void mptable_count_items_handler(u_char *entry, void *arg __unused) { io_apic_entry_ptr apic; bus_entry_ptr bus; switch (*entry) { case MPCT_ENTRY_BUS: bus = (bus_entry_ptr)entry; mptable_nbusses++; if (bus->bus_id > mptable_maxbusid) mptable_maxbusid = bus->bus_id; break; case MPCT_ENTRY_IOAPIC: apic = (io_apic_entry_ptr)entry; if (apic->apic_flags & IOAPICENTRY_FLAG_EN) mptable_nioapics++; break; } } /* * Count items in the table. */ static void mptable_count_items(void) { /* Is this a pre-defined config? */ if (mpfps->config_type != 0) { mptable_nioapics = 1; switch (mpfps->config_type) { case 1: case 2: case 3: case 4: mptable_nbusses = 1; break; case 5: case 6: case 7: mptable_nbusses = 2; break; default: panic("Unknown pre-defined MP Table config type %d", mpfps->config_type); } mptable_maxbusid = mptable_nbusses - 1; } else mptable_walk_table(mptable_count_items_handler, NULL); } /* * Add a bus or I/O APIC from an entry in the table. 
*/ static void mptable_parse_apics_and_busses_handler(u_char *entry, void *arg __unused) { io_apic_entry_ptr apic; bus_entry_ptr bus; enum busTypes bus_type; int i; switch (*entry) { case MPCT_ENTRY_BUS: bus = (bus_entry_ptr)entry; bus_type = lookup_bus_type(bus->bus_type); if (bus_type == UNKNOWN_BUSTYPE) { printf("MPTable: Unknown bus %d type \"", bus->bus_id); for (i = 0; i < 6; i++) printf("%c", bus->bus_type[i]); printf("\"\n"); } busses[bus->bus_id].bus_id = bus->bus_id; busses[bus->bus_id].bus_type = bus_type; break; case MPCT_ENTRY_IOAPIC: apic = (io_apic_entry_ptr)entry; if (!(apic->apic_flags & IOAPICENTRY_FLAG_EN)) break; if (apic->apic_id > IOAPIC_MAX_ID) panic("%s: I/O APIC ID %d too high", __func__, apic->apic_id); if (ioapics[apic->apic_id] != NULL) panic("%s: Double APIC ID %d", __func__, apic->apic_id); ioapics[apic->apic_id] = ioapic_create(apic->apic_address, apic->apic_id, -1); break; default: break; } } /* * Enumerate I/O APIC's and buses. */ static void mptable_parse_apics_and_busses(void) { /* Is this a pre-defined config? */ if (mpfps->config_type != 0) { ioapics[2] = ioapic_create(DEFAULT_IO_APIC_BASE, 2, 0); busses[0].bus_id = 0; busses[0].bus_type = default_data[mpfps->config_type - 1][2]; if (mptable_nbusses > 1) { busses[1].bus_id = 1; busses[1].bus_type = default_data[mpfps->config_type - 1][4]; } } else mptable_walk_table(mptable_parse_apics_and_busses_handler, NULL); } /* * Determine conforming polarity for a given bus type. */ static enum intr_polarity conforming_polarity(u_char src_bus, u_char src_bus_irq) { KASSERT(src_bus <= mptable_maxbusid, ("bus id %d too large", src_bus)); switch (busses[src_bus].bus_type) { case ISA: case EISA: return (INTR_POLARITY_HIGH); case PCI: return (INTR_POLARITY_LOW); default: panic("%s: unknown bus type %d", __func__, busses[src_bus].bus_type); } } /* * Determine conforming trigger for a given bus type. */ static enum intr_trigger conforming_trigger(u_char src_bus, u_char src_bus_irq) { KASSERT(src_bus <= mptable_maxbusid, ("bus id %d too large", src_bus)); switch (busses[src_bus].bus_type) { case ISA: if (elcr_found) return (elcr_read_trigger(src_bus_irq)); else return (INTR_TRIGGER_EDGE); case PCI: return (INTR_TRIGGER_LEVEL); case EISA: KASSERT(src_bus_irq < 16, ("Invalid EISA IRQ %d", src_bus_irq)); KASSERT(elcr_found, ("Missing ELCR")); return (elcr_read_trigger(src_bus_irq)); default: panic("%s: unknown bus type %d", __func__, busses[src_bus].bus_type); } } static enum intr_polarity intentry_polarity(int_entry_ptr intr) { switch (intr->int_flags & INTENTRY_FLAGS_POLARITY) { case INTENTRY_FLAGS_POLARITY_CONFORM: return (conforming_polarity(intr->src_bus_id, intr->src_bus_irq)); case INTENTRY_FLAGS_POLARITY_ACTIVEHI: return (INTR_POLARITY_HIGH); case INTENTRY_FLAGS_POLARITY_ACTIVELO: return (INTR_POLARITY_LOW); default: panic("Bogus interrupt flags"); } } static enum intr_trigger intentry_trigger(int_entry_ptr intr) { switch (intr->int_flags & INTENTRY_FLAGS_TRIGGER) { case INTENTRY_FLAGS_TRIGGER_CONFORM: return (conforming_trigger(intr->src_bus_id, intr->src_bus_irq)); case INTENTRY_FLAGS_TRIGGER_EDGE: return (INTR_TRIGGER_EDGE); case INTENTRY_FLAGS_TRIGGER_LEVEL: return (INTR_TRIGGER_LEVEL); default: panic("Bogus interrupt flags"); } } /* * Parse an interrupt entry for an I/O interrupt routed to a pin on an I/O APIC. 
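/*
 * Illustrative sketch, not from the FreeBSD sources: "conforming"
 * interrupt entries take the signalling that is customary for the
 * source bus, as conforming_polarity() and conforming_trigger() above
 * encode: ISA and EISA interrupts are active-high and, absent ELCR
 * data, edge triggered, while PCI interrupts are active-low and level
 * triggered.  Stand-alone lookup model:
 */
#include <stdio.h>

enum demo_bus { DEMO_ISA, DEMO_EISA, DEMO_PCI };

static const char *
demo_conforming(enum demo_bus bus)
{
	switch (bus) {
	case DEMO_ISA:
	case DEMO_EISA:
		return ("active-high, edge");
	case DEMO_PCI:
		return ("active-low, level");
	}
	return ("unknown");
}

int
main(void)
{
	printf("ISA: %s\n", demo_conforming(DEMO_ISA));
	printf("PCI: %s\n", demo_conforming(DEMO_PCI));
	return (0);
}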
*/ static void mptable_parse_io_int(int_entry_ptr intr) { void *ioapic; u_int pin, apic_id; apic_id = intr->dst_apic_id; if (intr->dst_apic_id == 0xff) { /* * An APIC ID of 0xff means that the interrupt is connected * to the specified pin on all I/O APICs in the system. If * there is only one I/O APIC, then use that APIC to route * the interrupts. If there is more than one I/O APIC, then * punt. */ if (mptable_nioapics == 1) { apic_id = 0; while (ioapics[apic_id] == NULL) apic_id++; } else { printf( "MPTable: Ignoring global interrupt entry for pin %d\n", intr->dst_apic_int); return; } } if (apic_id > IOAPIC_MAX_ID) { printf("MPTable: Ignoring interrupt entry for ioapic%d\n", intr->dst_apic_id); return; } ioapic = ioapics[apic_id]; if (ioapic == NULL) { printf( "MPTable: Ignoring interrupt entry for missing ioapic%d\n", apic_id); return; } pin = intr->dst_apic_int; switch (intr->int_type) { case INTENTRY_TYPE_INT: switch (busses[intr->src_bus_id].bus_type) { case NOBUS: panic("interrupt from missing bus"); case ISA: case EISA: if (busses[intr->src_bus_id].bus_type == ISA) ioapic_set_bus(ioapic, pin, APIC_BUS_ISA); else ioapic_set_bus(ioapic, pin, APIC_BUS_EISA); if (intr->src_bus_irq == pin) break; ioapic_remap_vector(ioapic, pin, intr->src_bus_irq); if (ioapic_get_vector(ioapic, intr->src_bus_irq) == intr->src_bus_irq) ioapic_disable_pin(ioapic, intr->src_bus_irq); break; case PCI: ioapic_set_bus(ioapic, pin, APIC_BUS_PCI); break; default: ioapic_set_bus(ioapic, pin, APIC_BUS_UNKNOWN); break; } break; case INTENTRY_TYPE_NMI: ioapic_set_nmi(ioapic, pin); break; case INTENTRY_TYPE_SMI: ioapic_set_smi(ioapic, pin); break; case INTENTRY_TYPE_EXTINT: ioapic_set_extint(ioapic, pin); break; default: panic("%s: invalid interrupt entry type %d\n", __func__, intr->int_type); } if (intr->int_type == INTENTRY_TYPE_INT || (intr->int_flags & INTENTRY_FLAGS_TRIGGER) != INTENTRY_FLAGS_TRIGGER_CONFORM) ioapic_set_triggermode(ioapic, pin, intentry_trigger(intr)); if (intr->int_type == INTENTRY_TYPE_INT || (intr->int_flags & INTENTRY_FLAGS_POLARITY) != INTENTRY_FLAGS_POLARITY_CONFORM) ioapic_set_polarity(ioapic, pin, intentry_polarity(intr)); } /* * Parse an interrupt entry for a local APIC LVT pin. */ static void mptable_parse_local_int(int_entry_ptr intr) { u_int apic_id, pin; if (intr->dst_apic_id == 0xff) apic_id = APIC_ID_ALL; else apic_id = intr->dst_apic_id; if (intr->dst_apic_int == 0) pin = APIC_LVT_LINT0; else pin = APIC_LVT_LINT1; switch (intr->int_type) { case INTENTRY_TYPE_INT: #if 1 printf( "MPTable: Ignoring vectored local interrupt for LINTIN%d vector %d\n", intr->dst_apic_int, intr->src_bus_irq); return; #else lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_FIXED); break; #endif case INTENTRY_TYPE_NMI: lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_NMI); break; case INTENTRY_TYPE_SMI: lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_SMI); break; case INTENTRY_TYPE_EXTINT: lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_EXTINT); break; default: panic("%s: invalid interrupt entry type %d\n", __func__, intr->int_type); } if ((intr->int_flags & INTENTRY_FLAGS_TRIGGER) != INTENTRY_FLAGS_TRIGGER_CONFORM) lapic_set_lvt_triggermode(apic_id, pin, intentry_trigger(intr)); if ((intr->int_flags & INTENTRY_FLAGS_POLARITY) != INTENTRY_FLAGS_POLARITY_CONFORM) lapic_set_lvt_polarity(apic_id, pin, intentry_polarity(intr)); } /* * Parse interrupt entries. 
*/ static void mptable_parse_ints_handler(u_char *entry, void *arg __unused) { int_entry_ptr intr; intr = (int_entry_ptr)entry; switch (*entry) { case MPCT_ENTRY_INT: mptable_parse_io_int(intr); break; case MPCT_ENTRY_LOCAL_INT: mptable_parse_local_int(intr); break; } } /* * Configure interrupt pins for a default configuration. For details see * Table 5-2 in Section 5 of the MP Table specification. */ static void mptable_parse_default_config_ints(void) { struct INTENTRY entry; int pin; /* * All default configs route IRQs from bus 0 to the first 16 pins * of the first I/O APIC with an APIC ID of 2. */ entry.type = MPCT_ENTRY_INT; entry.int_flags = INTENTRY_FLAGS_POLARITY_CONFORM | INTENTRY_FLAGS_TRIGGER_CONFORM; entry.src_bus_id = 0; entry.dst_apic_id = 2; /* Run through all 16 pins. */ for (pin = 0; pin < 16; pin++) { entry.dst_apic_int = pin; switch (pin) { case 0: /* Pin 0 is an ExtINT pin. */ entry.int_type = INTENTRY_TYPE_EXTINT; break; case 2: /* IRQ 0 is routed to pin 2. */ entry.int_type = INTENTRY_TYPE_INT; entry.src_bus_irq = 0; break; default: /* All other pins are identity mapped. */ entry.int_type = INTENTRY_TYPE_INT; entry.src_bus_irq = pin; break; } mptable_parse_io_int(&entry); } /* Certain configs disable certain pins. */ if (mpfps->config_type == 7) ioapic_disable_pin(ioapics[2], 0); if (mpfps->config_type == 2) { ioapic_disable_pin(ioapics[2], 2); ioapic_disable_pin(ioapics[2], 13); } } /* * Configure the interrupt pins */ static void mptable_parse_ints(void) { /* Is this a pre-defined config? */ if (mpfps->config_type != 0) { /* Configure LINT pins. */ lapic_set_lvt_mode(APIC_ID_ALL, APIC_LVT_LINT0, APIC_LVT_DM_EXTINT); lapic_set_lvt_mode(APIC_ID_ALL, APIC_LVT_LINT1, APIC_LVT_DM_NMI); /* Configure I/O APIC pins. */ mptable_parse_default_config_ints(); } else mptable_walk_table(mptable_parse_ints_handler, NULL); } #ifdef MPTABLE_FORCE_HTT /* * Perform a hyperthreading "fix-up" to enumerate any logical CPU's * that aren't already listed in the table. * * XXX: We assume that all of the physical CPUs in the * system have the same number of logical CPUs. * * XXX: We assume that APIC ID's are allocated such that * the APIC ID's for a physical processor are aligned * with the number of logical CPU's in the processor. */ static void mptable_hyperthread_fixup(u_int id_mask) { u_int i, id, logical_cpus; /* Nothing to do if there is no HTT support. */ if ((cpu_feature & CPUID_HTT) == 0) return; logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; if (logical_cpus <= 1) return; /* * For each APIC ID of a CPU that is set in the mask, * scan the other candidate APIC ID's for this * physical processor. If any of those ID's are * already in the table, then kill the fixup. */ for (id = 0; id <= MAX_LAPIC_ID; id++) { if ((id_mask & 1 << id) == 0) continue; /* First, make sure we are on a logical_cpus boundary. */ if (id % logical_cpus != 0) return; for (i = id + 1; i < id + logical_cpus; i++) if ((id_mask & 1 << i) != 0) return; } /* * Ok, the ID's checked out, so perform the fixup by * adding the logical CPUs. */ while ((id = ffs(id_mask)) != 0) { id--; for (i = id + 1; i < id + logical_cpus; i++) { if (bootverbose) printf( "MPTable: Adding logical CPU %d from main CPU %d\n", i, id); lapic_create(i, 0); } id_mask &= ~(1 << id); } } #endif /* MPTABLE_FORCE_HTT */ /* * Support code for routing PCI interrupts using the MP Table. */ static void mptable_pci_setup(void) { int i; /* * Find the first pci bus and call it 0. Panic if pci0 is not * bus zero and there are multiple PCI buses. 
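 *
 * An illustrative (hypothetical) layout: if the table lists an ISA
 * bus with ID 0 and a single PCI bus with ID 1, pci0 becomes 1 and
 * PCI bus N as seen by the OS maps to MP Table bus ID pci0 + N in
 * the lookups below.  A table listing PCI buses with IDs 1 and 2
 * would instead trip the panic, since pci0 != 0.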
*/ for (i = 0; i <= mptable_maxbusid; i++) if (busses[i].bus_type == PCI) { if (pci0 == -1) pci0 = i; else if (pci0 != 0) panic( "MPTable contains multiple PCI buses but no PCI bus 0"); } } static void mptable_pci_probe_table_handler(u_char *entry, void *arg) { struct pci_probe_table_args *args; int_entry_ptr intr; if (*entry != MPCT_ENTRY_INT) return; intr = (int_entry_ptr)entry; args = (struct pci_probe_table_args *)arg; KASSERT(args->bus <= mptable_maxbusid, ("bus %d is too big", args->bus)); KASSERT(busses[args->bus].bus_type == PCI, ("probing for non-PCI bus")); if (intr->src_bus_id == args->bus) args->found = 1; } int mptable_pci_probe_table(int bus) { struct pci_probe_table_args args; if (bus < 0) return (EINVAL); if (mpct == NULL || pci0 == -1 || pci0 + bus > mptable_maxbusid) return (ENXIO); if (busses[pci0 + bus].bus_type != PCI) return (ENXIO); args.bus = pci0 + bus; args.found = 0; mptable_walk_table(mptable_pci_probe_table_handler, &args); if (args.found == 0) return (ENXIO); return (0); } static void mptable_pci_route_interrupt_handler(u_char *entry, void *arg) { struct pci_route_interrupt_args *args; int_entry_ptr intr; int vector; if (*entry != MPCT_ENTRY_INT) return; intr = (int_entry_ptr)entry; args = (struct pci_route_interrupt_args *)arg; if (intr->src_bus_id != args->bus || intr->src_bus_irq != args->irq) return; /* Make sure the APIC maps to a known APIC. */ KASSERT(ioapics[intr->dst_apic_id] != NULL, ("No I/O APIC %d to route interrupt to", intr->dst_apic_id)); /* * Look up the vector for this APIC / pin combination. If we * have previously matched an entry for this PCI IRQ but it * has the same vector as this entry, just return. Otherwise, * we use the vector for this APIC / pin combination. */ vector = ioapic_get_vector(ioapics[intr->dst_apic_id], intr->dst_apic_int); if (args->vector == vector) return; KASSERT(args->vector == -1, ("Multiple IRQs for PCI interrupt %d.%d.INT%c: %d and %d\n", args->bus, args->irq >> 2, 'A' + (args->irq & 0x3), args->vector, vector)); args->vector = vector; } int mptable_pci_route_interrupt(device_t pcib, device_t dev, int pin) { struct pci_route_interrupt_args args; int slot; /* Like ACPI, pin numbers are 0-3, not 1-4. */ pin--; KASSERT(pci0 != -1, ("do not know how to route PCI interrupts")); args.bus = pci_get_bus(dev) + pci0; slot = pci_get_slot(dev); /* * PCI interrupt entries in the MP Table encode both the slot and * pin into the IRQ with the pin being the two least significant * bits, the slot being the next five bits, and the most significant * bit being reserved. */ args.irq = slot << 2 | pin; args.vector = -1; mptable_walk_table(mptable_pci_route_interrupt_handler, &args); if (args.vector < 0) { device_printf(pcib, "unable to route slot %d INT%c\n", slot, 'A' + pin); return (PCI_INVALID_IRQ); } if (bootverbose) device_printf(pcib, "slot %d INT%c routed to irq %d\n", slot, 'A' + pin, args.vector); return (args.vector); } #ifdef NEW_PCIB struct host_res_args { struct mptable_hostb_softc *sc; device_t dev; u_char bus; }; /* * Initialize a Host-PCI bridge so it can restrict resource allocation * requests to the resources it actually decodes according to MP * config table extended entries. 
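 *
 * Two kinds of extended entries matter below: system address space
 * (SAS) entries, which list the I/O port and memory windows that the
 * bridge decodes, and compatibility bus address space modifier
 * (CBASM) entries, which add or subtract the predefined ISA and VGA
 * I/O ranges.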
*/ static void mptable_host_res_handler(ext_entry_ptr entry, void *arg) { struct host_res_args *args; cbasm_entry_ptr cbasm; sas_entry_ptr sas; const char *name; uint64_t start, end; int error, *flagp, flags, type; args = arg; switch (entry->type) { case MPCT_EXTENTRY_SAS: sas = (sas_entry_ptr)entry; if (sas->bus_id != args->bus) break; switch (sas->address_type) { case SASENTRY_TYPE_IO: type = SYS_RES_IOPORT; flags = 0; break; case SASENTRY_TYPE_MEMORY: type = SYS_RES_MEMORY; flags = 0; break; case SASENTRY_TYPE_PREFETCH: type = SYS_RES_MEMORY; flags = RF_PREFETCHABLE; break; default: printf( "MPTable: Unknown systems address space type for bus %u: %d\n", sas->bus_id, sas->address_type); return; } start = sas->address_base; end = sas->address_base + sas->address_length - 1; #ifdef __i386__ if (start > ULONG_MAX) { device_printf(args->dev, "Ignoring %d range above 4GB (%#jx-%#jx)\n", type, (uintmax_t)start, (uintmax_t)end); break; } if (end > ULONG_MAX) { device_printf(args->dev, "Truncating end of %d range above 4GB (%#jx-%#jx)\n", type, (uintmax_t)start, (uintmax_t)end); end = ULONG_MAX; } #endif error = pcib_host_res_decodes(&args->sc->sc_host_res, type, start, end, flags); if (error) panic("Failed to manage %d range (%#jx-%#jx): %d", type, (uintmax_t)start, (uintmax_t)end, error); break; case MPCT_EXTENTRY_CBASM: cbasm = (cbasm_entry_ptr)entry; if (cbasm->bus_id != args->bus) break; switch (cbasm->predefined_range) { case CBASMENTRY_RANGE_ISA_IO: flagp = &args->sc->sc_decodes_isa_io; name = "ISA I/O"; break; case CBASMENTRY_RANGE_VGA_IO: flagp = &args->sc->sc_decodes_vga_io; name = "VGA I/O"; break; default: printf( "MPTable: Unknown compatibility address space range for bus %u: %d\n", cbasm->bus_id, cbasm->predefined_range); return; } if (*flagp != 0) printf( "MPTable: Duplicate compatibility %s range for bus %u\n", name, cbasm->bus_id); switch (cbasm->address_mod) { case CBASMENTRY_ADDRESS_MOD_ADD: *flagp = 1; if (bootverbose) device_printf(args->dev, "decoding %s ports\n", name); break; case CBASMENTRY_ADDRESS_MOD_SUBTRACT: *flagp = -1; if (bootverbose) device_printf(args->dev, "not decoding %s ports\n", name); break; default: printf( "MPTable: Unknown compatibility address space modifier: %u\n", cbasm->address_mod); break; } break; } } void mptable_pci_host_res_init(device_t pcib) { struct host_res_args args; KASSERT(pci0 != -1, ("do not know how to map PCI bus IDs")); args.bus = pci_get_bus(pcib) + pci0; args.dev = pcib; args.sc = device_get_softc(pcib); if (pcib_host_res_init(pcib, &args.sc->sc_host_res) != 0) panic("failed to init hostb resources"); mptable_walk_extended_table(mptable_host_res_handler, &args); } #endif Index: head/sys/x86/x86/mptable_pci.c =================================================================== --- head/sys/x86/x86/mptable_pci.c (revision 344854) +++ head/sys/x86/x86/mptable_pci.c (revision 344855) @@ -1,242 +1,241 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Host to PCI and PCI to PCI bridge drivers that use the MP Table to route * interrupts from PCI devices to I/O APICs. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include "pcib_if.h" /* Host to PCI bridge driver. */ static int mptable_hostb_probe(device_t dev) { if (pci_cfgregopen() == 0) return (ENXIO); if (mptable_pci_probe_table(pcib_get_bus(dev)) != 0) return (ENXIO); device_set_desc(dev, "MPTable Host-PCI bridge"); return (0); } static int mptable_hostb_attach(device_t dev) { #ifdef NEW_PCIB mptable_pci_host_res_init(dev); #endif device_add_child(dev, "pci", -1); return (bus_generic_attach(dev)); } #ifdef NEW_PCIB static int mptable_is_isa_range(rman_res_t start, rman_res_t end) { if (end >= 0x10000) return (0); if ((start & 0xfc00) != (end & 0xfc00)) return (0); start &= ~0xfc00; end &= ~0xfc00; return (start >= 0x100 && end <= 0x3ff); } static int mptable_is_vga_range(rman_res_t start, rman_res_t end) { if (end >= 0x10000) return (0); if ((start & 0xfc00) != (end & 0xfc00)) return (0); start &= ~0xfc00; end &= ~0xfc00; return (pci_is_vga_ioport_range(start, end)); } static struct resource * mptable_hostb_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct mptable_hostb_softc *sc; #ifdef PCI_RES_BUS if (type == PCI_RES_BUS) return (pci_domain_alloc_bus(0, child, rid, start, end, count, flags)); #endif sc = device_get_softc(dev); if (type == SYS_RES_IOPORT && start + count - 1 == end) { if (mptable_is_isa_range(start, end)) { switch (sc->sc_decodes_isa_io) { case -1: return (NULL); case 1: return (bus_generic_alloc_resource(dev, child, type, rid, start, end, count, flags)); default: break; } } if (mptable_is_vga_range(start, end)) { switch (sc->sc_decodes_vga_io) { case -1: return (NULL); case 1: return (bus_generic_alloc_resource(dev, child, type, rid, start, end, count, flags)); default: break; } } } start = hostb_alloc_start(type, start, end, count); return (pcib_host_res_alloc(&sc->sc_host_res, child, type, rid, start, end, count, flags)); } static int mptable_hostb_adjust_resource(device_t dev, device_t child, int type, struct resource *r, rman_res_t start, rman_res_t end) { struct mptable_hostb_softc *sc; #ifdef PCI_RES_BUS if (type == PCI_RES_BUS) return (pci_domain_adjust_bus(0, child, r, start, end)); #endif sc = device_get_softc(dev); return (pcib_host_res_adjust(&sc->sc_host_res, child, type, r, start, end)); } #endif static device_method_t mptable_hostb_methods[] = { /* Device interface */ DEVMETHOD(device_probe, mptable_hostb_probe), DEVMETHOD(device_attach, mptable_hostb_attach), DEVMETHOD(device_shutdown, bus_generic_shutdown), 
DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /* Bus interface */ DEVMETHOD(bus_read_ivar, legacy_pcib_read_ivar), DEVMETHOD(bus_write_ivar, legacy_pcib_write_ivar), #ifdef NEW_PCIB DEVMETHOD(bus_alloc_resource, mptable_hostb_alloc_resource), DEVMETHOD(bus_adjust_resource, mptable_hostb_adjust_resource), #else DEVMETHOD(bus_alloc_resource, legacy_pcib_alloc_resource), DEVMETHOD(bus_adjust_resource, bus_generic_adjust_resource), #endif #if defined(NEW_PCIB) && defined(PCI_RES_BUS) DEVMETHOD(bus_release_resource, legacy_pcib_release_resource), #else DEVMETHOD(bus_release_resource, bus_generic_release_resource), #endif DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), /* pcib interface */ DEVMETHOD(pcib_maxslots, legacy_pcib_maxslots), DEVMETHOD(pcib_read_config, legacy_pcib_read_config), DEVMETHOD(pcib_write_config, legacy_pcib_write_config), DEVMETHOD(pcib_route_interrupt, mptable_pci_route_interrupt), DEVMETHOD(pcib_alloc_msi, legacy_pcib_alloc_msi), DEVMETHOD(pcib_release_msi, pcib_release_msi), DEVMETHOD(pcib_alloc_msix, legacy_pcib_alloc_msix), DEVMETHOD(pcib_release_msix, pcib_release_msix), DEVMETHOD(pcib_map_msi, legacy_pcib_map_msi), DEVMETHOD_END }; static devclass_t hostb_devclass; DEFINE_CLASS_0(pcib, mptable_hostb_driver, mptable_hostb_methods, sizeof(struct mptable_hostb_softc)); DRIVER_MODULE(mptable_pcib, legacy, mptable_hostb_driver, hostb_devclass, 0, 0); /* PCI to PCI bridge driver. */ static int mptable_pcib_probe(device_t dev) { int bus; if ((pci_get_class(dev) != PCIC_BRIDGE) || (pci_get_subclass(dev) != PCIS_BRIDGE_PCI)) return (ENXIO); bus = pci_read_config(dev, PCIR_SECBUS_1, 1); if (bus == 0) return (ENXIO); if (mptable_pci_probe_table(bus) != 0) return (ENXIO); device_set_desc(dev, "MPTable PCI-PCI bridge"); return (-1000); } static device_method_t mptable_pcib_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, mptable_pcib_probe), /* pcib interface */ DEVMETHOD(pcib_route_interrupt, mptable_pci_route_interrupt), {0, 0} }; static devclass_t pcib_devclass; DEFINE_CLASS_1(pcib, mptable_pcib_driver, mptable_pcib_pci_methods, sizeof(struct pcib_softc), pcib_driver); DRIVER_MODULE(mptable_pcib, pci, mptable_pcib_driver, pcib_devclass, 0, 0); Index: head/sys/x86/xen/pvcpu_enum.c =================================================================== --- head/sys/x86/xen/pvcpu_enum.c (revision 344854) +++ head/sys/x86/xen/pvcpu_enum.c (revision 344855) @@ -1,264 +1,264 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * - * Copyright (c) 2003 John Baldwin * Copyright (c) 2013 Roger Pau Monné * All rights reserved. + * Copyright (c) 2003 John Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int xenpv_probe(void); static int xenpv_probe_cpus(void); static int xenpv_setup_local(void); static int xenpv_setup_io(void); static ACPI_TABLE_MADT *madt; static vm_paddr_t madt_physaddr; static vm_offset_t madt_length; static struct apic_enumerator xenpv_enumerator = { .apic_name = "Xen PV", .apic_probe = xenpv_probe, .apic_probe_cpus = xenpv_probe_cpus, .apic_setup_local = xenpv_setup_local, .apic_setup_io = xenpv_setup_io }; /*--------------------- Helper functions to parse MADT -----------------------*/ /* * Parse an interrupt source override for an ISA interrupt. */ static void madt_parse_interrupt_override(ACPI_MADT_INTERRUPT_OVERRIDE *intr) { enum intr_trigger trig; enum intr_polarity pol; int ret; if (acpi_quirks & ACPI_Q_MADT_IRQ0 && intr->SourceIrq == 0 && intr->GlobalIrq == 2) { if (bootverbose) printf("MADT: Skipping timer override\n"); return; } madt_parse_interrupt_values(intr, &trig, &pol); /* Remap the IRQ if it is mapped to a different interrupt vector. */ if (intr->SourceIrq != intr->GlobalIrq && intr->GlobalIrq > 15 && intr->SourceIrq == AcpiGbl_FADT.SciInterrupt) /* * If the SCI is remapped to a non-ISA global interrupt, * then override the vector we use to setup. */ acpi_OverrideInterruptLevel(intr->GlobalIrq); /* Register the IRQ with the polarity and trigger mode found. */ ret = xen_register_pirq(intr->GlobalIrq, trig, pol); if (ret != 0) panic("Unable to register interrupt override"); } /* * Call the handler routine for each entry in the MADT table. */ static void madt_walk_table(acpi_subtable_handler *handler, void *arg) { acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length, handler, arg); } /* * Parse interrupt entries. 
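 *
 * Only ACPI_MADT_TYPE_INTERRUPT_OVERRIDE entries are acted on here;
 * every other MADT entry type is ignored by the handler below.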
*/ static void madt_parse_ints(ACPI_SUBTABLE_HEADER *entry, void *arg __unused) { if (entry->Type == ACPI_MADT_TYPE_INTERRUPT_OVERRIDE) madt_parse_interrupt_override( (ACPI_MADT_INTERRUPT_OVERRIDE *)entry); } /*---------------------------- Xen PV enumerator -----------------------------*/ /* * This enumerator will only be registered on PVH */ static int xenpv_probe(void) { return (0); } /* * Test each possible vCPU in order to find the number of vCPUs */ static int xenpv_probe_cpus(void) { #ifdef SMP int i, ret; for (i = 0; i < MAXCPU && (i * 2) < MAX_APIC_ID; i++) { ret = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); if (ret >= 0) mp_ncpus = min(mp_ncpus + 1, MAXCPU); } mp_maxid = mp_ncpus - 1; max_apic_id = mp_ncpus * 2; #endif return (0); } /* * Initialize the vCPU id of the BSP */ static int xenpv_setup_local(void) { #ifdef SMP int i, ret; for (i = 0; i < MAXCPU && (i * 2) < MAX_APIC_ID; i++) { ret = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); if (ret >= 0) lapic_create((i * 2), (i == 0)); } #endif PCPU_SET(vcpu_id, 0); lapic_init(0); return (0); } /* * On PVH guests there's no IO APIC */ static int xenpv_setup_io(void) { if (xen_initial_domain()) { /* * NB: we could iterate over the MADT IOAPIC entries in order * to figure out the exact number of IOAPIC interrupts, but * this is legacy code so just keep using the previous * behaviour and assume a maximum of 256 interrupts. */ num_io_irqs = max(255, num_io_irqs); acpi_SetDefaultIntrModel(ACPI_INTR_APIC); } return (0); } void xenpv_register_pirqs(struct pic *pic __unused) { unsigned int i; int ret; /* Map MADT */ madt_physaddr = acpi_find_table(ACPI_SIG_MADT); madt = acpi_map_table(madt_physaddr, ACPI_SIG_MADT); madt_length = madt->Header.Length; /* Try to initialize ACPI so that we can access the FADT. */ ret = acpi_Startup(); if (ACPI_FAILURE(ret)) { printf("MADT: ACPI Startup failed with %s\n", AcpiFormatException(ret)); printf("Try disabling either ACPI or apic support.\n"); panic("Using MADT but ACPI doesn't work"); } /* Run through the table to see if there are any overrides. */ madt_walk_table(madt_parse_ints, NULL); /* * If there was not an explicit override entry for the SCI, * force it to use level trigger and active-low polarity. */ if (!madt_found_sci_override) { printf( "MADT: Forcing active-low polarity and level trigger for SCI\n"); ret = xen_register_pirq(AcpiGbl_FADT.SciInterrupt, INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW); if (ret != 0) panic("Unable to register SCI IRQ"); } /* Register legacy ISA IRQs */ for (i = 1; i < 16; i++) { if (intr_lookup_source(i) != NULL) continue; ret = xen_register_pirq(i, INTR_TRIGGER_EDGE, INTR_POLARITY_LOW); if (ret != 0 && bootverbose) printf("Unable to register legacy IRQ#%u: %d\n", i, ret); } } static void xenpv_register(void *dummy __unused) { if (xen_pv_domain()) { apic_register_enumerator(&xenpv_enumerator); } } SYSINIT(xenpv_register, SI_SUB_TUNABLES - 1, SI_ORDER_FIRST, xenpv_register, NULL); Index: head/tests/sys/capsicum/ioctls_test.c =================================================================== --- head/tests/sys/capsicum/ioctls_test.c (revision 344854) +++ head/tests/sys/capsicum/ioctls_test.c (revision 344855) @@ -1,127 +1,126 @@ /*- * Copyright (c) 2018 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include /* * A variant of ATF_REQUIRE that is suitable for use in child * processes. This only works if the parent process is tripped up by * the early exit and fails some requirement itself. */ #define CHILD_REQUIRE(exp) do { \ if (!(exp)) \ child_fail_require(__FILE__, __LINE__, \ #exp " not met"); \ } while (0) static __dead2 void child_fail_require(const char *file, int line, const char *str) { char buf[128]; snprintf(buf, sizeof(buf), "%s:%d: %s\n", file, line, str); write(2, buf, strlen(buf)); _exit(32); } /* * Exercise the edge case of a custom ioctl list being copied from a * listen socket to an accepted socket. */ ATF_TC_WITHOUT_HEAD(cap_ioctls__listen_copy); ATF_TC_BODY(cap_ioctls__listen_copy, tc) { struct sockaddr_in sin; cap_rights_t rights; u_long cmds[] = { FIONREAD }; socklen_t len; pid_t pid; char dummy; int s[2], status; s[0] = socket(AF_INET, SOCK_STREAM, 0); ATF_REQUIRE(s[0] > 0); /* Bind to an arbitrary unused port. */ memset(&sin, 0, sizeof(sin)); sin.sin_len = sizeof(sin); sin.sin_family = AF_INET; sin.sin_port = 0; sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); ATF_REQUIRE(bind(s[0], (struct sockaddr *)&sin, sizeof(sin)) == 0); ATF_REQUIRE(listen(s[0], 1) == 0); len = sizeof(sin); ATF_REQUIRE(getsockname(s[0], (struct sockaddr *)&sin, &len) == 0); ATF_REQUIRE(len == sizeof(sin)); cap_rights_init(&rights, CAP_ACCEPT, CAP_IOCTL); ATF_REQUIRE(cap_rights_limit(s[0], &rights) == 0); ATF_REQUIRE(cap_ioctls_limit(s[0], cmds, nitems(cmds)) == 0); pid = fork(); if (pid == 0) { s[1] = accept(s[0], NULL, NULL); CHILD_REQUIRE(s[1] > 0); /* Close both sockets during exit(). */ exit(0); } ATF_REQUIRE(pid > 0); ATF_REQUIRE(close(s[0]) == 0); s[1] = socket(AF_INET, SOCK_STREAM, 0); ATF_REQUIRE(s[1] > 0); ATF_REQUIRE(connect(s[1], (struct sockaddr *)&sin, sizeof(sin)) == 0); ATF_REQUIRE(read(s[1], &dummy, sizeof(dummy)) == 0); ATF_REQUIRE(close(s[1]) == 0); ATF_REQUIRE(wait(&status) == pid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); } ATF_TP_ADD_TCS(tp) { ATF_TP_ADD_TC(tp, cap_ioctls__listen_copy); return (atf_no_error()); } Index: head/tests/sys/kern/ptrace_test.c =================================================================== --- head/tests/sys/kern/ptrace_test.c (revision 344854) +++ head/tests/sys/kern/ptrace_test.c (revision 344855) @@ -1,3909 +1,3908 @@ /*- * Copyright (c) 2015 John Baldwin - * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Architectures with a user-visible breakpoint(). */ #if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \ defined(__i386__) || defined(__mips__) || defined(__riscv) || \ defined(__sparc64__) #define HAVE_BREAKPOINT #endif /* * Adjust PC to skip over a breakpoint when stopped for a breakpoint trap. */ #ifdef HAVE_BREAKPOINT #if defined(__aarch64__) #define SKIP_BREAK(reg) ((reg)->elr += 4) #elif defined(__amd64__) || defined(__i386__) #define SKIP_BREAK(reg) #elif defined(__arm__) #define SKIP_BREAK(reg) ((reg)->r_pc += 4) #elif defined(__mips__) #define SKIP_BREAK(reg) ((reg)->r_regs[PC] += 4) #elif defined(__riscv) #define SKIP_BREAK(reg) ((reg)->sepc += 4) #elif defined(__sparc64__) #define SKIP_BREAK(reg) do { \ (reg)->r_tpc = (reg)->r_tnpc + 4; \ (reg)->r_tnpc += 8; \ } while (0) #endif #endif /* * A variant of ATF_REQUIRE that is suitable for use in child * processes. This only works if the parent process is tripped up by * the early exit and fails some requirement itself. */ #define CHILD_REQUIRE(exp) do { \ if (!(exp)) \ child_fail_require(__FILE__, __LINE__, \ #exp " not met"); \ } while (0) static __dead2 void child_fail_require(const char *file, int line, const char *str) { char buf[128]; snprintf(buf, sizeof(buf), "%s:%d: %s\n", file, line, str); write(2, buf, strlen(buf)); _exit(32); } static void trace_me(void) { /* Attach the parent process as a tracer of this process. */ CHILD_REQUIRE(ptrace(PT_TRACE_ME, 0, NULL, 0) != -1); /* Trigger a stop. */ raise(SIGSTOP); } static void attach_child(pid_t pid) { pid_t wpid; int status; ATF_REQUIRE(ptrace(PT_ATTACH, pid, NULL, 0) == 0); wpid = waitpid(pid, &status, 0); ATF_REQUIRE(wpid == pid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); } static void wait_for_zombie(pid_t pid) { /* * Wait for a process to exit. This is kind of gross, but * there is not a better way. * * Prior to r325719, the kern.proc.pid. sysctl failed * with ESRCH. 
After that change, a valid struct kinfo_proc * is returned for zombies with ki_stat set to SZOMB. */ for (;;) { struct kinfo_proc kp; size_t len; int mib[4]; mib[0] = CTL_KERN; mib[1] = KERN_PROC; mib[2] = KERN_PROC_PID; mib[3] = pid; len = sizeof(kp); if (sysctl(mib, nitems(mib), &kp, &len, NULL, 0) == -1) { ATF_REQUIRE(errno == ESRCH); break; } if (kp.ki_stat == SZOMB) break; usleep(5000); } } /* * Verify that a parent debugger process "sees" the exit of a debugged * process exactly once when attached via PT_TRACE_ME. */ ATF_TC_WITHOUT_HEAD(ptrace__parent_wait_after_trace_me); ATF_TC_BODY(ptrace__parent_wait_after_trace_me, tc) { pid_t child, wpid; int status; ATF_REQUIRE((child = fork()) != -1); if (child == 0) { /* Child process. */ trace_me(); _exit(1); } /* Parent process. */ /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(child, &status, 0); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, child, (caddr_t)1, 0) != -1); /* The second wait() should report the exit status. */ wpid = waitpid(child, &status, 0); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); /* The child should no longer exist. */ wpid = waitpid(child, &status, 0); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a parent debugger process "sees" the exit of a debugged * process exactly once when attached via PT_ATTACH. */ ATF_TC_WITHOUT_HEAD(ptrace__parent_wait_after_attach); ATF_TC_BODY(ptrace__parent_wait_after_attach, tc) { pid_t child, wpid; int cpipe[2], status; char c; ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((child = fork()) != -1); if (child == 0) { /* Child process. */ close(cpipe[0]); /* Wait for the parent to attach. */ CHILD_REQUIRE(read(cpipe[1], &c, sizeof(c)) == 0); _exit(1); } close(cpipe[1]); /* Parent process. */ /* Attach to the child process. */ attach_child(child); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, child, (caddr_t)1, 0) != -1); /* Signal the child to exit. */ close(cpipe[0]); /* The second wait() should report the exit status. */ wpid = waitpid(child, &status, 0); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); /* The child should no longer exist. */ wpid = waitpid(child, &status, 0); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a parent process "sees" the exit of a debugged process only * after the debugger has seen it. */ ATF_TC_WITHOUT_HEAD(ptrace__parent_sees_exit_after_child_debugger); ATF_TC_BODY(ptrace__parent_sees_exit_after_child_debugger, tc) { pid_t child, debugger, wpid; int cpipe[2], dpipe[2], status; char c; ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((child = fork()) != -1); if (child == 0) { /* Child process. */ close(cpipe[0]); /* Wait for parent to be ready. */ CHILD_REQUIRE(read(cpipe[1], &c, sizeof(c)) == sizeof(c)); _exit(1); } close(cpipe[1]); ATF_REQUIRE(pipe(dpipe) == 0); ATF_REQUIRE((debugger = fork()) != -1); if (debugger == 0) { /* Debugger process. */ close(dpipe[0]); CHILD_REQUIRE(ptrace(PT_ATTACH, child, NULL, 0) != -1); wpid = waitpid(child, &status, 0); CHILD_REQUIRE(wpid == child); CHILD_REQUIRE(WIFSTOPPED(status)); CHILD_REQUIRE(WSTOPSIG(status) == SIGSTOP); CHILD_REQUIRE(ptrace(PT_CONTINUE, child, (caddr_t)1, 0) != -1); /* Signal parent that debugger is attached. 
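 *
 * (These tests signal over pipes in two ways: writing a byte, which
 * the peer sees as a read of sizeof(c), and closing an end, which
 * the peer sees as a read returning 0 at EOF.)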
*/ CHILD_REQUIRE(write(dpipe[1], &c, sizeof(c)) == sizeof(c)); /* Wait for parent's failed wait. */ CHILD_REQUIRE(read(dpipe[1], &c, sizeof(c)) == 0); wpid = waitpid(child, &status, 0); CHILD_REQUIRE(wpid == child); CHILD_REQUIRE(WIFEXITED(status)); CHILD_REQUIRE(WEXITSTATUS(status) == 1); _exit(0); } close(dpipe[1]); /* Parent process. */ /* Wait for the debugger to attach to the child. */ ATF_REQUIRE(read(dpipe[0], &c, sizeof(c)) == sizeof(c)); /* Release the child. */ ATF_REQUIRE(write(cpipe[0], &c, sizeof(c)) == sizeof(c)); ATF_REQUIRE(read(cpipe[0], &c, sizeof(c)) == 0); close(cpipe[0]); wait_for_zombie(child); /* * This wait should return a pid of 0 to indicate no status to * report. The parent should see the child as non-exited * until the debugger sees the exit. */ wpid = waitpid(child, &status, WNOHANG); ATF_REQUIRE(wpid == 0); /* Signal the debugger to wait for the child. */ close(dpipe[0]); /* Wait for the debugger. */ wpid = waitpid(debugger, &status, 0); ATF_REQUIRE(wpid == debugger); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); /* The child process should now be ready. */ wpid = waitpid(child, &status, WNOHANG); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); } /* * Verify that a parent process "sees" the exit of a debugged process * only after a non-direct-child debugger has seen it. In particular, * various wait() calls in the parent must avoid failing with ESRCH by * checking the parent's orphan list for the debugee. */ ATF_TC_WITHOUT_HEAD(ptrace__parent_sees_exit_after_unrelated_debugger); ATF_TC_BODY(ptrace__parent_sees_exit_after_unrelated_debugger, tc) { pid_t child, debugger, fpid, wpid; int cpipe[2], dpipe[2], status; char c; ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((child = fork()) != -1); if (child == 0) { /* Child process. */ close(cpipe[0]); /* Wait for parent to be ready. */ CHILD_REQUIRE(read(cpipe[1], &c, sizeof(c)) == sizeof(c)); _exit(1); } close(cpipe[1]); ATF_REQUIRE(pipe(dpipe) == 0); ATF_REQUIRE((debugger = fork()) != -1); if (debugger == 0) { /* Debugger parent. */ /* * Fork again and drop the debugger parent so that the * debugger is not a child of the main parent. */ CHILD_REQUIRE((fpid = fork()) != -1); if (fpid != 0) _exit(2); /* Debugger process. */ close(dpipe[0]); CHILD_REQUIRE(ptrace(PT_ATTACH, child, NULL, 0) != -1); wpid = waitpid(child, &status, 0); CHILD_REQUIRE(wpid == child); CHILD_REQUIRE(WIFSTOPPED(status)); CHILD_REQUIRE(WSTOPSIG(status) == SIGSTOP); CHILD_REQUIRE(ptrace(PT_CONTINUE, child, (caddr_t)1, 0) != -1); /* Signal parent that debugger is attached. */ CHILD_REQUIRE(write(dpipe[1], &c, sizeof(c)) == sizeof(c)); /* Wait for parent's failed wait. */ CHILD_REQUIRE(read(dpipe[1], &c, sizeof(c)) == sizeof(c)); wpid = waitpid(child, &status, 0); CHILD_REQUIRE(wpid == child); CHILD_REQUIRE(WIFEXITED(status)); CHILD_REQUIRE(WEXITSTATUS(status) == 1); _exit(0); } close(dpipe[1]); /* Parent process. */ /* Wait for the debugger parent process to exit. */ wpid = waitpid(debugger, &status, 0); ATF_REQUIRE(wpid == debugger); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); /* A WNOHANG wait here should see the non-exited child. */ wpid = waitpid(child, &status, WNOHANG); ATF_REQUIRE(wpid == 0); /* Wait for the debugger to attach to the child. */ ATF_REQUIRE(read(dpipe[0], &c, sizeof(c)) == sizeof(c)); /* Release the child. 
*/ ATF_REQUIRE(write(cpipe[0], &c, sizeof(c)) == sizeof(c)); ATF_REQUIRE(read(cpipe[0], &c, sizeof(c)) == 0); close(cpipe[0]); wait_for_zombie(child); /* * This wait should return a pid of 0 to indicate no status to * report. The parent should see the child as non-exited * until the debugger sees the exit. */ wpid = waitpid(child, &status, WNOHANG); ATF_REQUIRE(wpid == 0); /* Signal the debugger to wait for the child. */ ATF_REQUIRE(write(dpipe[0], &c, sizeof(c)) == sizeof(c)); /* Wait for the debugger. */ ATF_REQUIRE(read(dpipe[0], &c, sizeof(c)) == 0); close(dpipe[0]); /* The child process should now be ready. */ wpid = waitpid(child, &status, WNOHANG); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); } /* * The parent process should always act the same regardless of how the * debugger is attached to it. */ static __dead2 void follow_fork_parent(bool use_vfork) { pid_t fpid, wpid; int status; if (use_vfork) CHILD_REQUIRE((fpid = vfork()) != -1); else CHILD_REQUIRE((fpid = fork()) != -1); if (fpid == 0) /* Child */ _exit(2); wpid = waitpid(fpid, &status, 0); CHILD_REQUIRE(wpid == fpid); CHILD_REQUIRE(WIFEXITED(status)); CHILD_REQUIRE(WEXITSTATUS(status) == 2); _exit(1); } /* * Helper routine for follow fork tests. This waits for two stops * that report both "sides" of a fork. It returns the pid of the new * child process. */ static pid_t handle_fork_events(pid_t parent, struct ptrace_lwpinfo *ppl) { struct ptrace_lwpinfo pl; bool fork_reported[2]; pid_t child, wpid; int i, status; fork_reported[0] = false; fork_reported[1] = false; child = -1; /* * Each process should report a fork event. The parent should * report a PL_FLAG_FORKED event, and the child should report * a PL_FLAG_CHILD event. */ for (i = 0; i < 2; i++) { wpid = wait(&status); ATF_REQUIRE(wpid > 0); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_FORKED | PL_FLAG_CHILD)) != 0); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_FORKED | PL_FLAG_CHILD)) != (PL_FLAG_FORKED | PL_FLAG_CHILD)); if (pl.pl_flags & PL_FLAG_CHILD) { ATF_REQUIRE(wpid != parent); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(!fork_reported[1]); if (child == -1) child = wpid; else ATF_REQUIRE(child == wpid); if (ppl != NULL) ppl[1] = pl; fork_reported[1] = true; } else { ATF_REQUIRE(wpid == parent); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(!fork_reported[0]); if (child == -1) child = pl.pl_child_pid; else ATF_REQUIRE(child == pl.pl_child_pid); if (ppl != NULL) ppl[0] = pl; fork_reported[0] = true; } } return (child); } /* * Verify that a new child process is stopped after a followed fork and * that the traced parent sees the exit of the child after the debugger * when both processes remain attached to the debugger. */ ATF_TC_WITHOUT_HEAD(ptrace__follow_fork_both_attached); ATF_TC_BODY(ptrace__follow_fork_both_attached, tc) { pid_t children[2], fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(false); } /* Parent process. */ children[0] = fpid; /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(children[0], &status, 0); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the child ignoring the SIGSTOP. 
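 *
 * A note on the PT_CONTINUE arguments used throughout these tests:
 * an addr of (caddr_t)1 resumes the process at the point where it
 * stopped, and a data value of 0 delivers no signal.  A hypothetical
 * call that resumes the process and delivers SIGUSR1 instead would
 * look like:
 *
 *	ptrace(PT_CONTINUE, pid, (caddr_t)1, SIGUSR1);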
*/ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); children[1] = handle_fork_events(children[0], NULL); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * The child can't exit until the grandchild reports status, so the * grandchild should report its exit first to the debugger. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a new child process is stopped after a followed fork * and that the traced parent sees the exit of the child when the new * child process is detached after it reports its fork. */ ATF_TC_WITHOUT_HEAD(ptrace__follow_fork_child_detached); ATF_TC_BODY(ptrace__follow_fork_child_detached, tc) { pid_t children[2], fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(false); } /* Parent process. */ children[0] = fpid; /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(children[0], &status, 0); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); children[1] = handle_fork_events(children[0], NULL); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_DETACH, children[1], (caddr_t)1, 0) != -1); /* * Should not see any status from the grandchild now, only the * child. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a new child process is stopped after a followed fork * and that the traced parent sees the exit of the child when the * traced parent is detached after the fork. */ ATF_TC_WITHOUT_HEAD(ptrace__follow_fork_parent_detached); ATF_TC_BODY(ptrace__follow_fork_parent_detached, tc) { pid_t children[2], fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(false); } /* Parent process. */ children[0] = fpid; /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(children[0], &status, 0); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); children[1] = handle_fork_events(children[0], NULL); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE(ptrace(PT_DETACH, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * The child can't exit until the grandchild reports status, so the * grandchild should report its exit first to the debugger. * * Even though the child process is detached, it is still a * child of the debugger, so it will still report its exit * after the grandchild. 
*/ wpid = wait(&status); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void attach_fork_parent(int cpipe[2]) { pid_t fpid; close(cpipe[0]); /* Double-fork to disassociate from the debugger. */ CHILD_REQUIRE((fpid = fork()) != -1); if (fpid != 0) _exit(3); /* Send the pid of the disassociated child to the debugger. */ fpid = getpid(); CHILD_REQUIRE(write(cpipe[1], &fpid, sizeof(fpid)) == sizeof(fpid)); /* Wait for the debugger to attach. */ CHILD_REQUIRE(read(cpipe[1], &fpid, sizeof(fpid)) == 0); } /* * Verify that a new child process is stopped after a followed fork and * that the traced parent sees the exit of the child after the debugger * when both processes remain attached to the debugger. In this test * the parent that forks is not a direct child of the debugger. */ ATF_TC_WITHOUT_HEAD(ptrace__follow_fork_both_attached_unrelated_debugger); ATF_TC_BODY(ptrace__follow_fork_both_attached_unrelated_debugger, tc) { pid_t children[2], fpid, wpid; int cpipe[2], status; ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { attach_fork_parent(cpipe); follow_fork_parent(false); } /* Parent process. */ close(cpipe[1]); /* Wait for the direct child to exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 3); /* Read the pid of the fork parent. */ ATF_REQUIRE(read(cpipe[0], &children[0], sizeof(children[0])) == sizeof(children[0])); /* Attach to the fork parent. */ attach_child(children[0]); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the fork parent ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); /* Signal the fork parent to continue. */ close(cpipe[0]); children[1] = handle_fork_events(children[0], NULL); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * The fork parent can't exit until the child reports status, * so the child should report its exit first to the debugger. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a new child process is stopped after a followed fork * and that the traced parent sees the exit of the child when the new * child process is detached after it reports its fork. In this test * the parent that forks is not a direct child of the debugger. */ ATF_TC_WITHOUT_HEAD(ptrace__follow_fork_child_detached_unrelated_debugger); ATF_TC_BODY(ptrace__follow_fork_child_detached_unrelated_debugger, tc) { pid_t children[2], fpid, wpid; int cpipe[2], status; ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { attach_fork_parent(cpipe); follow_fork_parent(false); } /* Parent process. */ close(cpipe[1]); /* Wait for the direct child to exit. 
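 *
 * The direct child here is the intermediate process from the double
 * fork in attach_fork_parent(); it exits with status 3, leaving the
 * fork parent no longer a child of this test process.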
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 3); /* Read the pid of the fork parent. */ ATF_REQUIRE(read(cpipe[0], &children[0], sizeof(children[0])) == sizeof(children[0])); /* Attach to the fork parent. */ attach_child(children[0]); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the fork parent ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); /* Signal the fork parent to continue. */ close(cpipe[0]); children[1] = handle_fork_events(children[0], NULL); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_DETACH, children[1], (caddr_t)1, 0) != -1); /* * Should not see any status from the child now, only the fork * parent. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a new child process is stopped after a followed fork * and that the traced parent sees the exit of the child when the * traced parent is detached after the fork. In this test the parent * that forks is not a direct child of the debugger. */ ATF_TC_WITHOUT_HEAD(ptrace__follow_fork_parent_detached_unrelated_debugger); ATF_TC_BODY(ptrace__follow_fork_parent_detached_unrelated_debugger, tc) { pid_t children[2], fpid, wpid; int cpipe[2], status; ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { attach_fork_parent(cpipe); follow_fork_parent(false); } /* Parent process. */ close(cpipe[1]); /* Wait for the direct child to exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 3); /* Read the pid of the fork parent. */ ATF_REQUIRE(read(cpipe[0], &children[0], sizeof(children[0])) == sizeof(children[0])); /* Attach to the fork parent. */ attach_child(children[0]); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the fork parent ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); /* Signal the fork parent to continue. */ close(cpipe[0]); children[1] = handle_fork_events(children[0], NULL); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE(ptrace(PT_DETACH, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * Should not see any status from the fork parent now, only * the child. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a child process does not see an unrelated debugger as its * parent but sees its original parent process. */ ATF_TC_WITHOUT_HEAD(ptrace__getppid); ATF_TC_BODY(ptrace__getppid, tc) { pid_t child, debugger, ppid, wpid; int cpipe[2], dpipe[2], status; char c; ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((child = fork()) != -1); if (child == 0) { /* Child process. */ close(cpipe[0]); /* Wait for parent to be ready. */ CHILD_REQUIRE(read(cpipe[1], &c, sizeof(c)) == sizeof(c)); /* Report the parent PID to the parent. 
*/ ppid = getppid(); CHILD_REQUIRE(write(cpipe[1], &ppid, sizeof(ppid)) == sizeof(ppid)); _exit(1); } close(cpipe[1]); ATF_REQUIRE(pipe(dpipe) == 0); ATF_REQUIRE((debugger = fork()) != -1); if (debugger == 0) { /* Debugger process. */ close(dpipe[0]); CHILD_REQUIRE(ptrace(PT_ATTACH, child, NULL, 0) != -1); wpid = waitpid(child, &status, 0); CHILD_REQUIRE(wpid == child); CHILD_REQUIRE(WIFSTOPPED(status)); CHILD_REQUIRE(WSTOPSIG(status) == SIGSTOP); CHILD_REQUIRE(ptrace(PT_CONTINUE, child, (caddr_t)1, 0) != -1); /* Signal parent that debugger is attached. */ CHILD_REQUIRE(write(dpipe[1], &c, sizeof(c)) == sizeof(c)); /* Wait for traced child to exit. */ wpid = waitpid(child, &status, 0); CHILD_REQUIRE(wpid == child); CHILD_REQUIRE(WIFEXITED(status)); CHILD_REQUIRE(WEXITSTATUS(status) == 1); _exit(0); } close(dpipe[1]); /* Parent process. */ /* Wait for the debugger to attach to the child. */ ATF_REQUIRE(read(dpipe[0], &c, sizeof(c)) == sizeof(c)); /* Release the child. */ ATF_REQUIRE(write(cpipe[0], &c, sizeof(c)) == sizeof(c)); /* Read the parent PID from the child. */ ATF_REQUIRE(read(cpipe[0], &ppid, sizeof(ppid)) == sizeof(ppid)); close(cpipe[0]); ATF_REQUIRE(ppid == getpid()); /* Wait for the debugger. */ wpid = waitpid(debugger, &status, 0); ATF_REQUIRE(wpid == debugger); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); /* The child process should now be ready. */ wpid = waitpid(child, &status, WNOHANG); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); } /* * Verify that pl_syscall_code in struct ptrace_lwpinfo for a new * child process created via fork() reports the correct value. */ ATF_TC_WITHOUT_HEAD(ptrace__new_child_pl_syscall_code_fork); ATF_TC_BODY(ptrace__new_child_pl_syscall_code_fork, tc) { struct ptrace_lwpinfo pl[2]; pid_t children[2], fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(false); } /* Parent process. */ children[0] = fpid; /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(children[0], &status, 0); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); /* Wait for both halves of the fork event to get reported. */ children[1] = handle_fork_events(children[0], pl); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE((pl[0].pl_flags & PL_FLAG_SCX) != 0); ATF_REQUIRE((pl[1].pl_flags & PL_FLAG_SCX) != 0); ATF_REQUIRE(pl[0].pl_syscall_code == SYS_fork); ATF_REQUIRE(pl[0].pl_syscall_code == pl[1].pl_syscall_code); ATF_REQUIRE(pl[0].pl_syscall_narg == pl[1].pl_syscall_narg); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * The child can't exit until the grandchild reports status, so the * grandchild should report its exit first to the debugger. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that pl_syscall_code in struct ptrace_lwpinfo for a new * child process created via vfork() reports the correct value. 
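 *
 * This mirrors the fork() test above except that
 * follow_fork_parent(true) uses vfork(), so both halves of the fork
 * event should report a syscall code of SYS_vfork.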
*/ ATF_TC_WITHOUT_HEAD(ptrace__new_child_pl_syscall_code_vfork); ATF_TC_BODY(ptrace__new_child_pl_syscall_code_vfork, tc) { struct ptrace_lwpinfo pl[2]; pid_t children[2], fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(true); } /* Parent process. */ children[0] = fpid; /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(children[0], &status, 0); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); /* Wait for both halves of the fork event to get reported. */ children[1] = handle_fork_events(children[0], pl); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE((pl[0].pl_flags & PL_FLAG_SCX) != 0); ATF_REQUIRE((pl[1].pl_flags & PL_FLAG_SCX) != 0); ATF_REQUIRE(pl[0].pl_syscall_code == SYS_vfork); ATF_REQUIRE(pl[0].pl_syscall_code == pl[1].pl_syscall_code); ATF_REQUIRE(pl[0].pl_syscall_narg == pl[1].pl_syscall_narg); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * The child can't exit until the grandchild reports status, so the * grandchild should report its exit first to the debugger. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void * simple_thread(void *arg __unused) { pthread_exit(NULL); } static __dead2 void simple_thread_main(void) { pthread_t thread; CHILD_REQUIRE(pthread_create(&thread, NULL, simple_thread, NULL) == 0); CHILD_REQUIRE(pthread_join(thread, NULL) == 0); exit(1); } /* * Verify that pl_syscall_code in struct ptrace_lwpinfo for a new * thread reports the correct value. */ ATF_TC_WITHOUT_HEAD(ptrace__new_child_pl_syscall_code_thread); ATF_TC_BODY(ptrace__new_child_pl_syscall_code_thread, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; lwpid_t mainlwp; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); simple_thread_main(); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); mainlwp = pl.pl_lwpid; /* * Continue the child ignoring the SIGSTOP and tracing all * system call exits. */ ATF_REQUIRE(ptrace(PT_TO_SCX, fpid, (caddr_t)1, 0) != -1); /* * Wait for the new thread to arrive. pthread_create() might * invoke any number of system calls. For now we just wait * for the new thread to arrive and make sure it reports a * valid system call code. If ptrace grows thread event * reporting then this test can be made more precise. */ for (;;) { wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & PL_FLAG_SCX) != 0); ATF_REQUIRE(pl.pl_syscall_code != 0); if (pl.pl_lwpid != mainlwp) /* New thread seen. 
*/ break; ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } /* Wait for the child to exit. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); for (;;) { wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); if (WIFEXITED(status)) break; ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that the expected LWP events are reported for a child thread. */ ATF_TC_WITHOUT_HEAD(ptrace__lwp_events); ATF_TC_BODY(ptrace__lwp_events, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; lwpid_t lwps[2]; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); simple_thread_main(); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); lwps[0] = pl.pl_lwpid; ATF_REQUIRE(ptrace(PT_LWP_EVENTS, wpid, NULL, 1) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The first event should be for the child thread's birth. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_BORN | PL_FLAG_SCX)) == (PL_FLAG_BORN | PL_FLAG_SCX)); ATF_REQUIRE(pl.pl_lwpid != lwps[0]); lwps[1] = pl.pl_lwpid; ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next event should be for the child thread's death. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_EXITED | PL_FLAG_SCE)) == (PL_FLAG_EXITED | PL_FLAG_SCE)); ATF_REQUIRE(pl.pl_lwpid == lwps[1]); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void * exec_thread(void *arg __unused) { execl("/usr/bin/true", "true", NULL); exit(127); } static __dead2 void exec_thread_main(void) { pthread_t thread; CHILD_REQUIRE(pthread_create(&thread, NULL, exec_thread, NULL) == 0); for (;;) sleep(60); exit(1); } /* * Verify that the expected LWP events are reported for a multithreaded * process that calls execve(2). */ ATF_TC_WITHOUT_HEAD(ptrace__lwp_events_exec); ATF_TC_BODY(ptrace__lwp_events_exec, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; lwpid_t lwps[2]; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exec_thread_main(); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); lwps[0] = pl.pl_lwpid; ATF_REQUIRE(ptrace(PT_LWP_EVENTS, wpid, NULL, 1) == 0); /* Continue the child ignoring the SIGSTOP. 
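 *
 * (Aside on the ptrace(2) calling convention used throughout: for
 * PT_CONTINUE and its relatives, an address argument of (caddr_t)1 means
 * "resume where the tracee stopped", and the data argument names a
 * signal to deliver on resume, 0 meaning none.  For example:
 *
 *	ptrace(PT_CONTINUE, pid, (caddr_t)1, 0);         resume quietly
 *	ptrace(PT_CONTINUE, pid, (caddr_t)1, SIGUSR1);   resume + signal
 * )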
*/ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The first event should be for the child thread's birth. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_BORN | PL_FLAG_SCX)) == (PL_FLAG_BORN | PL_FLAG_SCX)); ATF_REQUIRE(pl.pl_lwpid != lwps[0]); lwps[1] = pl.pl_lwpid; ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* * The next event should be for the main thread's death due to * single threading from execve(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_EXITED | PL_FLAG_SCE)) == (PL_FLAG_EXITED)); ATF_REQUIRE(pl.pl_lwpid == lwps[0]); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next event should be for the child process's exec. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_EXEC | PL_FLAG_SCX)) == (PL_FLAG_EXEC | PL_FLAG_SCX)); ATF_REQUIRE(pl.pl_lwpid == lwps[1]); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void handler(int sig __unused) { } static void signal_main(void) { signal(SIGINFO, handler); raise(SIGINFO); exit(0); } /* * Verify that the expected ptrace event is reported for a signal. */ ATF_TC_WITHOUT_HEAD(ptrace__siginfo); ATF_TC_BODY(ptrace__siginfo, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); signal_main(); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next event should be for the SIGINFO. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGINFO); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_event == PL_EVENT_SIGNAL); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_code == SI_LWP); ATF_REQUIRE(pl.pl_siginfo.si_pid == wpid); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that the expected ptrace events are reported for PTRACE_EXEC. */ ATF_TC_WITHOUT_HEAD(ptrace__ptrace_exec_disable); ATF_TC_BODY(ptrace__ptrace_exec_disable, tc) { pid_t fpid, wpid; int events, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exec_thread(NULL); } /* The first wait() should report the stop from SIGSTOP. 
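 *
 * (Aside: the SIGSTOP being waited for here comes from the trace_me()
 * helper defined earlier in this file.  The conventional shape of such a
 * helper, sketched here for reference rather than quoted, is to mark the
 * process traced and then stop itself so the tracer can configure events
 * before anything interesting runs:
 *
 *	static void
 *	trace_me(void)
 *	{
 *		CHILD_REQUIRE(ptrace(PT_TRACE_ME, 0, NULL, 0) != -1);
 *		raise(SIGSTOP);
 *	}
 * )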
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); events = 0; ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* Should get one event at exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } ATF_TC_WITHOUT_HEAD(ptrace__ptrace_exec_enable); ATF_TC_BODY(ptrace__ptrace_exec_enable, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int events, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exec_thread(NULL); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); events = PTRACE_EXEC; ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next event should be for the child process's exec. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_EXEC | PL_FLAG_SCX)) == (PL_FLAG_EXEC | PL_FLAG_SCX)); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } ATF_TC_WITHOUT_HEAD(ptrace__event_mask); ATF_TC_BODY(ptrace__event_mask, tc) { pid_t fpid, wpid; int events, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exit(0); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* PT_FOLLOW_FORK should toggle the state of PTRACE_FORK. */ ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, fpid, NULL, 1) != -1); ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(events & PTRACE_FORK); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, fpid, NULL, 0) != -1); ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(!(events & PTRACE_FORK)); /* PT_LWP_EVENTS should toggle the state of PTRACE_LWP. */ ATF_REQUIRE(ptrace(PT_LWP_EVENTS, fpid, NULL, 1) != -1); ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(events & PTRACE_LWP); ATF_REQUIRE(ptrace(PT_LWP_EVENTS, fpid, NULL, 0) != -1); ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(!(events & PTRACE_LWP)); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* Should get one event at exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that the expected ptrace events are reported for PTRACE_VFORK. 
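 *
 * (Aside: PT_GET_EVENT_MASK/PT_SET_EVENT_MASK operate on a plain int
 * bitmask, so the tests below add PTRACE_VFORK with a read-modify-write
 * rather than a blind store, preserving events that are already enabled:
 *
 *	int events;
 *	ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, pid, (caddr_t)&events,
 *	    sizeof(events)) == 0);
 *	events |= PTRACE_VFORK;
 *	ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, pid, (caddr_t)&events,
 *	    sizeof(events)) == 0);
 * )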
*/ ATF_TC_WITHOUT_HEAD(ptrace__ptrace_vfork); ATF_TC_BODY(ptrace__ptrace_vfork, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int events, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(true); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); events |= PTRACE_VFORK; ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) != -1); /* The next event should report the end of the vfork. */ wpid = wait(&status); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & PL_FLAG_VFORK_DONE) != 0); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) != -1); wpid = wait(&status); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } ATF_TC_WITHOUT_HEAD(ptrace__ptrace_vfork_follow); ATF_TC_BODY(ptrace__ptrace_vfork_follow, tc) { struct ptrace_lwpinfo pl[2]; pid_t children[2], fpid, wpid; int events, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(true); } /* Parent process. */ children[0] = fpid; /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(children[0], &status, 0); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, children[0], (caddr_t)&events, sizeof(events)) == 0); events |= PTRACE_FORK | PTRACE_VFORK; ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, children[0], (caddr_t)&events, sizeof(events)) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); /* Wait for both halves of the fork event to get reported. */ children[1] = handle_fork_events(children[0], pl); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE((pl[0].pl_flags & PL_FLAG_VFORKED) != 0); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * The child can't exit until the grandchild reports status, so the * grandchild should report its exit first to the debugger. */ wpid = waitpid(children[1], &status, 0); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); /* * The child should report its vfork() completion before it * exits. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl[0], sizeof(pl[0])) != -1); ATF_REQUIRE((pl[0].pl_flags & PL_FLAG_VFORK_DONE) != 0); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } #ifdef HAVE_BREAKPOINT /* * Verify that no more events are reported after PT_KILL except for the * process exit when stopped due to a breakpoint trap. 
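 *
 * (Aside: a breakpoint stop is reported as a SIGTRAP like any other
 * trace stop; the reliable discriminator is the si_code carried in the
 * lwpinfo siginfo, as ptrace__breakpoint_siginfo later in this file
 * verifies.  A hypothetical check, where handle_breakpoint() is an
 * invented name:
 *
 *	ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl,
 *	    sizeof(pl)) != -1);
 *	if ((pl.pl_flags & PL_FLAG_SI) != 0 &&
 *	    pl.pl_siginfo.si_code == TRAP_BRKPT)
 *		handle_breakpoint(wpid);
 * )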
*/ ATF_TC_WITHOUT_HEAD(ptrace__PT_KILL_breakpoint); ATF_TC_BODY(ptrace__PT_KILL_breakpoint, tc) { pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); breakpoint(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report hitting the breakpoint. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); /* Kill the child process. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } #endif /* HAVE_BREAKPOINT */ /* * Verify that no more events are reported after PT_KILL except for the * process exit when stopped inside of a system call. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_KILL_system_call); ATF_TC_BODY(ptrace__PT_KILL_system_call, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); getpid(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP and tracing system calls. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report a system call entry for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); /* Kill the child process. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that no more events are reported after PT_KILL except for the * process exit when killing a multithreaded process. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_KILL_threads); ATF_TC_BODY(ptrace__PT_KILL_threads, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; lwpid_t main_lwp; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); simple_thread_main(); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); main_lwp = pl.pl_lwpid; ATF_REQUIRE(ptrace(PT_LWP_EVENTS, wpid, NULL, 1) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The first event should be for the child thread's birth. 
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_BORN | PL_FLAG_SCX)) == (PL_FLAG_BORN | PL_FLAG_SCX)); ATF_REQUIRE(pl.pl_lwpid != main_lwp); /* Kill the child process. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void * mask_usr1_thread(void *arg) { pthread_barrier_t *pbarrier; sigset_t sigmask; pbarrier = (pthread_barrier_t*)arg; sigemptyset(&sigmask); sigaddset(&sigmask, SIGUSR1); CHILD_REQUIRE(pthread_sigmask(SIG_BLOCK, &sigmask, NULL) == 0); /* Sync up with other thread after sigmask updated. */ pthread_barrier_wait(pbarrier); for (;;) sleep(60); return (NULL); } /* * Verify that the SIGKILL from PT_KILL takes priority over other signals * and prevents spurious stops due to those other signals. */ ATF_TC(ptrace__PT_KILL_competing_signal); ATF_TC_HEAD(ptrace__PT_KILL_competing_signal, tc) { atf_tc_set_md_var(tc, "require.user", "root"); } ATF_TC_BODY(ptrace__PT_KILL_competing_signal, tc) { pid_t fpid, wpid; int status; cpuset_t setmask; pthread_t t; pthread_barrier_t barrier; struct sched_param sched_param; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { /* Bind to one CPU so only one thread at a time will run. */ CPU_ZERO(&setmask); CPU_SET(0, &setmask); cpusetid_t setid; CHILD_REQUIRE(cpuset(&setid) == 0); CHILD_REQUIRE(cpuset_setaffinity(CPU_LEVEL_CPUSET, CPU_WHICH_CPUSET, setid, sizeof(setmask), &setmask) == 0); CHILD_REQUIRE(pthread_barrier_init(&barrier, NULL, 2) == 0); CHILD_REQUIRE(pthread_create(&t, NULL, mask_usr1_thread, (void*)&barrier) == 0); /* * Give the main thread higher priority. The test always * assumes that, if both threads are able to run, the main * thread runs first. */ sched_param.sched_priority = (sched_get_priority_max(SCHED_FIFO) + sched_get_priority_min(SCHED_FIFO)) / 2; CHILD_REQUIRE(pthread_setschedparam(pthread_self(), SCHED_FIFO, &sched_param) == 0); sched_param.sched_priority -= RQ_PPQ; CHILD_REQUIRE(pthread_setschedparam(t, SCHED_FIFO, &sched_param) == 0); sigset_t sigmask; sigemptyset(&sigmask); sigaddset(&sigmask, SIGUSR2); CHILD_REQUIRE(pthread_sigmask(SIG_BLOCK, &sigmask, NULL) == 0); /* Sync up with other thread after sigmask updated. */ pthread_barrier_wait(&barrier); trace_me(); for (;;) sleep(60); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* Send a signal that only the second thread can handle. */ ATF_REQUIRE(kill(fpid, SIGUSR2) == 0); /* The second wait() should report the SIGUSR2. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR2); /* Send a signal that only the first thread can handle. */ ATF_REQUIRE(kill(fpid, SIGUSR1) == 0); /* Replace the SIGUSR2 with a kill. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL (not the SIGUSR signal). 
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that the SIGKILL from PT_KILL takes priority over other stop events * and prevents spurious stops caused by those events. */ ATF_TC(ptrace__PT_KILL_competing_stop); ATF_TC_HEAD(ptrace__PT_KILL_competing_stop, tc) { atf_tc_set_md_var(tc, "require.user", "root"); } ATF_TC_BODY(ptrace__PT_KILL_competing_stop, tc) { pid_t fpid, wpid; int status; cpuset_t setmask; pthread_t t; pthread_barrier_t barrier; lwpid_t main_lwp; struct ptrace_lwpinfo pl; struct sched_param sched_param; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); /* Bind to one CPU so only one thread at a time will run. */ CPU_ZERO(&setmask); CPU_SET(0, &setmask); cpusetid_t setid; CHILD_REQUIRE(cpuset(&setid) == 0); CHILD_REQUIRE(cpuset_setaffinity(CPU_LEVEL_CPUSET, CPU_WHICH_CPUSET, setid, sizeof(setmask), &setmask) == 0); CHILD_REQUIRE(pthread_barrier_init(&barrier, NULL, 2) == 0); CHILD_REQUIRE(pthread_create(&t, NULL, mask_usr1_thread, (void*)&barrier) == 0); /* * Give the main thread higher priority. The test always * assumes that, if both threads are able to run, the main * thread runs first. */ sched_param.sched_priority = (sched_get_priority_max(SCHED_FIFO) + sched_get_priority_min(SCHED_FIFO)) / 2; CHILD_REQUIRE(pthread_setschedparam(pthread_self(), SCHED_FIFO, &sched_param) == 0); sched_param.sched_priority -= RQ_PPQ; CHILD_REQUIRE(pthread_setschedparam(t, SCHED_FIFO, &sched_param) == 0); sigset_t sigmask; sigemptyset(&sigmask); sigaddset(&sigmask, SIGUSR2); CHILD_REQUIRE(pthread_sigmask(SIG_BLOCK, &sigmask, NULL) == 0); /* Sync up with other thread after sigmask updated. */ pthread_barrier_wait(&barrier); /* Sync up with the test before doing the getpid(). */ raise(SIGSTOP); getpid(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); main_lwp = pl.pl_lwpid; /* Continue the child ignoring the SIGSTOP and tracing system calls. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* * Continue until child is done with setup, which is indicated with * SIGSTOP. Ignore system calls in the meantime. */ for (;;) { wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); if (WSTOPSIG(status) == SIGTRAP) { ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & (PL_FLAG_SCE | PL_FLAG_SCX)); } else { ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); break; } ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); } /* Proceed, allowing main thread to hit syscall entry for getpid(). */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_lwpid == main_lwp); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); /* Prevent the main thread from hitting its syscall exit for now. */ ATF_REQUIRE(ptrace(PT_SUSPEND, main_lwp, 0, 0) == 0); /* * Proceed, allowing second thread to hit syscall exit for * pthread_barrier_wait(). 
*/ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_lwpid != main_lwp); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCX); /* Send a signal that only the second thread can handle. */ ATF_REQUIRE(kill(fpid, SIGUSR2) == 0); ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* The next wait() should report the SIGUSR2. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR2); /* Allow the main thread to try to finish its system call. */ ATF_REQUIRE(ptrace(PT_RESUME, main_lwp, 0, 0) == 0); /* * At this point, the main thread is in the middle of a system call and * has been resumed. The second thread has taken a SIGUSR2 which will * be replaced with a SIGKILL below. The main thread will get to run * first. It should notice the kill request (even though the signal * replacement occurred in the other thread) and exit accordingly. It * should not stop for the system call exit event. */ /* Replace the SIGUSR2 with a kill. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL (not a syscall exit). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void sigusr1_handler(int sig) { CHILD_REQUIRE(sig == SIGUSR1); _exit(2); } /* * Verify that even if the signal queue is full for a child process, * a PT_KILL will kill the process. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_KILL_with_signal_full_sigqueue); ATF_TC_BODY(ptrace__PT_KILL_with_signal_full_sigqueue, tc) { pid_t fpid, wpid; int status; int max_pending_per_proc; size_t len; int i; ATF_REQUIRE(signal(SIGUSR1, sigusr1_handler) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); len = sizeof(max_pending_per_proc); ATF_REQUIRE(sysctlbyname("kern.sigqueue.max_pending_per_proc", &max_pending_per_proc, &len, NULL, 0) == 0); /* Fill the signal queue. */ for (i = 0; i < max_pending_per_proc; ++i) ATF_REQUIRE(kill(fpid, SIGUSR1) == 0); /* Kill the child process. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that when stopped at a system call entry, a signal can be * requested with PT_CONTINUE which will be delivered once the system * call is complete. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_system_call_entry); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_system_call_entry, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE(signal(SIGUSR1, sigusr1_handler) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); getpid(); exit(1); } /* The first wait() should report the stop from SIGSTOP. 
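 *
 * (Aside: every status collected in these tests is decoded with the
 * standard <sys/wait.h> macros; at most one of the predicates below is
 * true for a given status.  A generic decoder sketch, not part of the
 * original file:
 *
 *	if (WIFSTOPPED(status))
 *		sig = WSTOPSIG(status);       tracee stopped by a signal
 *	else if (WIFEXITED(status))
 *		code = WEXITSTATUS(status);   normal exit status
 *	else if (WIFSIGNALED(status))
 *		sig = WTERMSIG(status);       terminated by a signal
 * )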
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP and tracing system calls. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report a system call entry for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); /* Continue the child process with a signal. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); for (;;) { /* * The last wait() should report exit 2, i.e., a normal _exit * from the signal handler. In the meantime, catch and proceed * past any syscall stops. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & (PL_FLAG_SCE | PL_FLAG_SCX)); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } else { ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); break; } } wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void sigusr1_counting_handler(int sig) { static int counter = 0; CHILD_REQUIRE(sig == SIGUSR1); counter++; if (counter == 2) _exit(2); } /* * Verify that, when continuing from a stop at system call entry and exit, * a signal can be requested from both stops, and both will be delivered when * the system call is complete. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_system_call_entry_and_exit); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_system_call_entry_and_exit, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE(signal(SIGUSR1, sigusr1_counting_handler) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); getpid(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP and tracing system calls. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report a system call entry for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); /* Continue the child process with a signal. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* The third wait() should report a system call exit for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCX); /* Continue the child process with a signal. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); for (;;) { /* * The last wait() should report exit 2, i.e., a normal _exit * from the signal handler. In the meantime, catch and proceed * past any syscall stops. 
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & (PL_FLAG_SCE | PL_FLAG_SCX)); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } else { ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); break; } } wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that even if the signal queue is full for a child process, * a PT_CONTINUE with a signal will not result in loss of that signal. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_full_sigqueue); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_full_sigqueue, tc) { pid_t fpid, wpid; int status; int max_pending_per_proc; size_t len; int i; ATF_REQUIRE(signal(SIGUSR2, handler) != SIG_ERR); ATF_REQUIRE(signal(SIGUSR1, sigusr1_handler) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); len = sizeof(max_pending_per_proc); ATF_REQUIRE(sysctlbyname("kern.sigqueue.max_pending_per_proc", &max_pending_per_proc, &len, NULL, 0) == 0); /* Fill the signal queue. */ for (i = 0; i < max_pending_per_proc; ++i) ATF_REQUIRE(kill(fpid, SIGUSR2) == 0); /* Continue with signal. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); for (;;) { wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); if (WIFSTOPPED(status)) { ATF_REQUIRE(WSTOPSIG(status) == SIGUSR2); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } else { /* * The last wait() should report normal _exit from the * SIGUSR1 handler. */ ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); break; } } wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static sem_t sigusr1_sem; static int got_usr1; static void sigusr1_sempost_handler(int sig __unused) { got_usr1++; CHILD_REQUIRE(sem_post(&sigusr1_sem) == 0); } /* * Verify that even if the signal queue is full for a child process, * and the signal is masked, a PT_CONTINUE with a signal will not * result in loss of that signal. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_masked_full_sigqueue); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_masked_full_sigqueue, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status, err; int max_pending_per_proc; size_t len; int i; sigset_t sigmask; ATF_REQUIRE(signal(SIGUSR2, handler) != SIG_ERR); ATF_REQUIRE(sem_init(&sigusr1_sem, 0, 0) == 0); ATF_REQUIRE(signal(SIGUSR1, sigusr1_sempost_handler) != SIG_ERR); got_usr1 = 0; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { CHILD_REQUIRE(sigemptyset(&sigmask) == 0); CHILD_REQUIRE(sigaddset(&sigmask, SIGUSR1) == 0); CHILD_REQUIRE(sigprocmask(SIG_BLOCK, &sigmask, NULL) == 0); trace_me(); CHILD_REQUIRE(got_usr1 == 0); /* Allow the pending SIGUSR1 in now. */ CHILD_REQUIRE(sigprocmask(SIG_UNBLOCK, &sigmask, NULL) == 0); /* Wait to receive the SIGUSR1. */ do { err = sem_wait(&sigusr1_sem); CHILD_REQUIRE(err == 0 || errno == EINTR); } while (err != 0 && errno == EINTR); CHILD_REQUIRE(got_usr1 == 1); exit(1); } /* The first wait() should report the stop from SIGSTOP. 
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); len = sizeof(max_pending_per_proc); ATF_REQUIRE(sysctlbyname("kern.sigqueue.max_pending_per_proc", &max_pending_per_proc, &len, NULL, 0) == 0); /* Fill the signal queue. */ for (i = 0; i < max_pending_per_proc; ++i) ATF_REQUIRE(kill(fpid, SIGUSR2) == 0); /* Continue with signal. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* Collect and ignore all of the SIGUSR2. */ for (i = 0; i < max_pending_per_proc; ++i) { wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR2); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } /* Now our PT_CONTINUE'd SIGUSR1 should cause a stop after unmask. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR1); ATF_REQUIRE(ptrace(PT_LWPINFO, fpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGUSR1); /* Continue the child, ignoring the SIGUSR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last wait() should report exit after receiving SIGUSR1. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that, after stopping due to a signal, that signal can be * replaced with another signal. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_change_sig); ATF_TC_BODY(ptrace__PT_CONTINUE_change_sig, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); sleep(20); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* Send a signal without ptrace. */ ATF_REQUIRE(kill(fpid, SIGINT) == 0); /* The second wait() should report a SIGINT was received. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGINT); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGINT); /* Continue the child process with a different signal. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGTERM) == 0); /* * The last wait() should report having died due to the new * signal, SIGTERM. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGTERM); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a signal can be passed through to the child even when there * was no true signal originally. Such cases arise when a SIGTRAP is * invented for, e.g., system call stops. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_sigtrap_system_call_entry); ATF_TC_BODY(ptrace__PT_CONTINUE_with_sigtrap_system_call_entry, tc) { struct ptrace_lwpinfo pl; struct rlimit rl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); /* SIGTRAP expected to cause exit on syscall entry. 
*/ rl.rlim_cur = rl.rlim_max = 0; ATF_REQUIRE(setrlimit(RLIMIT_CORE, &rl) == 0); getpid(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP and tracing system calls. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report a system call entry for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); /* Continue the child process with a SIGTRAP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGTRAP) == 0); for (;;) { /* * The last wait() should report exit due to SIGTRAP. In the * meantime, catch and proceed past any syscall stops. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & (PL_FLAG_SCE | PL_FLAG_SCX)); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } else { ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGTRAP); break; } } wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * A mixed bag PT_CONTINUE with signal test. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_mix); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_mix, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE(signal(SIGUSR1, sigusr1_counting_handler) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); getpid(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP and tracing system calls. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report a system call entry for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); /* Continue with the first SIGUSR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* The next wait() should report a system call exit for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCX); /* Send an ABRT without ptrace. */ ATF_REQUIRE(kill(fpid, SIGABRT) == 0); /* Continue normally. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next wait() should report the SIGABRT. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGABRT); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGABRT); /* Continue, replacing the SIGABRT with another SIGUSR1. 
*/ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); for (;;) { /* * The last wait() should report exit 2, i.e., a normal _exit * from the signal handler. In the meantime, catch and proceed * past any syscall stops. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & (PL_FLAG_SCE | PL_FLAG_SCX)); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } else { ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); break; } } wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a signal delivered by ptrace is noticed by kevent(2). */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_kqueue); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_kqueue, tc) { pid_t fpid, wpid; int status, kq, nevents; struct kevent kev; ATF_REQUIRE(signal(SIGUSR1, SIG_IGN) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { CHILD_REQUIRE((kq = kqueue()) > 0); EV_SET(&kev, SIGUSR1, EVFILT_SIGNAL, EV_ADD, 0, 0, 0); CHILD_REQUIRE(kevent(kq, &kev, 1, NULL, 0, NULL) == 0); trace_me(); for (;;) { nevents = kevent(kq, NULL, 0, &kev, 1, NULL); if (nevents == -1 && errno == EINTR) continue; CHILD_REQUIRE(nevents > 0); CHILD_REQUIRE(kev.filter == EVFILT_SIGNAL); CHILD_REQUIRE(kev.ident == SIGUSR1); break; } exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue with the SIGUSR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* * The last wait() should report normal exit with code 1. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void * signal_thread(void *arg) { int err; sigset_t sigmask; pthread_barrier_t *pbarrier = (pthread_barrier_t*)arg; /* Wait for this thread to receive a SIGUSR1. */ do { err = sem_wait(&sigusr1_sem); CHILD_REQUIRE(err == 0 || errno == EINTR); } while (err != 0 && errno == EINTR); /* Free our companion thread from the barrier. */ pthread_barrier_wait(pbarrier); /* * Swap ignore duties; the next SIGUSR1 should go to the * other thread. */ CHILD_REQUIRE(sigemptyset(&sigmask) == 0); CHILD_REQUIRE(sigaddset(&sigmask, SIGUSR1) == 0); CHILD_REQUIRE(pthread_sigmask(SIG_BLOCK, &sigmask, NULL) == 0); /* Sync up threads after swapping signal masks. */ pthread_barrier_wait(pbarrier); /* Wait until our companion has received its SIGUSR1. */ pthread_barrier_wait(pbarrier); return (NULL); } /* * Verify that a traced process with a blocked signal receives the * signal from kill() once unmasked. */ ATF_TC_WITHOUT_HEAD(ptrace__killed_with_sigmask); ATF_TC_BODY(ptrace__killed_with_sigmask, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status, err; sigset_t sigmask; ATF_REQUIRE(sem_init(&sigusr1_sem, 0, 0) == 0); ATF_REQUIRE(signal(SIGUSR1, sigusr1_sempost_handler) != SIG_ERR); got_usr1 = 0; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { CHILD_REQUIRE(sigemptyset(&sigmask) == 0); CHILD_REQUIRE(sigaddset(&sigmask, SIGUSR1) == 0); CHILD_REQUIRE(sigprocmask(SIG_BLOCK, &sigmask, NULL) == 0); trace_me(); CHILD_REQUIRE(got_usr1 == 0); /* Allow the pending SIGUSR1 in now. 
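 *
 * (Aside, hedged: while SIGUSR1 is blocked, the kill(2) issued by the
 * test merely marks it pending; delivery, and hence the ptrace stop,
 * happens at the sigprocmask(SIG_UNBLOCK) below.  A hypothetical way for
 * the child to observe the pending-but-undelivered state at this point:
 *
 *	sigset_t pending;
 *	CHILD_REQUIRE(sigpending(&pending) == 0);
 *	CHILD_REQUIRE(sigismember(&pending, SIGUSR1) == 1);
 * )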
*/ CHILD_REQUIRE(sigprocmask(SIG_UNBLOCK, &sigmask, NULL) == 0); /* Wait to receive a SIGUSR1. */ do { err = sem_wait(&sigusr1_sem); CHILD_REQUIRE(err == 0 || errno == EINTR); } while (err != 0 && errno == EINTR); CHILD_REQUIRE(got_usr1 == 1); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, fpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGSTOP); /* Send a blocked SIGUSR1, which should cause a stop. */ ATF_REQUIRE(kill(fpid, SIGUSR1) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next wait() should report that the kill(SIGUSR1) was received. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR1); ATF_REQUIRE(ptrace(PT_LWPINFO, fpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGUSR1); /* Continue the child, allowing in the SIGUSR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* The last wait() should report normal exit with code 1. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a traced process with a blocked signal receives the * signal from PT_CONTINUE once unmasked. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_sigmask); ATF_TC_BODY(ptrace__PT_CONTINUE_with_sigmask, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status, err; sigset_t sigmask; ATF_REQUIRE(sem_init(&sigusr1_sem, 0, 0) == 0); ATF_REQUIRE(signal(SIGUSR1, sigusr1_sempost_handler) != SIG_ERR); got_usr1 = 0; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { CHILD_REQUIRE(sigemptyset(&sigmask) == 0); CHILD_REQUIRE(sigaddset(&sigmask, SIGUSR1) == 0); CHILD_REQUIRE(sigprocmask(SIG_BLOCK, &sigmask, NULL) == 0); trace_me(); CHILD_REQUIRE(got_usr1 == 0); /* Allow the pending SIGUSR1 in now. */ CHILD_REQUIRE(sigprocmask(SIG_UNBLOCK, &sigmask, NULL) == 0); /* Wait to receive a SIGUSR1. */ do { err = sem_wait(&sigusr1_sem); CHILD_REQUIRE(err == 0 || errno == EINTR); } while (err != 0 && errno == EINTR); CHILD_REQUIRE(got_usr1 == 1); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, fpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGSTOP); /* Continue the child replacing SIGSTOP with SIGUSR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* The next wait() should report that the SIGUSR1 was received. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR1); ATF_REQUIRE(ptrace(PT_LWPINFO, fpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGUSR1); /* Continue the child, ignoring the SIGUSR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last wait() should report normal exit with code 1. 
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that if ptrace stops due to a signal but continues with * a different signal that the new signal is routed to a thread * that can accept it, and that the thread is awakened by the signal * in a timely manner. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_thread_sigmask); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_thread_sigmask, tc) { pid_t fpid, wpid; int status, err; pthread_t t; sigset_t sigmask; pthread_barrier_t barrier; ATF_REQUIRE(pthread_barrier_init(&barrier, NULL, 2) == 0); ATF_REQUIRE(sem_init(&sigusr1_sem, 0, 0) == 0); ATF_REQUIRE(signal(SIGUSR1, sigusr1_sempost_handler) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { CHILD_REQUIRE(pthread_create(&t, NULL, signal_thread, (void*)&barrier) == 0); /* The other thread should receive the first SIGUSR1. */ CHILD_REQUIRE(sigemptyset(&sigmask) == 0); CHILD_REQUIRE(sigaddset(&sigmask, SIGUSR1) == 0); CHILD_REQUIRE(pthread_sigmask(SIG_BLOCK, &sigmask, NULL) == 0); trace_me(); /* Wait until other thread has received its SIGUSR1. */ pthread_barrier_wait(&barrier); /* * Swap ignore duties; the next SIGUSR1 should go to this * thread. */ CHILD_REQUIRE(pthread_sigmask(SIG_UNBLOCK, &sigmask, NULL) == 0); /* Sync up threads after swapping signal masks. */ pthread_barrier_wait(&barrier); /* * Sync up with test code; we're ready for the next SIGUSR1 * now. */ raise(SIGSTOP); /* Wait for this thread to receive a SIGUSR1. */ do { err = sem_wait(&sigusr1_sem); CHILD_REQUIRE(err == 0 || errno == EINTR); } while (err != 0 && errno == EINTR); /* Free the other thread from the barrier. */ pthread_barrier_wait(&barrier); CHILD_REQUIRE(pthread_join(t, NULL) == 0); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* * Send a signal without ptrace that either thread will accept (USR2, * in this case). */ ATF_REQUIRE(kill(fpid, SIGUSR2) == 0); /* The second wait() should report a SIGUSR2 was received. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR2); /* Continue the child, changing the signal to USR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* The next wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); ATF_REQUIRE(kill(fpid, SIGUSR2) == 0); /* The next wait() should report a SIGUSR2 was received. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR2); /* Continue the child, changing the signal to USR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* The last wait() should report normal exit with code 1. 
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void * raise_sigstop_thread(void *arg __unused) { raise(SIGSTOP); return NULL; } static void * sleep_thread(void *arg __unused) { sleep(60); return NULL; } static void terminate_with_pending_sigstop(bool sigstop_from_main_thread) { pid_t fpid, wpid; int status, i; cpuset_t setmask; cpusetid_t setid; pthread_t t; /* * Become the reaper for this process tree. We need to be able to check * that both child and grandchild have died. */ ATF_REQUIRE(procctl(P_PID, getpid(), PROC_REAP_ACQUIRE, NULL) == 0); fpid = fork(); ATF_REQUIRE(fpid >= 0); if (fpid == 0) { fpid = fork(); CHILD_REQUIRE(fpid >= 0); if (fpid == 0) { trace_me(); /* Pin to CPU 0 to serialize thread execution. */ CPU_ZERO(&setmask); CPU_SET(0, &setmask); CHILD_REQUIRE(cpuset(&setid) == 0); CHILD_REQUIRE(cpuset_setaffinity(CPU_LEVEL_CPUSET, CPU_WHICH_CPUSET, setid, sizeof(setmask), &setmask) == 0); if (sigstop_from_main_thread) { /* * We expect the SIGKILL sent when our parent * dies to be delivered to the new thread. * Raise the SIGSTOP in this thread so the * threads compete. */ CHILD_REQUIRE(pthread_create(&t, NULL, sleep_thread, NULL) == 0); raise(SIGSTOP); } else { /* * We expect the SIGKILL to be delivered to * this thread. After creating the new thread, * just get off the CPU so the other thread can * raise the SIGSTOP. */ CHILD_REQUIRE(pthread_create(&t, NULL, raise_sigstop_thread, NULL) == 0); sleep(60); } exit(0); } /* First stop is trace_me() immediately after fork. */ wpid = waitpid(fpid, &status, 0); CHILD_REQUIRE(wpid == fpid); CHILD_REQUIRE(WIFSTOPPED(status)); CHILD_REQUIRE(WSTOPSIG(status) == SIGSTOP); CHILD_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* Second stop is from the raise(SIGSTOP). */ wpid = waitpid(fpid, &status, 0); CHILD_REQUIRE(wpid == fpid); CHILD_REQUIRE(WIFSTOPPED(status)); CHILD_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* * Terminate tracing process without detaching. Our child * should be killed. */ exit(0); } /* * We should get a normal exit from our immediate child and a SIGKILL * exit from our grandchild. The latter case is the interesting one. * Our grandchild should not have stopped due to the SIGSTOP that was * left dangling when its parent died. */ for (i = 0; i < 2; ++i) { wpid = wait(&status); if (wpid == fpid) ATF_REQUIRE(WIFEXITED(status) && WEXITSTATUS(status) == 0); else ATF_REQUIRE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL); } } /* * These two tests ensure that if the tracing process exits without detaching * just after the child received a SIGSTOP, the child is cleanly killed and * doesn't go to sleep due to the SIGSTOP. The parent's death will send a * SIGKILL to the child. If the SIGKILL and the SIGSTOP are handled by * different threads, the SIGKILL must win. There are two variants of this * test, designed to catch the case where the SIGKILL is delivered to the * younger thread (the first test) and the case where the SIGKILL is delivered * to the older thread (the second test). This behavior has changed in the * past, so make no assumption. 
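 *
 * (Aside: the terminate_with_pending_sigstop() helper relies on the
 * procctl(2) reaper facility so that the orphaned grandchild is
 * reparented to the test process instead of to init, which is what makes
 * its SIGKILL exit status collectable with wait(2).  The idiom in
 * isolation, as a hedged sketch:
 *
 *	ATF_REQUIRE(procctl(P_PID, getpid(), PROC_REAP_ACQUIRE,
 *	    NULL) == 0);
 *	while (wait(&status) != -1)
 *		;       reaps children and orphaned descendants alike
 * )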
*/ ATF_TC(ptrace__parent_terminate_with_pending_sigstop1); ATF_TC_HEAD(ptrace__parent_terminate_with_pending_sigstop1, tc) { atf_tc_set_md_var(tc, "require.user", "root"); } ATF_TC_BODY(ptrace__parent_terminate_with_pending_sigstop1, tc) { terminate_with_pending_sigstop(true); } ATF_TC(ptrace__parent_terminate_with_pending_sigstop2); ATF_TC_HEAD(ptrace__parent_terminate_with_pending_sigstop2, tc) { atf_tc_set_md_var(tc, "require.user", "root"); } ATF_TC_BODY(ptrace__parent_terminate_with_pending_sigstop2, tc) { terminate_with_pending_sigstop(false); } /* * Verify that after ptrace() discards a SIGKILL signal, the event mask * is not modified. */ ATF_TC_WITHOUT_HEAD(ptrace__event_mask_sigkill_discard); ATF_TC_BODY(ptrace__event_mask_sigkill_discard, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status, event_mask, new_event_mask; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); raise(SIGSTOP); exit(0); } /* The first wait() should report the stop from trace_me(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Set several unobtrusive event bits. */ event_mask = PTRACE_EXEC | PTRACE_FORK | PTRACE_LWP; ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, wpid, (caddr_t)&event_mask, sizeof(event_mask)) == 0); /* Send a SIGKILL without using ptrace. */ ATF_REQUIRE(kill(fpid, SIGKILL) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next stop should be due to the SIGKILL. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGKILL); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGKILL); /* Continue the child ignoring the SIGKILL. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Check the current event mask. It should not have changed. */ new_event_mask = 0; ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, wpid, (caddr_t)&new_event_mask, sizeof(new_event_mask)) == 0); ATF_REQUIRE(event_mask == new_event_mask); /* Continue the child to let it exit. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void * flock_thread(void *arg) { int fd; fd = *(int *)arg; (void)flock(fd, LOCK_EX); (void)flock(fd, LOCK_UN); return (NULL); } /* * Verify that PT_ATTACH will suspend threads sleeping in an SBDRY section. * We rely on the fact that the lockf implementation sets SBDRY before blocking * on a lock. This is a regression test for r318191. 
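 *
 * (Aside: once an attach completes, a tracer can enumerate the stopped
 * process's threads with PT_GETNUMLWPS and PT_GETLWPLIST; a hypothetical
 * sketch, assuming child is already stopped:
 *
 *	int n;
 *	lwpid_t *lwps;
 *	n = ptrace(PT_GETNUMLWPS, child, NULL, 0);
 *	ATF_REQUIRE(n > 0);
 *	lwps = calloc(n, sizeof(*lwps));
 *	ATF_REQUIRE(lwps != NULL);
 *	ATF_REQUIRE(ptrace(PT_GETLWPLIST, child, (caddr_t)lwps, n) == n);
 *	free(lwps);
 * )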
*/ ATF_TC_WITHOUT_HEAD(ptrace__PT_ATTACH_with_SBDRY_thread); ATF_TC_BODY(ptrace__PT_ATTACH_with_SBDRY_thread, tc) { pthread_barrier_t barrier; pthread_barrierattr_t battr; char tmpfile[64]; pid_t child, wpid; int error, fd, i, status; ATF_REQUIRE(pthread_barrierattr_init(&battr) == 0); ATF_REQUIRE(pthread_barrierattr_setpshared(&battr, PTHREAD_PROCESS_SHARED) == 0); ATF_REQUIRE(pthread_barrier_init(&barrier, &battr, 2) == 0); (void)snprintf(tmpfile, sizeof(tmpfile), "./ptrace.XXXXXX"); fd = mkstemp(tmpfile); ATF_REQUIRE(fd >= 0); ATF_REQUIRE((child = fork()) != -1); if (child == 0) { pthread_t t[2]; int cfd; error = pthread_barrier_wait(&barrier); if (error != 0 && error != PTHREAD_BARRIER_SERIAL_THREAD) _exit(1); cfd = open(tmpfile, O_RDONLY); if (cfd < 0) _exit(1); /* * We want at least two threads blocked on the file lock since * the SIGSTOP from PT_ATTACH may kick one of them out of * sleep. */ if (pthread_create(&t[0], NULL, flock_thread, &cfd) != 0) _exit(1); if (pthread_create(&t[1], NULL, flock_thread, &cfd) != 0) _exit(1); if (pthread_join(t[0], NULL) != 0) _exit(1); if (pthread_join(t[1], NULL) != 0) _exit(1); _exit(0); } ATF_REQUIRE(flock(fd, LOCK_EX) == 0); error = pthread_barrier_wait(&barrier); ATF_REQUIRE(error == 0 || error == PTHREAD_BARRIER_SERIAL_THREAD); /* * Give the child some time to block. Is there a better way to do this? */ sleep(1); /* * Attach and give the child 3 seconds to stop. */ ATF_REQUIRE(ptrace(PT_ATTACH, child, NULL, 0) == 0); for (i = 0; i < 3; i++) { wpid = waitpid(child, &status, WNOHANG); if (wpid == child && WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) break; sleep(1); } ATF_REQUIRE_MSG(i < 3, "failed to stop child process after PT_ATTACH"); ATF_REQUIRE(ptrace(PT_DETACH, child, NULL, 0) == 0); ATF_REQUIRE(flock(fd, LOCK_UN) == 0); ATF_REQUIRE(unlink(tmpfile) == 0); ATF_REQUIRE(close(fd) == 0); } static void sigusr1_step_handler(int sig) { CHILD_REQUIRE(sig == SIGUSR1); raise(SIGABRT); } /* * Verify that PT_STEP with a signal invokes the signal before * stepping the next instruction (and that the next instruction is * stepped correctly). */ ATF_TC_WITHOUT_HEAD(ptrace__PT_STEP_with_signal); ATF_TC_BODY(ptrace__PT_STEP_with_signal, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); signal(SIGUSR1, sigusr1_step_handler); raise(SIGABRT); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next stop should report the SIGABRT in the child body. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGABRT); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGABRT); /* Step the child process inserting SIGUSR1. */ ATF_REQUIRE(ptrace(PT_STEP, fpid, (caddr_t)1, SIGUSR1) == 0); /* The next stop should report the SIGABRT in the signal handler. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGABRT); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGABRT); /* Continue the child process discarding the signal. 
*/ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next stop should report a trace trap from PT_STEP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGTRAP); ATF_REQUIRE(pl.pl_siginfo.si_code == TRAP_TRACE); /* Continue the child to let it exit. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } #ifdef HAVE_BREAKPOINT /* * Verify that a SIGTRAP event with the TRAP_BRKPT code is reported * for a breakpoint trap. */ ATF_TC_WITHOUT_HEAD(ptrace__breakpoint_siginfo); ATF_TC_BODY(ptrace__breakpoint_siginfo, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); breakpoint(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report hitting the breakpoint. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & PL_FLAG_SI) != 0); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGTRAP); ATF_REQUIRE(pl.pl_siginfo.si_code == TRAP_BRKPT); /* Kill the child process. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } #endif /* HAVE_BREAKPOINT */ /* * Verify that a SIGTRAP event with the TRAP_TRACE code is reported * for a single-step trap from PT_STEP. */ ATF_TC_WITHOUT_HEAD(ptrace__step_siginfo); ATF_TC_BODY(ptrace__step_siginfo, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Step the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_STEP, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report a single-step trap. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & PL_FLAG_SI) != 0); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGTRAP); ATF_REQUIRE(pl.pl_siginfo.si_code == TRAP_TRACE); /* Continue the child process. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. 
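*/

/*
 * As in the other tests, the trailing wait() must fail with ECHILD,
 * proving that the traced child was the only child and has been fully
 * reaped.
 */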
wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } #if defined(HAVE_BREAKPOINT) && defined(SKIP_BREAK) static void * continue_thread(void *arg __unused) { breakpoint(); return (NULL); } static __dead2 void continue_thread_main(void) { pthread_t threads[2]; CHILD_REQUIRE(pthread_create(&threads[0], NULL, continue_thread, NULL) == 0); CHILD_REQUIRE(pthread_create(&threads[1], NULL, continue_thread, NULL) == 0); CHILD_REQUIRE(pthread_join(threads[0], NULL) == 0); CHILD_REQUIRE(pthread_join(threads[1], NULL) == 0); exit(1); } /* * Ensure that PT_CONTINUE clears the status of the thread that * triggered the stop even if a different thread's LWP was passed to * PT_CONTINUE. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_different_thread); ATF_TC_BODY(ptrace__PT_CONTINUE_different_thread, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; lwpid_t lwps[2]; bool hit_break[2]; struct reg reg; int i, j, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); continue_thread_main(); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(ptrace(PT_LWP_EVENTS, wpid, NULL, 1) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* One of the new threads should report its birth. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_BORN | PL_FLAG_SCX)) == (PL_FLAG_BORN | PL_FLAG_SCX)); lwps[0] = pl.pl_lwpid; /* * Suspend this thread to ensure both threads are alive before * hitting the breakpoint. */ ATF_REQUIRE(ptrace(PT_SUSPEND, lwps[0], NULL, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The second thread should report its birth. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_BORN | PL_FLAG_SCX)) == (PL_FLAG_BORN | PL_FLAG_SCX)); ATF_REQUIRE(pl.pl_lwpid != lwps[0]); lwps[1] = pl.pl_lwpid; /* Resume both threads waiting for breakpoint events. */ hit_break[0] = hit_break[1] = false; ATF_REQUIRE(ptrace(PT_RESUME, lwps[0], NULL, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* One thread should report a breakpoint. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & PL_FLAG_SI) != 0); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGTRAP && pl.pl_siginfo.si_code == TRAP_BRKPT); if (pl.pl_lwpid == lwps[0]) i = 0; else i = 1; hit_break[i] = true; ATF_REQUIRE(ptrace(PT_GETREGS, pl.pl_lwpid, (caddr_t)&reg, 0) != -1); SKIP_BREAK(&reg); ATF_REQUIRE(ptrace(PT_SETREGS, pl.pl_lwpid, (caddr_t)&reg, 0) != -1); /* * Resume both threads but pass the other thread's LWPID to * PT_CONTINUE. */
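/*
 * i indexes the thread that hit the breakpoint, so i ^ 1 (0 <-> 1)
 * selects the other thread. Passing that LWP id to PT_CONTINUE
 * exercises exactly the case under test: the kernel must still clear
 * the stopped thread's event.
 */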
ATF_REQUIRE(ptrace(PT_CONTINUE, lwps[i ^ 1], (caddr_t)1, 0) == 0); /* * We will now get two thread exit events and one more breakpoint * event. */ for (j = 0; j < 3; j++) { wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); if (pl.pl_lwpid == lwps[0]) i = 0; else i = 1; ATF_REQUIRE_MSG(lwps[i] != 0, "event for exited thread"); if (pl.pl_flags & PL_FLAG_EXITED) { ATF_REQUIRE_MSG(hit_break[i], "exited thread did not report breakpoint"); lwps[i] = 0; } else { ATF_REQUIRE((pl.pl_flags & PL_FLAG_SI) != 0); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGTRAP && pl.pl_siginfo.si_code == TRAP_BRKPT); ATF_REQUIRE_MSG(!hit_break[i], "double breakpoint event"); hit_break[i] = true; ATF_REQUIRE(ptrace(PT_GETREGS, pl.pl_lwpid, (caddr_t)&reg, 0) != -1); SKIP_BREAK(&reg); ATF_REQUIRE(ptrace(PT_SETREGS, pl.pl_lwpid, (caddr_t)&reg, 0) != -1); } ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } /* Both threads should have exited. */ ATF_REQUIRE(lwps[0] == 0); ATF_REQUIRE(lwps[1] == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } #endif /* * Verify that PT_LWPINFO doesn't return stale siginfo. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_LWPINFO_stale_siginfo); ATF_TC_BODY(ptrace__PT_LWPINFO_stale_siginfo, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int events, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); raise(SIGABRT); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next stop should report the SIGABRT in the child body. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGABRT); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGABRT); /* * Continue the process ignoring the signal, but enabling * syscall traps. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* * The next stop should report a system call entry from * exit(). PL_FLAG_SI should not be set. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); ATF_REQUIRE((pl.pl_flags & PL_FLAG_SI) == 0); /* Disable syscall tracing and continue the child to let it exit. */ ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); events &= ~PTRACE_SYSCALL; ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit.
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } ATF_TP_ADD_TCS(tp) { ATF_TP_ADD_TC(tp, ptrace__parent_wait_after_trace_me); ATF_TP_ADD_TC(tp, ptrace__parent_wait_after_attach); ATF_TP_ADD_TC(tp, ptrace__parent_sees_exit_after_child_debugger); ATF_TP_ADD_TC(tp, ptrace__parent_sees_exit_after_unrelated_debugger); ATF_TP_ADD_TC(tp, ptrace__follow_fork_both_attached); ATF_TP_ADD_TC(tp, ptrace__follow_fork_child_detached); ATF_TP_ADD_TC(tp, ptrace__follow_fork_parent_detached); ATF_TP_ADD_TC(tp, ptrace__follow_fork_both_attached_unrelated_debugger); ATF_TP_ADD_TC(tp, ptrace__follow_fork_child_detached_unrelated_debugger); ATF_TP_ADD_TC(tp, ptrace__follow_fork_parent_detached_unrelated_debugger); ATF_TP_ADD_TC(tp, ptrace__getppid); ATF_TP_ADD_TC(tp, ptrace__new_child_pl_syscall_code_fork); ATF_TP_ADD_TC(tp, ptrace__new_child_pl_syscall_code_vfork); ATF_TP_ADD_TC(tp, ptrace__new_child_pl_syscall_code_thread); ATF_TP_ADD_TC(tp, ptrace__lwp_events); ATF_TP_ADD_TC(tp, ptrace__lwp_events_exec); ATF_TP_ADD_TC(tp, ptrace__siginfo); ATF_TP_ADD_TC(tp, ptrace__ptrace_exec_disable); ATF_TP_ADD_TC(tp, ptrace__ptrace_exec_enable); ATF_TP_ADD_TC(tp, ptrace__event_mask); ATF_TP_ADD_TC(tp, ptrace__ptrace_vfork); ATF_TP_ADD_TC(tp, ptrace__ptrace_vfork_follow); #ifdef HAVE_BREAKPOINT ATF_TP_ADD_TC(tp, ptrace__PT_KILL_breakpoint); #endif ATF_TP_ADD_TC(tp, ptrace__PT_KILL_system_call); ATF_TP_ADD_TC(tp, ptrace__PT_KILL_threads); ATF_TP_ADD_TC(tp, ptrace__PT_KILL_competing_signal); ATF_TP_ADD_TC(tp, ptrace__PT_KILL_competing_stop); ATF_TP_ADD_TC(tp, ptrace__PT_KILL_with_signal_full_sigqueue); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_system_call_entry); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_system_call_entry_and_exit); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_full_sigqueue); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_masked_full_sigqueue); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_change_sig); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_sigtrap_system_call_entry); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_mix); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_kqueue); ATF_TP_ADD_TC(tp, ptrace__killed_with_sigmask); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_sigmask); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_thread_sigmask); ATF_TP_ADD_TC(tp, ptrace__parent_terminate_with_pending_sigstop1); ATF_TP_ADD_TC(tp, ptrace__parent_terminate_with_pending_sigstop2); ATF_TP_ADD_TC(tp, ptrace__event_mask_sigkill_discard); ATF_TP_ADD_TC(tp, ptrace__PT_ATTACH_with_SBDRY_thread); ATF_TP_ADD_TC(tp, ptrace__PT_STEP_with_signal); #ifdef HAVE_BREAKPOINT ATF_TP_ADD_TC(tp, ptrace__breakpoint_siginfo); #endif ATF_TP_ADD_TC(tp, ptrace__step_siginfo); #if defined(HAVE_BREAKPOINT) && defined(SKIP_BREAK) ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_different_thread); #endif ATF_TP_ADD_TC(tp, ptrace__PT_LWPINFO_stale_siginfo); return (atf_no_error()); } Index: head/tools/tools/decioctl/decioctl.c =================================================================== --- head/tools/tools/decioctl/decioctl.c (revision 344854) +++ head/tools/tools/decioctl/decioctl.c (revision 344855) @@ -1,94 +1,93 @@ /*- * Copyright (c) 2005-2006,2016 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include static void usage(char **av) { fprintf(stderr, "%s: [ ... ]\n", av[0]); exit(1); } int main(int ac, char **av) { unsigned long cmd; const char *name; char *cp; int group, i; if (ac < 2) usage(av); printf(" command : dir group num len name\n"); for (i = 1; i < ac; i++) { errno = 0; cmd = strtoul(av[i], &cp, 0); if (*cp != '\0' || errno != 0) { fprintf(stderr, "Invalid integer: %s\n", av[i]); usage(av); } printf("0x%08lx: ", cmd); switch (cmd & IOC_DIRMASK) { case IOC_VOID: printf("VOID "); break; case IOC_OUT: printf("OUT "); break; case IOC_IN: printf("IN "); break; case IOC_INOUT: printf("INOUT"); break; default: printf("%01lx ???", (cmd & IOC_DIRMASK) >> 29); break; } printf(" "); group = IOCGROUP(cmd); if (isprint(group)) printf(" '%c' ", group); else printf(" 0x%02x", group); printf(" %3lu %4lu", cmd & 0xff, IOCPARM_LEN(cmd)); name = sysdecode_ioctlname(cmd); if (name != NULL) printf(" %s", name); printf("\n"); } return (0); } Index: head/usr.sbin/bhyve/gdb.c =================================================================== --- head/usr.sbin/bhyve/gdb.c (revision 344854) +++ head/usr.sbin/bhyve/gdb.c (revision 344855) @@ -1,1313 +1,1312 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2017-2018 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #include #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" #include "mem.h" #include "mevent.h" /* * GDB_SIGNAL_* numbers are part of the GDB remote protocol. Most stops * use SIGTRAP. */ #define GDB_SIGNAL_TRAP 5 static void gdb_resume_vcpus(void); static void check_command(int fd); static struct mevent *read_event, *write_event; static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting; static pthread_mutex_t gdb_lock; static pthread_cond_t idle_vcpus; static bool stop_pending, first_stop; static int stepping_vcpu, stopped_vcpu; /* * An I/O buffer contains 'capacity' bytes of room at 'data'. For a * read buffer, 'start' is unused and 'len' contains the number of * valid bytes in the buffer. For a write buffer, 'start' is set to * the index of the next byte in 'data' to send, and 'len' contains * the remaining number of valid bytes to send. */ struct io_buffer { uint8_t *data; size_t capacity; size_t start; size_t len; }; static struct io_buffer cur_comm, cur_resp; static uint8_t cur_csum; static int cur_vcpu; static struct vmctx *ctx; static int cur_fd = -1; const int gdb_regset[] = { VM_REG_GUEST_RAX, VM_REG_GUEST_RBX, VM_REG_GUEST_RCX, VM_REG_GUEST_RDX, VM_REG_GUEST_RSI, VM_REG_GUEST_RDI, VM_REG_GUEST_RBP, VM_REG_GUEST_RSP, VM_REG_GUEST_R8, VM_REG_GUEST_R9, VM_REG_GUEST_R10, VM_REG_GUEST_R11, VM_REG_GUEST_R12, VM_REG_GUEST_R13, VM_REG_GUEST_R14, VM_REG_GUEST_R15, VM_REG_GUEST_RIP, VM_REG_GUEST_RFLAGS, VM_REG_GUEST_CS, VM_REG_GUEST_SS, VM_REG_GUEST_DS, VM_REG_GUEST_ES, VM_REG_GUEST_FS, VM_REG_GUEST_GS }; const int gdb_regsize[] = { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4 }; #ifdef GDB_LOG #include #include static void __printflike(1, 2) debug(const char *fmt, ...) { static FILE *logfile; va_list ap; if (logfile == NULL) { logfile = fopen("/tmp/bhyve_gdb.log", "w"); if (logfile == NULL) return; #ifndef WITHOUT_CAPSICUM if (caph_limit_stream(fileno(logfile), CAPH_WRITE) == -1) { fclose(logfile); logfile = NULL; return; } #endif setlinebuf(logfile); } va_start(ap, fmt); vfprintf(logfile, fmt, ap); va_end(ap); } #else #define debug(...) #endif static int guest_paging_info(int vcpu, struct vm_guest_paging *paging) { uint64_t regs[4]; const int regset[4] = { VM_REG_GUEST_CR0, VM_REG_GUEST_CR3, VM_REG_GUEST_CR4, VM_REG_GUEST_EFER }; if (vm_get_register_set(ctx, vcpu, nitems(regset), regset, regs) == -1) return (-1); /* * For the debugger, always pretend to be the kernel (CPL 0), * and if long-mode is enabled, always parse addresses as if * in 64-bit mode. 
*/ paging->cr3 = regs[1]; paging->cpl = 0; if (regs[3] & EFER_LMA) paging->cpu_mode = CPU_MODE_64BIT; else if (regs[0] & CR0_PE) paging->cpu_mode = CPU_MODE_PROTECTED; else paging->cpu_mode = CPU_MODE_REAL; if (!(regs[0] & CR0_PG)) paging->paging_mode = PAGING_MODE_FLAT; else if (!(regs[2] & CR4_PAE)) paging->paging_mode = PAGING_MODE_32; else if (regs[3] & EFER_LME) paging->paging_mode = PAGING_MODE_64; else paging->paging_mode = PAGING_MODE_PAE; return (0); } /* * Map a guest virtual address to a physical address (for a given vcpu). * If a guest virtual address is valid, return 1. If the address is * not valid, return 0. If an error occurs obtaining the mapping, * return -1. */ static int guest_vaddr2paddr(int vcpu, uint64_t vaddr, uint64_t *paddr) { struct vm_guest_paging paging; int fault; if (guest_paging_info(vcpu, &paging) == -1) return (-1); /* * Always use PROT_READ. We really care if the VA is * accessible, not if the current vCPU can write. */ if (vm_gla2gpa_nofault(ctx, vcpu, &paging, vaddr, PROT_READ, paddr, &fault) == -1) return (-1); if (fault) return (0); return (1); } static void io_buffer_reset(struct io_buffer *io) { io->start = 0; io->len = 0; } /* Available room for adding data. */ static size_t io_buffer_avail(struct io_buffer *io) { return (io->capacity - (io->start + io->len)); } static uint8_t * io_buffer_head(struct io_buffer *io) { return (io->data + io->start); } static uint8_t * io_buffer_tail(struct io_buffer *io) { return (io->data + io->start + io->len); } static void io_buffer_advance(struct io_buffer *io, size_t amount) { assert(amount <= io->len); io->start += amount; io->len -= amount; } static void io_buffer_consume(struct io_buffer *io, size_t amount) { io_buffer_advance(io, amount); if (io->len == 0) { io->start = 0; return; } /* * XXX: Consider making this move optional and compacting on a * future read() before realloc(). */ memmove(io->data, io_buffer_head(io), io->len); io->start = 0; } static void io_buffer_grow(struct io_buffer *io, size_t newsize) { uint8_t *new_data; size_t avail, new_cap; avail = io_buffer_avail(io); if (newsize <= avail) return; new_cap = io->capacity + (newsize - avail); new_data = realloc(io->data, new_cap); if (new_data == NULL) err(1, "Failed to grow GDB I/O buffer"); io->data = new_data; io->capacity = new_cap; } static bool response_pending(void) { if (cur_resp.start == 0 && cur_resp.len == 0) return (false); if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+') return (false); return (true); } static void close_connection(void) { /* * XXX: This triggers a warning because mevent does the close * before the EV_DELETE. */ pthread_mutex_lock(&gdb_lock); mevent_delete(write_event); mevent_delete_close(read_event); write_event = NULL; read_event = NULL; io_buffer_reset(&cur_comm); io_buffer_reset(&cur_resp); cur_fd = -1; /* Resume any stopped vCPUs. */ gdb_resume_vcpus(); pthread_mutex_unlock(&gdb_lock); } static uint8_t hex_digit(uint8_t nibble) { if (nibble <= 9) return (nibble + '0'); else return (nibble + 'a' - 10); } static uint8_t parse_digit(uint8_t v) { if (v >= '0' && v <= '9') return (v - '0'); if (v >= 'a' && v <= 'f') return (v - 'a' + 10); if (v >= 'A' && v <= 'F') return (v - 'A' + 10); return (0xF); } /* Parses big-endian hexadecimal. 
*/ static uintmax_t parse_integer(const uint8_t *p, size_t len) { uintmax_t v; v = 0; while (len > 0) { v <<= 4; v |= parse_digit(*p); p++; len--; } return (v); } static uint8_t parse_byte(const uint8_t *p) { return (parse_digit(p[0]) << 4 | parse_digit(p[1])); } static void send_pending_data(int fd) { ssize_t nwritten; if (cur_resp.len == 0) { mevent_disable(write_event); return; } nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len); if (nwritten == -1) { warn("Write to GDB socket failed"); close_connection(); } else { io_buffer_advance(&cur_resp, nwritten); if (cur_resp.len == 0) mevent_disable(write_event); else mevent_enable(write_event); } } /* Append a single character to the output buffer. */ static void send_char(uint8_t data) { io_buffer_grow(&cur_resp, 1); *io_buffer_tail(&cur_resp) = data; cur_resp.len++; } /* Append an array of bytes to the output buffer. */ static void send_data(const uint8_t *data, size_t len) { io_buffer_grow(&cur_resp, len); memcpy(io_buffer_tail(&cur_resp), data, len); cur_resp.len += len; } static void format_byte(uint8_t v, uint8_t *buf) { buf[0] = hex_digit(v >> 4); buf[1] = hex_digit(v & 0xf); } /* * Append a single byte (formatted as two hex characters) to the * output buffer. */ static void send_byte(uint8_t v) { uint8_t buf[2]; format_byte(v, buf); send_data(buf, sizeof(buf)); } static void start_packet(void) { send_char('$'); cur_csum = 0; } static void finish_packet(void) { send_char('#'); send_byte(cur_csum); debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp)); } /* * Append a single character (for the packet payload) and update the * checksum. */ static void append_char(uint8_t v) { send_char(v); cur_csum += v; } /* * Append an array of bytes (for the packet payload) and update the * checksum. 
*/ static void append_packet_data(const uint8_t *data, size_t len) { send_data(data, len); while (len > 0) { cur_csum += *data; data++; len--; } } static void append_string(const char *str) { append_packet_data(str, strlen(str)); } static void append_byte(uint8_t v) { uint8_t buf[2]; format_byte(v, buf); append_packet_data(buf, sizeof(buf)); } static void append_unsigned_native(uintmax_t value, size_t len) { size_t i; for (i = 0; i < len; i++) { append_byte(value); value >>= 8; } } static void append_unsigned_be(uintmax_t value, size_t len) { char buf[len * 2]; size_t i; for (i = 0; i < len; i++) { format_byte(value, buf + (len - i - 1) * 2); value >>= 8; } append_packet_data(buf, sizeof(buf)); } static void append_integer(unsigned int value) { if (value == 0) append_char('0'); else append_unsigned_be(value, (fls(value) + 7) / 8); } static void append_asciihex(const char *str) { while (*str != '\0') { append_byte(*str); str++; } } static void send_empty_response(void) { start_packet(); finish_packet(); } static void send_error(int error) { start_packet(); append_char('E'); append_byte(error); finish_packet(); } static void send_ok(void) { start_packet(); append_string("OK"); finish_packet(); } static int parse_threadid(const uint8_t *data, size_t len) { if (len == 1 && *data == '0') return (0); if (len == 2 && memcmp(data, "-1", 2) == 0) return (-1); if (len == 0) return (-2); return (parse_integer(data, len)); } static void report_stop(void) { start_packet(); if (stopped_vcpu == -1) append_char('S'); else append_char('T'); append_byte(GDB_SIGNAL_TRAP); if (stopped_vcpu != -1) { append_string("thread:"); append_integer(stopped_vcpu + 1); append_char(';'); } stopped_vcpu = -1; finish_packet(); } static void gdb_finish_suspend_vcpus(void) { if (first_stop) { first_stop = false; stopped_vcpu = -1; } else if (response_pending()) stop_pending = true; else { report_stop(); send_pending_data(cur_fd); } } static void _gdb_cpu_suspend(int vcpu, bool report_stop) { debug("$vCPU %d suspending\n", vcpu); CPU_SET(vcpu, &vcpus_waiting); if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0) gdb_finish_suspend_vcpus(); while (CPU_ISSET(vcpu, &vcpus_suspended) && vcpu != stepping_vcpu) pthread_cond_wait(&idle_vcpus, &gdb_lock); CPU_CLR(vcpu, &vcpus_waiting); debug("$vCPU %d resuming\n", vcpu); } void gdb_cpu_add(int vcpu) { debug("$vCPU %d starting\n", vcpu); pthread_mutex_lock(&gdb_lock); CPU_SET(vcpu, &vcpus_active); /* * If a vcpu is added while vcpus are stopped, suspend the new * vcpu so that it will pop back out with a debug exit before * executing the first instruction.
*/ if (!CPU_EMPTY(&vcpus_suspended)) { CPU_SET(vcpu, &vcpus_suspended); _gdb_cpu_suspend(vcpu, false); } pthread_mutex_unlock(&gdb_lock); } void gdb_cpu_suspend(int vcpu) { pthread_mutex_lock(&gdb_lock); _gdb_cpu_suspend(vcpu, true); pthread_mutex_unlock(&gdb_lock); } void gdb_cpu_mtrap(int vcpu) { debug("$vCPU %d MTRAP\n", vcpu); pthread_mutex_lock(&gdb_lock); if (vcpu == stepping_vcpu) { stepping_vcpu = -1; vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0); vm_suspend_cpu(ctx, vcpu); assert(stopped_vcpu == -1); stopped_vcpu = vcpu; _gdb_cpu_suspend(vcpu, true); } pthread_mutex_unlock(&gdb_lock); } static void gdb_suspend_vcpus(void) { assert(pthread_mutex_isowned_np(&gdb_lock)); debug("suspending all CPUs\n"); vcpus_suspended = vcpus_active; vm_suspend_cpu(ctx, -1); if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0) gdb_finish_suspend_vcpus(); } static bool gdb_step_vcpu(int vcpu) { int error, val; debug("$vCPU %d step\n", vcpu); error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val); if (error < 0) return (false); error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1); vm_resume_cpu(ctx, vcpu); stepping_vcpu = vcpu; pthread_cond_broadcast(&idle_vcpus); return (true); } static void gdb_resume_vcpus(void) { assert(pthread_mutex_isowned_np(&gdb_lock)); vm_resume_cpu(ctx, -1); debug("resuming all CPUs\n"); CPU_ZERO(&vcpus_suspended); pthread_cond_broadcast(&idle_vcpus); } static void gdb_read_regs(void) { uint64_t regvals[nitems(gdb_regset)]; int i; if (vm_get_register_set(ctx, cur_vcpu, nitems(gdb_regset), gdb_regset, regvals) == -1) { send_error(errno); return; } start_packet(); for (i = 0; i < nitems(regvals); i++) append_unsigned_native(regvals[i], gdb_regsize[i]); finish_packet(); } static void gdb_read_mem(const uint8_t *data, size_t len) { uint64_t gpa, gva, val; uint8_t *cp; size_t resid, todo, bytes; bool started; int error; cp = memchr(data, ',', len); if (cp == NULL) { send_error(EINVAL); return; } gva = parse_integer(data + 1, cp - (data + 1)); resid = parse_integer(cp + 1, len - (cp + 1 - data)); started = false; while (resid > 0) { error = guest_vaddr2paddr(cur_vcpu, gva, &gpa); if (error == -1) { if (started) finish_packet(); else send_error(errno); return; } if (error == 0) { if (started) finish_packet(); else send_error(EFAULT); return; } /* Read bytes from current page. */ todo = getpagesize() - gpa % getpagesize(); if (todo > resid) todo = resid; cp = paddr_guest2host(ctx, gpa, todo); if (cp != NULL) { /* * If this page is guest RAM, read it a byte * at a time. */ if (!started) { start_packet(); started = true; } while (todo > 0) { append_byte(*cp); cp++; gpa++; gva++; resid--; todo--; } } else { /* * If this page isn't guest RAM, try to handle * it via MMIO. For MMIO requests, use * aligned reads of words when possible. 
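*/

/*
 * Pick the widest access the alignment allows: single bytes when the
 * GPA is odd or only one byte remains, 2-byte reads when the GPA is
 * not 4-byte aligned or exactly two bytes remain, and 4-byte words
 * otherwise.
 */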
while (todo > 0) { if (gpa & 1 || todo == 1) bytes = 1; else if (gpa & 2 || todo == 2) bytes = 2; else bytes = 4; error = read_mem(ctx, cur_vcpu, gpa, &val, bytes); if (error == 0) { if (!started) { start_packet(); started = true; } gpa += bytes; gva += bytes; resid -= bytes; todo -= bytes; while (bytes > 0) { append_byte(val); val >>= 8; bytes--; } } else { if (started) finish_packet(); else send_error(EFAULT); return; } } } assert(resid == 0 || gpa % getpagesize() == 0); } if (!started) start_packet(); finish_packet(); } static bool command_equals(const uint8_t *data, size_t len, const char *cmd) { if (strlen(cmd) > len) return (false); return (memcmp(data, cmd, strlen(cmd)) == 0); } static void gdb_query(const uint8_t *data, size_t len) { /* * TODO: * - qSearch * - qSupported */ if (command_equals(data, len, "qAttached")) { start_packet(); append_char('1'); finish_packet(); } else if (command_equals(data, len, "qC")) { start_packet(); append_string("QC"); append_integer(cur_vcpu + 1); finish_packet(); } else if (command_equals(data, len, "qfThreadInfo")) { cpuset_t mask; bool first; int vcpu; if (CPU_EMPTY(&vcpus_active)) { send_error(EINVAL); return; } mask = vcpus_active; start_packet(); append_char('m'); first = true; while (!CPU_EMPTY(&mask)) { vcpu = CPU_FFS(&mask) - 1; CPU_CLR(vcpu, &mask); if (first) first = false; else append_char(','); append_integer(vcpu + 1); } finish_packet(); } else if (command_equals(data, len, "qsThreadInfo")) { start_packet(); append_char('l'); finish_packet(); } else if (command_equals(data, len, "qThreadExtraInfo")) { char buf[16]; int tid; data += strlen("qThreadExtraInfo"); len -= strlen("qThreadExtraInfo"); if (*data != ',') { send_error(EINVAL); return; } tid = parse_threadid(data + 1, len - 1); if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) { send_error(EINVAL); return; } snprintf(buf, sizeof(buf), "vCPU %d", tid - 1); start_packet(); append_asciihex(buf); finish_packet(); } else send_empty_response(); } static void handle_command(const uint8_t *data, size_t len) { /* Reject packets with a sequence-id. */ if (len >= 3 && data[0] >= '0' && data[0] <= '9' && data[1] >= '0' && data[1] <= '9' && data[2] == ':') { send_empty_response(); return; } switch (*data) { case 'c': if (len != 1) { send_error(EINVAL); break; } /* Don't send a reply until a stop occurs. */ gdb_resume_vcpus(); break; case 'D': send_ok(); /* TODO: Resume any stopped CPUs. */ break; case 'g': { gdb_read_regs(); break; } case 'H': { int tid; if (data[1] != 'g' && data[1] != 'c') { send_error(EINVAL); break; } tid = parse_threadid(data + 2, len - 2); if (tid == -2) { send_error(EINVAL); break; } if (CPU_EMPTY(&vcpus_active)) { send_error(EINVAL); break; } if (tid == -1 || tid == 0) cur_vcpu = CPU_FFS(&vcpus_active) - 1; else if (CPU_ISSET(tid - 1, &vcpus_active)) cur_vcpu = tid - 1; else { send_error(EINVAL); break; } send_ok(); break; } case 'm': gdb_read_mem(data, len); break; case 'T': { int tid; tid = parse_threadid(data + 1, len - 1); if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) { send_error(EINVAL); return; } send_ok(); break; } case 'q': gdb_query(data, len); break; case 's': if (len != 1) { send_error(EINVAL); break; } /* Don't send a reply until a stop occurs. */ if (!gdb_step_vcpu(cur_vcpu)) { send_error(EOPNOTSUPP); break; } break; case '?': /* XXX: Only if stopped? */ /* For now, just report that we are always stopped.
*/ start_packet(); append_char('S'); append_byte(GDB_SIGNAL_TRAP); finish_packet(); break; case 'G': /* TODO */ case 'M': /* TODO */ case 'v': /* Handle 'vCont' */ /* 'vCtrlC' */ case 'p': /* TODO */ case 'P': /* TODO */ case 'Q': /* TODO */ case 't': /* TODO */ case 'X': /* TODO */ case 'z': /* TODO */ case 'Z': /* TODO */ default: send_empty_response(); } } /* Check for a valid packet in the command buffer. */ static void check_command(int fd) { uint8_t *head, *hash, *p, sum; size_t avail, plen; for (;;) { avail = cur_comm.len; if (avail == 0) return; head = io_buffer_head(&cur_comm); switch (*head) { case 0x03: debug("<- Ctrl-C\n"); io_buffer_consume(&cur_comm, 1); gdb_suspend_vcpus(); break; case '+': /* ACK of previous response. */ debug("<- +\n"); if (response_pending()) io_buffer_reset(&cur_resp); io_buffer_consume(&cur_comm, 1); if (stop_pending) { stop_pending = false; report_stop(); send_pending_data(fd); } break; case '-': /* NACK of previous response. */ debug("<- -\n"); if (response_pending()) { cur_resp.len += cur_resp.start; cur_resp.start = 0; if (cur_resp.data[0] == '+') io_buffer_advance(&cur_resp, 1); debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp)); } io_buffer_consume(&cur_comm, 1); send_pending_data(fd); break; case '$': /* Packet. */ if (response_pending()) { warnx("New GDB command while response in " "progress"); io_buffer_reset(&cur_resp); } /* Is packet complete? */ hash = memchr(head, '#', avail); if (hash == NULL) return; plen = (hash - head + 1) + 2; if (avail < plen) return; debug("<- %.*s\n", (int)plen, head); /* Verify checksum. */ for (sum = 0, p = head + 1; p < hash; p++) sum += *p; if (sum != parse_byte(hash + 1)) { io_buffer_consume(&cur_comm, plen); debug("-> -\n"); send_char('-'); send_pending_data(fd); break; } send_char('+'); handle_command(head + 1, hash - (head + 1)); io_buffer_consume(&cur_comm, plen); if (!response_pending()) debug("-> +\n"); send_pending_data(fd); break; default: /* XXX: Possibly drop connection instead. */ debug("-> %02x\n", *head); io_buffer_consume(&cur_comm, 1); break; } } } static void gdb_readable(int fd, enum ev_type event, void *arg) { ssize_t nread; int pending; if (ioctl(fd, FIONREAD, &pending) == -1) { warn("FIONREAD on GDB socket"); return; } /* * 'pending' might be zero due to EOF. We need to call read * with a non-zero length to detect EOF. */ if (pending == 0) pending = 1; /* Ensure there is room in the command buffer. */ io_buffer_grow(&cur_comm, pending); assert(io_buffer_avail(&cur_comm) >= pending); nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm)); if (nread == 0) { close_connection(); } else if (nread == -1) { if (errno == EAGAIN) return; warn("Read from GDB socket"); close_connection(); } else { cur_comm.len += nread; pthread_mutex_lock(&gdb_lock); check_command(fd); pthread_mutex_unlock(&gdb_lock); } } static void gdb_writable(int fd, enum ev_type event, void *arg) { send_pending_data(fd); } static void new_connection(int fd, enum ev_type event, void *arg) { int optval, s; s = accept4(fd, NULL, NULL, SOCK_NONBLOCK); if (s == -1) { if (arg != NULL) err(1, "Failed accepting initial GDB connection"); /* Silently ignore errors post-startup. 
*/ return; } optval = 1; if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) == -1) { warn("Failed to disable SIGPIPE for GDB connection"); close(s); return; } pthread_mutex_lock(&gdb_lock); if (cur_fd != -1) { close(s); warnx("Ignoring additional GDB connection."); } read_event = mevent_add(s, EVF_READ, gdb_readable, NULL); if (read_event == NULL) { if (arg != NULL) err(1, "Failed to setup initial GDB connection"); pthread_mutex_unlock(&gdb_lock); return; } write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL); if (write_event == NULL) { if (arg != NULL) err(1, "Failed to setup initial GDB connection"); mevent_delete_close(read_event); read_event = NULL; } cur_fd = s; cur_vcpu = 0; stepping_vcpu = -1; stopped_vcpu = -1; stop_pending = false; /* Break on attach. */ first_stop = true; gdb_suspend_vcpus(); pthread_mutex_unlock(&gdb_lock); } #ifndef WITHOUT_CAPSICUM void limit_gdb_socket(int s) { cap_rights_t rights; unsigned long ioctls[] = { FIONREAD }; cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE, CAP_SETSOCKOPT, CAP_IOCTL); if (caph_rights_limit(s, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); } #endif void init_gdb(struct vmctx *_ctx, int sport, bool wait) { struct sockaddr_in sin; int error, flags, s; debug("==> starting on %d, %swaiting\n", sport, wait ? "" : "not "); error = pthread_mutex_init(&gdb_lock, NULL); if (error != 0) errc(1, error, "gdb mutex init"); error = pthread_cond_init(&idle_vcpus, NULL); if (error != 0) errc(1, error, "gdb cv init"); ctx = _ctx; s = socket(PF_INET, SOCK_STREAM, 0); if (s < 0) err(1, "gdb socket create"); sin.sin_len = sizeof(sin); sin.sin_family = AF_INET; sin.sin_addr.s_addr = htonl(INADDR_ANY); sin.sin_port = htons(sport); if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) err(1, "gdb socket bind"); if (listen(s, 1) < 0) err(1, "gdb socket listen"); if (wait) { /* * Set vcpu 0 in vcpus_suspended. This will trigger the * logic in gdb_cpu_add() to suspend the first vcpu before * it starts execution. The vcpu will remain suspended * until a debugger connects. */ stepping_vcpu = -1; stopped_vcpu = -1; CPU_SET(0, &vcpus_suspended); } flags = fcntl(s, F_GETFL); if (fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) err(1, "Failed to mark gdb socket non-blocking"); #ifndef WITHOUT_CAPSICUM limit_gdb_socket(s); #endif mevent_add(s, EVF_READ, new_connection, NULL); } Index: head/usr.sbin/bhyve/gdb.h =================================================================== --- head/usr.sbin/bhyve/gdb.h (revision 344854) +++ head/usr.sbin/bhyve/gdb.h (revision 344855) @@ -1,39 +1,38 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2017 John H. Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __GDB_H__ #define __GDB_H__ void gdb_cpu_add(int vcpu); void gdb_cpu_mtrap(int vcpu); void gdb_cpu_suspend(int vcpu); void init_gdb(struct vmctx *ctx, int sport, bool wait); #endif /* !__GDB_H__ */ Index: head/usr.sbin/devctl/devctl.8 =================================================================== --- head/usr.sbin/devctl/devctl.8 (revision 344854) +++ head/usr.sbin/devctl/devctl.8 (revision 344855) @@ -1,178 +1,177 @@ .\" .\" Copyright (c) 2015 John Baldwin -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd August 29, 2016 .Dt DEVCTL 8 .Os .Sh NAME .Nm devctl .Nd device control utility .Sh SYNOPSIS .Nm .Cm attach .Ar device .Nm .Cm clear driver .Op Fl f .Ar device .Nm .Cm detach .Op Fl f .Ar device .Nm .Cm disable .Op Fl f .Ar device .Nm .Cm enable .Ar device .Nm .Cm suspend .Ar device .Nm .Cm resume .Ar device .Nm .Cm set driver .Op Fl f .Ar device driver .Nm .Cm rescan .Ar device .Nm .Cm delete .Op Fl f .Ar device .Sh DESCRIPTION The .Nm utility adjusts the state of individual devices in the kernel's internal device hierarchy. Each invocation of .Nm consists of a single command followed by command-specific arguments. Each command operates on a single device specified via the .Ar device argument. The .Ar device may be specified either as the name of an existing device or as a bus-specific address. More details on supported address formats can be found in .Xr devctl 3 . 
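.Pp
Each command below corresponds to a C function provided by
.Xr devctl 3 ,
so the same operations can be driven from a program.
As a minimal sketch, assuming the
.In devctl.h
header and linking with the devctl library, the following forces a
re-probe of a device:
.Bd -literal -offset indent
#include <devctl.h>
#include <err.h>

int
main(void)
{
	/* "ath0" is a placeholder; use any device name or address. */
	if (devctl_attach("ath0") < 0)
		err(1, "devctl_attach");
	return (0);
}
.Ed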
.Pp The following commands are supported: .Bl -tag -width indent .It Cm attach Ar device Force the kernel to re-probe the device. If a suitable driver is found, it is attached to the device. .It Xo Cm detach .Op Fl f .Ar device .Xc Detach the device from its current device driver. If the .Fl f flag is specified, the device driver will be detached even if the device is busy. .It Xo Cm disable .Op Fl f .Ar device .Xc Disable a device. If the device is currently attached to a device driver, the device driver will be detached from the device, but the device will retain its current name. If the .Fl f flag is specified, the device driver will be detached even if the device is busy. .It Cm enable Ar device Enable a device. The device will probe and attach if a suitable device driver is found. Note that this can re-enable a device disabled at boot time via a loader tunable. .It Cm suspend Ar device Suspend a device. This may include placing the device in a reduced power state. .It Cm resume Ar device Resume a suspended device to a fully working state. .It Xo Cm set driver .Op Fl f .Ar device driver .Xc Force the device to use a device driver named .Ar driver . If the device is already attached to a device driver and the .Fl f flag is specified, the device will be detached from its current device driver before it is attached to the new device driver. If the device is already attached to a device driver and the .Fl f flag is not specified, the device will not be changed. .It Xo Cm clear driver .Op Fl f .Ar device .Xc Clear a previously-forced driver name so that the device is able to use any valid device driver. After the previous name has been cleared, the device is reprobed so that other device drivers may attach to it. This can be used to undo an earlier .Cm set driver command. If the device is currently attached to a device driver and the .Fl f flag is not specified, the device will not be changed. .It Cm rescan Ar device Rescan a bus device checking for devices that have been added or removed. .It Xo Cm delete .Op Fl f .Ar device .Xc Delete the device from the device tree. If the .Fl f flag is specified, the device will be deleted even if it is physically present. This command should be used with care as a device that is deleted but present can no longer be used unless the parent bus device rediscovers the device via a rescan request. .El .Sh SEE ALSO .Xr devctl 3 , .Xr devinfo 8 .Sh HISTORY The .Nm utility first appeared in .Fx 10.3 . Index: head/usr.sbin/devctl/devctl.c =================================================================== --- head/usr.sbin/devctl/devctl.c (revision 344854) +++ head/usr.sbin/devctl/devctl.c (revision 344855) @@ -1,408 +1,407 @@ /*- * Copyright (c) 2014 John Baldwin - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include struct devctl_command { const char *name; int (*handler)(int ac, char **av); }; #define DEVCTL_DATASET(name) devctl_ ## name ## _table #define DEVCTL_COMMAND(set, name, function) \ static struct devctl_command function ## _devctl_command = \ { #name, function }; \ DATA_SET(DEVCTL_DATASET(set), function ## _devctl_command) #define DEVCTL_TABLE(set, name) \ SET_DECLARE(DEVCTL_DATASET(name), struct devctl_command); \ \ static int \ devctl_ ## name ## _table_handler(int ac, char **av) \ { \ return (devctl_table_handler(SET_BEGIN(DEVCTL_DATASET(name)), \ SET_LIMIT(DEVCTL_DATASET(name)), ac, av)); \ } \ DEVCTL_COMMAND(set, name, devctl_ ## name ## _table_handler) static int devctl_table_handler(struct devctl_command **start, struct devctl_command **end, int ac, char **av); SET_DECLARE(DEVCTL_DATASET(top), struct devctl_command); DEVCTL_TABLE(top, clear); DEVCTL_TABLE(top, set); static void usage(void) { fprintf(stderr, "usage: devctl attach device\n" " devctl detach [-f] device\n" " devctl disable [-f] device\n" " devctl enable device\n" " devctl suspend device\n" " devctl resume device\n" " devctl set driver [-f] device driver\n" " devctl clear driver [-f] device\n" " devctl rescan device\n" " devctl delete [-f] device\n" " devctl freeze\n" " devctl thaw\n"); exit(1); } static int devctl_table_handler(struct devctl_command **start, struct devctl_command **end, int ac, char **av) { struct devctl_command **cmd; if (ac < 2) { warnx("The %s command requires a sub-command.", av[0]); return (EINVAL); } for (cmd = start; cmd < end; cmd++) { if (strcmp((*cmd)->name, av[1]) == 0) return ((*cmd)->handler(ac - 1, av + 1)); } warnx("%s is not a valid sub-command of %s.", av[1], av[0]); return (ENOENT); } static int help(int ac __unused, char **av __unused) { usage(); return (0); } DEVCTL_COMMAND(top, help, help); static int attach(int ac, char **av) { if (ac != 2) usage(); if (devctl_attach(av[1]) < 0) err(1, "Failed to attach %s", av[1]); return (0); } DEVCTL_COMMAND(top, attach, attach); static void detach_usage(void) { fprintf(stderr, "usage: devctl detach [-f] device\n"); exit(1); } static int detach(int ac, char **av) { bool force; int ch; force = false; while ((ch = getopt(ac, av, "f")) != -1) switch (ch) { case 'f': force = true; break; default: detach_usage(); } ac -= optind; av += optind; if (ac != 1) detach_usage(); if (devctl_detach(av[0], force) < 0) err(1, "Failed to detach %s", av[0]); return (0); } DEVCTL_COMMAND(top, detach, detach); static void disable_usage(void) { fprintf(stderr, "usage: devctl disable [-f] device\n"); exit(1); } static int disable(int ac, char **av) { bool force; int ch; force = false; while ((ch = getopt(ac, av, "f")) != -1) switch (ch) { case 'f': force = true; break; default: disable_usage(); } ac -= optind; av += optind; if (ac != 1) disable_usage(); if (devctl_disable(av[0], force) < 0) err(1, "Failed to disable %s", av[0]); return (0); } 
DEVCTL_COMMAND(top, disable, disable); static int enable(int ac, char **av) { if (ac != 2) usage(); if (devctl_enable(av[1]) < 0) err(1, "Failed to enable %s", av[1]); return (0); } DEVCTL_COMMAND(top, enable, enable); static int suspend(int ac, char **av) { if (ac != 2) usage(); if (devctl_suspend(av[1]) < 0) err(1, "Failed to suspend %s", av[1]); return (0); } DEVCTL_COMMAND(top, suspend, suspend); static int resume(int ac, char **av) { if (ac != 2) usage(); if (devctl_resume(av[1]) < 0) err(1, "Failed to resume %s", av[1]); return (0); } DEVCTL_COMMAND(top, resume, resume); static void set_driver_usage(void) { fprintf(stderr, "usage: devctl set driver [-f] device driver\n"); exit(1); } static int set_driver(int ac, char **av) { bool force; int ch; force = false; while ((ch = getopt(ac, av, "f")) != -1) switch (ch) { case 'f': force = true; break; default: set_driver_usage(); } ac -= optind; av += optind; if (ac != 2) set_driver_usage(); if (devctl_set_driver(av[0], av[1], force) < 0) err(1, "Failed to set %s driver to %s", av[0], av[1]); return (0); } DEVCTL_COMMAND(set, driver, set_driver); static void clear_driver_usage(void) { fprintf(stderr, "usage: devctl clear driver [-f] device\n"); exit(1); } static int clear_driver(int ac, char **av) { bool force; int ch; force = false; while ((ch = getopt(ac, av, "f")) != -1) switch (ch) { case 'f': force = true; break; default: clear_driver_usage(); } ac -= optind; av += optind; if (ac != 1) clear_driver_usage(); if (devctl_clear_driver(av[0], force) < 0) err(1, "Failed to clear %s driver", av[0]); return (0); } DEVCTL_COMMAND(clear, driver, clear_driver); static int rescan(int ac, char **av) { if (ac != 2) usage(); if (devctl_rescan(av[1]) < 0) err(1, "Failed to rescan %s", av[1]); return (0); } DEVCTL_COMMAND(top, rescan, rescan); static void delete_usage(void) { fprintf(stderr, "usage: devctl delete [-f] device\n"); exit(1); } static int delete(int ac, char **av) { bool force; int ch; force = false; while ((ch = getopt(ac, av, "f")) != -1) switch (ch) { case 'f': force = true; break; default: delete_usage(); } ac -= optind; av += optind; if (ac != 1) delete_usage(); if (devctl_delete(av[0], force) < 0) err(1, "Failed to delete %s", av[0]); return (0); } DEVCTL_COMMAND(top, delete, delete); static void freeze_usage(void) { fprintf(stderr, "usage: devctl freeze\n"); exit(1); } static int freeze(int ac, char **av __unused) { if (ac != 1) freeze_usage(); if (devctl_freeze() < 0) err(1, "Failed to freeze probe/attach"); return (0); } DEVCTL_COMMAND(top, freeze, freeze); static void thaw_usage(void) { fprintf(stderr, "usage: devctl thaw\n"); exit(1); } static int thaw(int ac, char **av __unused) { if (ac != 1) thaw_usage(); if (devctl_thaw() < 0) err(1, "Failed to thaw probe/attach"); return (0); } DEVCTL_COMMAND(top, thaw, thaw); int main(int ac, char *av[]) { struct devctl_command **cmd; if (ac == 1) usage(); ac--; av++; SET_FOREACH(cmd, DEVCTL_DATASET(top)) { if (strcmp((*cmd)->name, av[0]) == 0) { if ((*cmd)->handler(ac, av) != 0) return (1); else return (0); } } warnx("Unknown command %s.", av[0]); return (1); }
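A note on the dispatch mechanism used by devctl.c above: DEVCTL_COMMAND() and DEVCTL_TABLE() are built on FreeBSD linker sets. Each registration macro places a pointer to a struct devctl_command into a named ELF section at link time, and main() (or the generated table handler for two-word commands such as "set driver" and "clear driver") walks that section with SET_FOREACH(). The sketch below shows the pattern in isolation; the demo_table set and the hello command are hypothetical names, and <sys/linker_set.h> makes it FreeBSD-specific:

#include <sys/linker_set.h>

#include <stdio.h>
#include <string.h>

struct demo_command {
	const char *name;
	int (*handler)(void);
};

/* Declare the set; the linker gathers all DATA_SET() entries into it. */
SET_DECLARE(demo_table, struct demo_command);

static int
hello(void)
{
	printf("hello\n");
	return (0);
}

/* Register "hello" at link time, as DEVCTL_COMMAND() does. */
static struct demo_command hello_command = { "hello", hello };
DATA_SET(demo_table, hello_command);

int
main(int argc, char **argv)
{
	struct demo_command **cmd;

	if (argc < 2)
		return (1);
	SET_FOREACH(cmd, demo_table) {
		if (strcmp((*cmd)->name, argv[1]) == 0)
			return ((*cmd)->handler());
	}
	fprintf(stderr, "unknown command %s\n", argv[1]);
	return (1);
}

Because the table is assembled by the linker, adding a command is purely local: define a handler and register it with DATA_SET(); no central array needs to be edited.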